diff --git a/.venv-backups/25365439/venv-main-2025-08-28-0900.txt b/.venv-backups/25365439/venv-main-2025-08-28-0900.txt
new file mode 100644
index 0000000000000000000000000000000000000000..563e41d44bddaf0273710768c6a5d671bc727760
--- /dev/null
+++ b/.venv-backups/25365439/venv-main-2025-08-28-0900.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.5
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25365439/venv-main-2025-08-28-1130.txt b/.venv-backups/25365439/venv-main-2025-08-28-1130.txt
new file mode 100644
index 0000000000000000000000000000000000000000..563e41d44bddaf0273710768c6a5d671bc727760
--- /dev/null
+++ b/.venv-backups/25365439/venv-main-2025-08-28-1130.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.5
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25365439/venv-main-2025-08-28-1300.txt b/.venv-backups/25365439/venv-main-2025-08-28-1300.txt
new file mode 100644
index 0000000000000000000000000000000000000000..563e41d44bddaf0273710768c6a5d671bc727760
--- /dev/null
+++ b/.venv-backups/25365439/venv-main-2025-08-28-1300.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.5
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25365439/venv-main-2025-08-28-1330.txt b/.venv-backups/25365439/venv-main-2025-08-28-1330.txt
new file mode 100644
index 0000000000000000000000000000000000000000..563e41d44bddaf0273710768c6a5d671bc727760
--- /dev/null
+++ b/.venv-backups/25365439/venv-main-2025-08-28-1330.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.5
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25468028/backup.log b/.venv-backups/25468028/backup.log
new file mode 100644
index 0000000000000000000000000000000000000000..10d62b265a2200eab32dfb7da10b40555466ebc9
--- /dev/null
+++ b/.venv-backups/25468028/backup.log
@@ -0,0 +1,3 @@
+[2025-08-30 08:52:37] Processing virtual environment: /venv/main
+[2025-08-30 08:52:38] SUCCESS: Created backup at /workspace/.venv-backups/25468028/venv-main-2025-08-30-0852.txt
+[2025-08-30 08:52:38] Backup process completed
diff --git a/.venv-backups/25468028/venv-main-2025-08-30-0852.txt b/.venv-backups/25468028/venv-main-2025-08-30-0852.txt
new file mode 100644
index 0000000000000000000000000000000000000000..62960b2d2e51924d955ae5de2b87ecefa6d491f5
--- /dev/null
+++ b/.venv-backups/25468028/venv-main-2025-08-30-0852.txt
@@ -0,0 +1,165 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+cryptography==45.0.5
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25474989/backup.log b/.venv-backups/25474989/backup.log
new file mode 100644
index 0000000000000000000000000000000000000000..d1d0fae865de062460399790abef362dfed61264
--- /dev/null
+++ b/.venv-backups/25474989/backup.log
@@ -0,0 +1,12 @@
+[2025-08-30 14:37:04] Processing virtual environment: /venv/main
+[2025-08-30 14:37:04] SUCCESS: Created backup at /workspace/.venv-backups/25474989/venv-main-2025-08-30-1437.txt
+[2025-08-30 14:37:04] Backup process completed
+[2025-08-30 15:30:01] Processing virtual environment: /venv/main
+[2025-08-30 15:30:01] SUCCESS: Created backup at /workspace/.venv-backups/25474989/venv-main-2025-08-30-1530.txt
+[2025-08-30 15:30:02] Backup process completed
+[2025-08-30 16:00:01] Processing virtual environment: /venv/main
+[2025-08-30 16:00:02] SUCCESS: Created backup at /workspace/.venv-backups/25474989/venv-main-2025-08-30-1600.txt
+[2025-08-30 16:00:02] Backup process completed
+[2025-08-30 17:30:01] Processing virtual environment: /venv/main
+[2025-08-30 17:30:01] SUCCESS: Created backup at /workspace/.venv-backups/25474989/venv-main-2025-08-30-1730.txt
+[2025-08-30 17:30:01] Backup process completed
diff --git a/.venv-backups/25474989/venv-main-2025-08-30-1530.txt b/.venv-backups/25474989/venv-main-2025-08-30-1530.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25474989/venv-main-2025-08-30-1530.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25474989/venv-main-2025-08-30-1600.txt b/.venv-backups/25474989/venv-main-2025-08-30-1600.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25474989/venv-main-2025-08-30-1600.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25474989/venv-main-2025-08-30-1730.txt b/.venv-backups/25474989/venv-main-2025-08-30-1730.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25474989/venv-main-2025-08-30-1730.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/backup.log b/.venv-backups/25478920/backup.log
new file mode 100644
index 0000000000000000000000000000000000000000..25c1c9f7a9caa79b01022b607caae315936f530b
--- /dev/null
+++ b/.venv-backups/25478920/backup.log
@@ -0,0 +1,63 @@
+[2025-08-30 17:53:15] Processing virtual environment: /venv/main
+[2025-08-30 17:53:15] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-30-1753.txt
+[2025-08-30 17:53:16] Backup process completed
+[2025-08-30 19:00:01] Processing virtual environment: /venv/main
+[2025-08-30 19:00:01] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-30-1900.txt
+[2025-08-30 19:00:01] Backup process completed
+[2025-08-30 22:30:01] Processing virtual environment: /venv/main
+[2025-08-30 22:30:01] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-30-2230.txt
+[2025-08-30 22:30:01] Backup process completed
+[2025-08-30 23:00:01] Processing virtual environment: /venv/main
+[2025-08-30 23:00:02] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-30-2300.txt
+[2025-08-30 23:00:02] Backup process completed
+[2025-08-30 23:30:01] Processing virtual environment: /venv/main
+[2025-08-30 23:30:01] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-30-2330.txt
+[2025-08-30 23:30:01] Backup process completed
+[2025-08-31 00:00:01] Processing virtual environment: /venv/main
+[2025-08-31 00:00:02] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-31-0000.txt
+[2025-08-31 00:00:02] Backup process completed
+[2025-08-31 00:30:01] Processing virtual environment: /venv/main
+[2025-08-31 00:30:01] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-31-0030.txt
+[2025-08-31 00:30:01] Backup process completed
+[2025-08-31 01:00:01] Processing virtual environment: /venv/main
+[2025-08-31 01:00:02] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-31-0100.txt
+[2025-08-31 01:00:02] Backup process completed
+[2025-08-31 01:30:01] Processing virtual environment: /venv/main
+[2025-08-31 01:30:01] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-31-0130.txt
+[2025-08-31 01:30:01] Backup process completed
+[2025-08-31 02:00:01] Processing virtual environment: /venv/main
+[2025-08-31 02:00:02] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-31-0200.txt
+[2025-08-31 02:00:02] Backup process completed
+[2025-08-31 02:30:01] Processing virtual environment: /venv/main
+[2025-08-31 02:30:01] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-31-0230.txt
+[2025-08-31 02:30:01] Backup process completed
+[2025-08-31 03:00:01] Processing virtual environment: /venv/main
+[2025-08-31 03:00:02] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-31-0300.txt
+[2025-08-31 03:00:02] Backup process completed
+[2025-08-31 03:30:01] Processing virtual environment: /venv/main
+[2025-08-31 03:30:01] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-31-0330.txt
+[2025-08-31 03:30:01] Backup process completed
+[2025-08-31 04:00:01] Processing virtual environment: /venv/main
+[2025-08-31 04:00:02] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-31-0400.txt
+[2025-08-31 04:00:02] Backup process completed
+[2025-08-31 04:30:01] Processing virtual environment: /venv/main
+[2025-08-31 04:30:01] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-31-0430.txt
+[2025-08-31 04:30:02] Backup process completed
+[2025-08-31 05:00:01] Processing virtual environment: /venv/main
+[2025-08-31 05:00:01] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-31-0500.txt
+[2025-08-31 05:00:01] Backup process completed
+[2025-08-31 05:30:01] Processing virtual environment: /venv/main
+[2025-08-31 05:30:01] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-31-0530.txt
+[2025-08-31 05:30:02] Backup process completed
+[2025-08-31 06:00:01] Processing virtual environment: /venv/main
+[2025-08-31 06:00:01] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-31-0600.txt
+[2025-08-31 06:00:01] Backup process completed
+[2025-08-31 06:30:01] Processing virtual environment: /venv/main
+[2025-08-31 06:30:01] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-08-31-0630.txt
+[2025-08-31 06:30:02] Backup process completed
+[2025-09-03 07:00:01] Processing virtual environment: /venv/main
+[2025-09-03 07:00:02] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-09-03-0700.txt
+[2025-09-03 07:00:02] Backup process completed
+[2025-09-03 08:00:01] Processing virtual environment: /venv/main
+[2025-09-03 08:00:01] SUCCESS: Created backup at /workspace/.venv-backups/25478920/venv-main-2025-09-03-0800.txt
+[2025-09-03 08:00:02] Backup process completed
diff --git a/.venv-backups/25478920/venv-main-2025-08-30-1753.txt b/.venv-backups/25478920/venv-main-2025-08-30-1753.txt
new file mode 100644
index 0000000000000000000000000000000000000000..62960b2d2e51924d955ae5de2b87ecefa6d491f5
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-30-1753.txt
@@ -0,0 +1,165 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+cryptography==45.0.5
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-30-1900.txt b/.venv-backups/25478920/venv-main-2025-08-30-1900.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-30-1900.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-30-2230.txt b/.venv-backups/25478920/venv-main-2025-08-30-2230.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-30-2230.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-30-2300.txt b/.venv-backups/25478920/venv-main-2025-08-30-2300.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-30-2300.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-30-2330.txt b/.venv-backups/25478920/venv-main-2025-08-30-2330.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-30-2330.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-31-0000.txt b/.venv-backups/25478920/venv-main-2025-08-31-0000.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-31-0000.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-31-0030.txt b/.venv-backups/25478920/venv-main-2025-08-31-0030.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-31-0030.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-31-0100.txt b/.venv-backups/25478920/venv-main-2025-08-31-0100.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-31-0100.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-31-0130.txt b/.venv-backups/25478920/venv-main-2025-08-31-0130.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-31-0130.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-31-0200.txt b/.venv-backups/25478920/venv-main-2025-08-31-0200.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-31-0200.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-31-0230.txt b/.venv-backups/25478920/venv-main-2025-08-31-0230.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-31-0230.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-31-0300.txt b/.venv-backups/25478920/venv-main-2025-08-31-0300.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-31-0300.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-31-0330.txt b/.venv-backups/25478920/venv-main-2025-08-31-0330.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-31-0330.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-31-0400.txt b/.venv-backups/25478920/venv-main-2025-08-31-0400.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-31-0400.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-31-0430.txt b/.venv-backups/25478920/venv-main-2025-08-31-0430.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-31-0430.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-31-0500.txt b/.venv-backups/25478920/venv-main-2025-08-31-0500.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-31-0500.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-31-0530.txt b/.venv-backups/25478920/venv-main-2025-08-31-0530.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-31-0530.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-31-0600.txt b/.venv-backups/25478920/venv-main-2025-08-31-0600.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-31-0600.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-08-31-0630.txt b/.venv-backups/25478920/venv-main-2025-08-31-0630.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-08-31-0630.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-09-03-0700.txt b/.venv-backups/25478920/venv-main-2025-09-03-0700.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-09-03-0700.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-2025-09-03-0800.txt b/.venv-backups/25478920/venv-main-2025-09-03-0800.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-2025-09-03-0800.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/.venv-backups/25478920/venv-main-latest.txt b/.venv-backups/25478920/venv-main-latest.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed81bbb11f6718c38143b5d45a4b902cda2cfd1b
--- /dev/null
+++ b/.venv-backups/25478920/venv-main-latest.txt
@@ -0,0 +1,173 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiohttp_socks==0.10.1
+aiosignal==1.4.0
+albucore==0.0.24
+albumentations==2.0.8
+alembic==1.16.4
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+av==15.0.0
+beautifulsoup4==4.13.4
+certifi==2025.6.15
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.2
+click==8.2.1
+colour-science==0.4.6
+comfyui-embedded-docs==0.2.4
+comfyui_frontend_package==1.23.4
+comfyui_workflow_templates==0.1.41
+comm==0.2.2
+contourpy==1.3.3
+cryptography==45.0.5
+cycler==0.12.1
+debugpy==1.8.14
+decorator==5.2.1
+easydict==1.13
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fsspec==2025.5.1
+gdown==5.2.0
+gitdb==4.0.12
+GitPython==3.1.45
+greenlet==3.2.3
+h11==0.16.0
+h2==4.2.0
+hf-xet==1.1.5
+hpack==4.1.0
+huggingface-hub==0.34.3
+hyperframe==6.1.0
+idna==3.10
+imageio==2.37.0
+inquirerpy==0.3.4
+ipykernel==6.29.5
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.4
+jsonschema==4.25.0
+jsonschema-specifications==2025.4.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+kornia==0.8.1
+kornia_rs==0.1.9
+lazy_loader==0.4
+llvmlite==0.44.0
+Mako==1.3.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+matrix-nio==0.25.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.6.3
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.61.2
+numpy==2.1.2
+nvidia-cublas-cu12==12.8.3.14
+nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cudnn-cu12==9.7.1.26
+nvidia-cufft-cu12==11.3.3.41
+nvidia-cufile-cu12==1.13.0.11
+nvidia-curand-cu12==10.3.9.55
+nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvtx-cu12==12.8.55
+opencv-python==4.12.0.88
+opencv-python-headless==4.12.0.88
+packaging==25.0
+parso==0.8.4
+pexpect==4.9.0
+pfzy==0.3.4
+piexif==1.1.3
+pillow==11.0.0
+pixeloe==0.1.4
+platformdirs==4.3.8
+pooch==1.8.2
+prompt_toolkit==3.0.51
+propcache==0.3.2
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+pycryptodome==3.23.0
+pydantic==2.11.7
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+PyGithub==2.7.0
+Pygments==2.19.2
+PyJWT==2.10.1
+PyMatting==1.1.14
+PyNaCl==1.5.0
+pyparsing==3.2.3
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-socks==2.7.1
+PyWavelets==1.9.0
+PyYAML==6.0.2
+pyzmq==27.0.0
+referencing==0.36.2
+regex==2025.7.34
+rembg==2.0.67
+requests==2.32.4
+rich==14.1.0
+rpds-py==0.26.0
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.1
+sentencepiece==0.2.0
+setuptools==80.9.0
+shellingham==1.5.4
+simsimd==6.5.0
+six==1.17.0
+smmap==5.0.2
+soundfile==0.13.1
+soupsieve==2.7
+spandrel==0.4.1
+SQLAlchemy==2.0.42
+stack-data==0.6.3
+stringzilla==3.12.5
+sympy==1.13.3
+tifffile==2025.6.11
+timm==1.0.19
+tokenizers==0.21.4
+toml==0.10.2
+torch==2.7.1+cu128
+torchaudio==2.7.1+cu128
+torchsde==0.2.6
+torchvision==0.22.1+cu128
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+trampoline==0.1.2
+transformers==4.54.1
+transparent-background==1.3.4
+triton==3.3.1
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+unpaddedbase64==2.1.0
+urllib3==2.5.0
+uv==0.8.4
+wcwidth==0.2.13
+wget==3.2
+wheel==0.45.1
+widgetsnbextension==4.0.14
+yarl==1.20.1
diff --git a/ComfyUI/.ipynb_checkpoints/install-checkpoint.sh b/ComfyUI/.ipynb_checkpoints/install-checkpoint.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f866cd3594dc1d52581db39e1a1d0f8ae3c39f51
--- /dev/null
+++ b/ComfyUI/.ipynb_checkpoints/install-checkpoint.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+# Update system packages
+apt update && apt upgrade -y
+
+# Install base packages
+apt install -y software-properties-common build-essential git python3-pip wget cmake pkg-config ninja-build
+
+# Add deadsnakes PPA and install Python 3.12
+add-apt-repository ppa:deadsnakes/ppa -y
+apt update
+apt install -y python3.12 python3.12-venv python3.12-dev
+
+# Set python and python3 to Python 3.12
+update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1
+update-alternatives --set python3 /usr/bin/python3.12
+
+echo "System Python version: $(python --version)"
+
+# Stop nginx if present
+systemctl stop nginx 2>/dev/null || true
+systemctl disable nginx 2>/dev/null || true
+pkill -f nginx || true
+
+# Set up workspace
+mkdir -p /workspace && cd /workspace
+
+# Clone ComfyUI
+git clone https://github.com/comfyanonymous/ComfyUI.git
+cd ComfyUI
+
+# Create and activate Python 3.12 virtual environment
+python3 -m venv venv
+source venv/bin/activate
+
+echo "Virtualenv Python version: $(python --version)"
+
+# Upgrade pip
+pip install --upgrade pip
+
+# Install PyTorch 2.7.0 with CUDA 12.8
+pip install torch==2.7.0 torchvision==0.22.0 torchaudio==2.7.0 --index-url https://download.pytorch.org/whl/cu128
+
+# Install Triton (latest)
+pip install triton
+
+# Install packaging
+pip install packaging
+
+# Install SageAttention2 from prebuilt wheel (no compilation needed)
+cd /workspace
+wget https://huggingface.co/nitin19/flash-attention-wheels/resolve/main/sageattention-2.1.1-cp312-cp312-linux_x86_64.whl
+pip install ./sageattention-2.1.1-cp312-cp312-linux_x86_64.whl
+
+# Check torch, CUDA, Triton, compile
+echo "Torch: $(python -c 'import torch; print(torch.__version__)')"
+echo "CUDA: $(python -c 'import torch; print(torch.version.cuda)')"
+echo "Triton: $(python -c 'import triton; print(triton.__version__)')"
+echo "torch.compile available: $(python -c 'import torch; print(hasattr(torch, "compile"))')"
+
+# Install ComfyUI requirements
+cd /workspace/ComfyUI
+pip install -r requirements.txt
+
+# Install ComfyUI Manager
+mkdir -p custom_nodes && cd custom_nodes
+git clone https://github.com/ltdrdata/ComfyUI-Manager.git
+cd ComfyUI-Manager
+pip install -r requirements.txt
+
+# Make sure ComfyUI main is executable
+chmod +x /workspace/ComfyUI/main.py
+
+# Kill any process on port 3001
+fuser -k 3001/tcp || true
+
+# Start ComfyUI with SageAttention
+cd /workspace/ComfyUI
+source venv/bin/activate
+python main.py --use-sage-attention --listen --port 3001
\ No newline at end of file
diff --git a/ComfyUI/alembic_db/README.md b/ComfyUI/alembic_db/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..3b808c7cab30eaab91c9ff5c1b2d5cc960904e14
--- /dev/null
+++ b/ComfyUI/alembic_db/README.md
@@ -0,0 +1,4 @@
+## Generate new revision
+
+1. Update models in `/app/database/models.py`
+2. Run `alembic revision --autogenerate -m "{your message}"`
diff --git a/ComfyUI/alembic_db/env.py b/ComfyUI/alembic_db/env.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d7770679875e2fad065fba0c9222758a40d266b
--- /dev/null
+++ b/ComfyUI/alembic_db/env.py
@@ -0,0 +1,64 @@
+from sqlalchemy import engine_from_config
+from sqlalchemy import pool
+
+from alembic import context
+
+# this is the Alembic Config object, which provides
+# access to the values within the .ini file in use.
+config = context.config
+
+
+from app.database.models import Base
+target_metadata = Base.metadata
+
+# other values from the config, defined by the needs of env.py,
+# can be acquired:
+# my_important_option = config.get_main_option("my_important_option")
+# ... etc.
+
+
+def run_migrations_offline() -> None:
+ """Run migrations in 'offline' mode.
+ This configures the context with just a URL
+ and not an Engine, though an Engine is acceptable
+ here as well. By skipping the Engine creation
+ we don't even need a DBAPI to be available.
+ Calls to context.execute() here emit the given string to the
+ script output.
+ """
+ url = config.get_main_option("sqlalchemy.url")
+ context.configure(
+ url=url,
+ target_metadata=target_metadata,
+ literal_binds=True,
+ dialect_opts={"paramstyle": "named"},
+ )
+
+ with context.begin_transaction():
+ context.run_migrations()
+
+
+def run_migrations_online() -> None:
+ """Run migrations in 'online' mode.
+ In this scenario we need to create an Engine
+ and associate a connection with the context.
+ """
+ connectable = engine_from_config(
+ config.get_section(config.config_ini_section, {}),
+ prefix="sqlalchemy.",
+ poolclass=pool.NullPool,
+ )
+
+ with connectable.connect() as connection:
+ context.configure(
+ connection=connection, target_metadata=target_metadata
+ )
+
+ with context.begin_transaction():
+ context.run_migrations()
+
+
+if context.is_offline_mode():
+ run_migrations_offline()
+else:
+ run_migrations_online()
diff --git a/ComfyUI/alembic_db/script.py.mako b/ComfyUI/alembic_db/script.py.mako
new file mode 100644
index 0000000000000000000000000000000000000000..480b130d632ca677c11f23d9fe82cf4014d15e0c
--- /dev/null
+++ b/ComfyUI/alembic_db/script.py.mako
@@ -0,0 +1,28 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision: str = ${repr(up_revision)}
+down_revision: Union[str, None] = ${repr(down_revision)}
+branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+ """Upgrade schema."""
+ ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+ """Downgrade schema."""
+ ${downgrades if downgrades else "pass"}
diff --git a/ComfyUI/api_server/__init__.py b/ComfyUI/api_server/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/ComfyUI/app/__init__.py b/ComfyUI/app/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/ComfyUI/app/app_settings.py b/ComfyUI/app/app_settings.py
new file mode 100644
index 0000000000000000000000000000000000000000..c7ac73bf6a59d3e89647aed13b70ca9428e16f2a
--- /dev/null
+++ b/ComfyUI/app/app_settings.py
@@ -0,0 +1,65 @@
+import os
+import json
+from aiohttp import web
+import logging
+
+
+class AppSettings():
+ def __init__(self, user_manager):
+ self.user_manager = user_manager
+
+ def get_settings(self, request):
+ try:
+ file = self.user_manager.get_request_user_filepath(
+ request,
+ "comfy.settings.json"
+ )
+ except KeyError as e:
+ logging.error("User settings not found.")
+ raise web.HTTPUnauthorized() from e
+ if os.path.isfile(file):
+ try:
+ with open(file) as f:
+ return json.load(f)
+ except:
+ logging.error(f"The user settings file is corrupted: {file}")
+ return {}
+ else:
+ return {}
+
+ def save_settings(self, request, settings):
+ file = self.user_manager.get_request_user_filepath(
+ request, "comfy.settings.json")
+ with open(file, "w") as f:
+ f.write(json.dumps(settings, indent=4))
+
+ def add_routes(self, routes):
+ @routes.get("/settings")
+ async def get_settings(request):
+ return web.json_response(self.get_settings(request))
+
+ @routes.get("/settings/{id}")
+ async def get_setting(request):
+ value = None
+ settings = self.get_settings(request)
+ setting_id = request.match_info.get("id", None)
+ if setting_id and setting_id in settings:
+ value = settings[setting_id]
+ return web.json_response(value)
+
+ @routes.post("/settings")
+ async def post_settings(request):
+ settings = self.get_settings(request)
+ new_settings = await request.json()
+ self.save_settings(request, {**settings, **new_settings})
+ return web.Response(status=200)
+
+ @routes.post("/settings/{id}")
+ async def post_setting(request):
+ setting_id = request.match_info.get("id", None)
+ if not setting_id:
+ return web.Response(status=400)
+ settings = self.get_settings(request)
+ settings[setting_id] = await request.json()
+ self.save_settings(request, settings)
+ return web.Response(status=200)
diff --git a/ComfyUI/app/custom_node_manager.py b/ComfyUI/app/custom_node_manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..281febca952363659eb3925280fab00f07c986d0
--- /dev/null
+++ b/ComfyUI/app/custom_node_manager.py
@@ -0,0 +1,145 @@
+from __future__ import annotations
+
+import os
+import folder_paths
+import glob
+from aiohttp import web
+import json
+import logging
+from functools import lru_cache
+
+from utils.json_util import merge_json_recursive
+
+
# Extra locale files to load into main.json
# Each file becomes one top-level key in the merged per-language dict
# (e.g. nodeDefs.json -> "nodeDefs"); see CustomNodeManager.build_translations().
EXTRA_LOCALE_FILES = [
    "nodeDefs.json",
    "commands.json",
    "settings.json",
]
+
+
def safe_load_json_file(file_path: str) -> dict:
    """Load a JSON file, returning {} when missing, unreadable, or invalid.

    Never raises: errors are logged so one broken custom-node locale file
    cannot break translation loading for every other node.
    """
    if not os.path.exists(file_path):
        return {}

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError):
        # OSError added: a file deleted or made unreadable between the
        # exists() check and open() previously escaped this "safe" loader.
        logging.error(f"Error loading {file_path}")
        return {}
+
+
class CustomNodeManager:
    """Serves custom-node provided assets over HTTP: merged translations
    (/i18n) and example workflow templates (/workflow_templates)."""

    # NOTE(review): lru_cache on an instance method keys on `self` and keeps
    # the instance alive for the cache's lifetime; acceptable here since this
    # manager is effectively a process-wide singleton.
    @lru_cache(maxsize=1)
    def build_translations(self):
        """Load all custom nodes translations during initialization. Translations are
        expected to be loaded from `locales/` folder.

        The folder structure is expected to be the following:
        - custom_nodes/
            - custom_node_1/
                - locales/
                    - en/
                        - main.json
                        - commands.json
                        - settings.json

        returned translations are expected to be in the following format:
        {
            "en": {
                "nodeDefs": {...},
                "commands": {...},
                "settings": {...},
                ...{other main.json keys}
            }
        }
        """

        translations = {}

        for folder in folder_paths.get_folder_paths("custom_nodes"):
            # Sort glob results for deterministic ordering
            for custom_node_dir in sorted(glob.glob(os.path.join(folder, "*/"))):
                locales_dir = os.path.join(custom_node_dir, "locales")
                if not os.path.exists(locales_dir):
                    continue

                for lang_dir in glob.glob(os.path.join(locales_dir, "*/")):
                    # Language code is the directory name, e.g. "en".
                    lang_code = os.path.basename(os.path.dirname(lang_dir))

                    if lang_code not in translations:
                        translations[lang_code] = {}

                    # Load main.json
                    main_file = os.path.join(lang_dir, "main.json")
                    node_translations = safe_load_json_file(main_file)

                    # Load extra locale files
                    for extra_file in EXTRA_LOCALE_FILES:
                        extra_file_path = os.path.join(lang_dir, extra_file)
                        # e.g. "nodeDefs.json" -> top-level key "nodeDefs"
                        key = extra_file.split(".")[0]
                        json_data = safe_load_json_file(extra_file_path)
                        if json_data:
                            node_translations[key] = json_data

                    if node_translations:
                        # Deep-merge so multiple custom nodes can each
                        # contribute keys to the same language.
                        translations[lang_code] = merge_json_recursive(
                            translations[lang_code], node_translations
                        )

        return translations

    def add_routes(self, routes, webapp, loadedModules):
        """Register /workflow_templates, per-node static template routes,
        and /i18n.

        Args:
            routes: aiohttp route table for the API endpoints.
            webapp: aiohttp Application used for the static file routes.
            loadedModules: iterable of (module_name, module_dir) for every
                loaded custom node.
        """

        example_workflow_folder_names = ["example_workflows", "example", "examples", "workflow", "workflows"]

        @routes.get("/workflow_templates")
        async def get_workflow_templates(request):
            """Returns a web response that contains the map of custom_nodes names and their associated workflow templates. The ones without templates are omitted."""

            files = []

            for folder in folder_paths.get_folder_paths("custom_nodes"):
                for folder_name in example_workflow_folder_names:
                    pattern = os.path.join(folder, f"*/{folder_name}/*.json")
                    matched_files = glob.glob(pattern)
                    files.extend(matched_files)

            workflow_templates_dict = (
                {}
            )  # custom_nodes folder name -> example workflow names
            for file in files:
                # Path shape: .../custom_nodes/<node>/<template folder>/<name>.json
                custom_nodes_name = os.path.basename(
                    os.path.dirname(os.path.dirname(file))
                )
                workflow_name = os.path.splitext(os.path.basename(file))[0]
                workflow_templates_dict.setdefault(custom_nodes_name, []).append(
                    workflow_name
                )
            return web.json_response(workflow_templates_dict)

        # Serve workflow templates from custom nodes.
        for module_name, module_dir in loadedModules:
            for folder_name in example_workflow_folder_names:
                workflows_dir = os.path.join(module_dir, folder_name)

                if os.path.exists(workflows_dir):
                    if folder_name != "example_workflows":
                        logging.debug(
                            "Found example workflow folder '%s' for custom node '%s', consider renaming it to 'example_workflows'",
                            folder_name, module_name)

                    webapp.add_routes(
                        [
                            web.static(
                                "/api/workflow_templates/" + module_name, workflows_dir
                            )
                        ]
                    )

        @routes.get("/i18n")
        async def get_i18n(request):
            """Returns translations from all custom nodes' locales folders."""
            return web.json_response(self.build_translations())
diff --git a/ComfyUI/app/frontend_management.py b/ComfyUI/app/frontend_management.py
new file mode 100644
index 0000000000000000000000000000000000000000..0bee73685b931bc1eddef79c7acafc2d0958098b
--- /dev/null
+++ b/ComfyUI/app/frontend_management.py
@@ -0,0 +1,361 @@
+from __future__ import annotations
+import argparse
+import logging
+import os
+import re
+import sys
+import tempfile
+import zipfile
+import importlib
+from dataclasses import dataclass
+from functools import cached_property
+from pathlib import Path
+from typing import TypedDict, Optional
+from importlib.metadata import version
+
+import requests
+from typing_extensions import NotRequired
+
+from utils.install_util import get_missing_requirements_message, requirements_path
+
+from comfy.cli_args import DEFAULT_VERSION_STRING
+import app.logger
+
+
def frontend_install_warning_message():
    """Build the warning text shown when the frontend pip package is
    missing or outdated, including the missing-requirements details."""
    message = f"""
{get_missing_requirements_message()}

This error is happening because the ComfyUI frontend is no longer shipped as part of the main repo but as a pip package instead.
""".strip()
    return message
+
def parse_version(version: str) -> tuple[int, int, int]:
    """Split a dotted version string such as "1.2.3" into an int tuple."""
    components = version.split(".")
    return tuple(int(component) for component in components)
+
def is_valid_version(version: str) -> bool:
    """Validate if a string is a valid semantic version (X.Y.Z format)."""
    semver_pattern = r"^(\d+)\.(\d+)\.(\d+)$"
    return re.match(semver_pattern, version) is not None
+
def get_installed_frontend_version():
    """Get the currently installed frontend package version.

    Reads the installed distribution metadata of comfyui-frontend-package.
    """
    return version("comfyui-frontend-package")
+
def get_required_frontend_version():
    """Get the required frontend version from requirements.txt.

    Returns the pinned "comfyui-frontend-package==X.Y.Z" version string,
    or None when the pin is absent, malformed, or the file is unreadable.
    """
    try:
        with open(requirements_path, "r", encoding="utf-8") as f:
            for raw_line in f:
                stripped = raw_line.strip()
                if not stripped.startswith("comfyui-frontend-package=="):
                    continue
                pinned_version = stripped.split("==")[-1]
                if not is_valid_version(pinned_version):
                    logging.error(f"Invalid version format in requirements.txt: {pinned_version}")
                    return None
                return pinned_version
        logging.error("comfyui-frontend-package not found in requirements.txt")
        return None
    except FileNotFoundError:
        logging.error("requirements.txt not found. Cannot determine required frontend version.")
        return None
    except Exception as e:
        logging.error(f"Error reading requirements.txt: {e}")
        return None
+
def check_frontend_version():
    """Check if the frontend version is up to date.

    Logs a startup warning when the installed comfyui-frontend-package is
    older than the version pinned in requirements.txt; never raises.
    """
    try:
        installed_str = get_installed_frontend_version()
        installed = parse_version(installed_str)
        required = parse_version(get_required_frontend_version())
        if installed >= required:
            logging.info("ComfyUI frontend version: {}".format(installed_str))
            return
        app.logger.log_startup_warning(
            f"""
________________________________________________________________________
WARNING WARNING WARNING WARNING WARNING

Installed frontend version {".".join(map(str, installed))} is lower than the recommended version {".".join(map(str, required))}.

{frontend_install_warning_message()}
________________________________________________________________________
""".strip()
        )
    except Exception as e:
        # Also reached when the required version cannot be determined
        # (parse_version(None) raises) — degrade to an error log.
        logging.error(f"Failed to check frontend version: {e}")
+
+
# Timeout for every GitHub API / asset download request in this module.
REQUEST_TIMEOUT = 10  # seconds


class Asset(TypedDict):
    """Subset of a GitHub release asset object used by this module."""
    url: str
    # Read by download_release_asset_zip() to locate the "dist.zip" asset.
    name: str


class Release(TypedDict):
    """Subset of a GitHub release object returned by the releases API."""
    id: int
    tag_name: str
    name: str
    prerelease: bool
    created_at: str
    published_at: str
    body: str
    assets: NotRequired[list[Asset]]
+
+
@dataclass
class FrontEndProvider:
    """A GitHub repository that publishes frontend builds as releases."""

    owner: str
    repo: str

    @property
    def folder_name(self) -> str:
        """Directory name under which this provider's downloads are stored."""
        return f"{self.owner}_{self.repo}"

    @property
    def release_url(self) -> str:
        """GitHub API endpoint listing this repository's releases."""
        return f"https://api.github.com/repos/{self.owner}/{self.repo}/releases"

    @cached_property
    def all_releases(self) -> list[Release]:
        """Fetch every release, following GitHub's Link-header pagination."""
        collected: list[Release] = []
        next_url = self.release_url
        while next_url:
            response = requests.get(next_url, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()  # HTTPError on non-2xx responses
            collected.extend(response.json())
            # GitHub advertises the next page (if any) via the Link header.
            next_url = response.links.get("next", {}).get("url")
        return collected

    @cached_property
    def latest_release(self) -> Release:
        """Fetch the latest stable release via the dedicated endpoint."""
        response = requests.get(f"{self.release_url}/latest", timeout=REQUEST_TIMEOUT)
        response.raise_for_status()  # HTTPError on non-2xx responses
        return response.json()

    @cached_property
    def latest_prerelease(self) -> Release:
        """Get the latest pre-release version - even if it's older than the latest release"""
        prereleases = [r for r in self.all_releases if r["prerelease"]]

        if not prereleases:
            raise ValueError("No pre-releases found")

        # GitHub returns releases newest-first, so the head is the latest.
        return prereleases[0]

    def get_release(self, version: str) -> Release:
        """Resolve "latest", "prerelease", or an explicit tag to a Release."""
        if version == "latest":
            return self.latest_release
        if version == "prerelease":
            return self.latest_prerelease
        # Accept the tag with or without a leading "v".
        for release in self.all_releases:
            if release["tag_name"] in [version, f"v{version}"]:
                return release
        raise ValueError(f"Version {version} not found in releases")
+
+
def download_release_asset_zip(release: Release, destination_path: str) -> None:
    """Download dist.zip from github release.

    Extracts the archive into destination_path; raises ValueError when the
    release has no dist.zip asset.
    """
    asset_url = next(
        (a["url"] for a in release.get("assets", []) if a["name"] == "dist.zip"),
        None,
    )

    if not asset_url:
        raise ValueError("dist.zip not found in the release assets")

    # Download into an anonymous temp file, then extract straight from it.
    with tempfile.TemporaryFile() as tmp_file:
        response = requests.get(
            asset_url,
            headers={"Accept": "application/octet-stream"},
            allow_redirects=True,
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()  # ensure the download succeeded

        tmp_file.write(response.content)
        # Rewind so ZipFile reads from the start of the archive.
        tmp_file.seek(0)

        with zipfile.ZipFile(tmp_file, "r") as zip_ref:
            zip_ref.extractall(destination_path)
+
+
class FrontendManager:
    """Resolves the on-disk frontend to serve: the comfyui-frontend-package
    pip package by default, or a specific "owner/repo@version" GitHub
    release downloaded into CUSTOM_FRONTENDS_ROOT."""

    # Download cache for non-default frontends: <root>/<owner>_<repo>/<version>/
    CUSTOM_FRONTENDS_ROOT = str(Path(__file__).parents[1] / "web_custom_versions")

    @classmethod
    def get_required_frontend_version(cls) -> str:
        """Get the required frontend package version (pinned in requirements.txt)."""
        return get_required_frontend_version()

    @classmethod
    def default_frontend_path(cls) -> str:
        """Return the static-assets path of the installed frontend package.

        Exits the process (sys.exit(-1)) when comfyui-frontend-package is
        not installed, since no UI can be served without it.
        """
        try:
            import comfyui_frontend_package

            return str(importlib.resources.files(comfyui_frontend_package) / "static")
        except ImportError:
            logging.error(
                f"""
********** ERROR ***********

comfyui-frontend-package is not installed.

{frontend_install_warning_message()}

********** ERROR ***********
""".strip()
            )
            sys.exit(-1)

    @classmethod
    def templates_path(cls) -> Optional[str]:
        """Return the workflow-templates path, or None when the
        comfyui-workflow-templates package is not installed."""
        try:
            import comfyui_workflow_templates

            return str(
                importlib.resources.files(comfyui_workflow_templates) / "templates"
            )
        except ImportError:
            logging.error(
                f"""
********** ERROR ***********

comfyui-workflow-templates is not installed.

{frontend_install_warning_message()}

********** ERROR ***********
""".strip()
            )
            # NOTE(review): unlike default_frontend_path this does not exit;
            # it falls through and implicitly returns None — callers must
            # handle that.

    @classmethod
    def embedded_docs_path(cls) -> Optional[str]:
        """Get the path to embedded documentation, or None when the
        comfyui-embedded-docs package is not installed."""
        try:
            import comfyui_embedded_docs

            return str(
                importlib.resources.files(comfyui_embedded_docs) / "docs"
            )
        except ImportError:
            logging.info("comfyui-embedded-docs package not found")
            return None

    @classmethod
    def parse_version_string(cls, value: str) -> tuple[str, str, str]:
        """
        Args:
            value (str): The version string to parse, of the form
                "owner/repo@version" where version is "vX.Y.Z", "latest",
                or "prerelease".

        Returns:
            tuple[str, str, str]: (repo owner, repo name, version).

        Raises:
            argparse.ArgumentTypeError: If the version string is invalid.
        """
        VERSION_PATTERN = r"^([a-zA-Z0-9][a-zA-Z0-9-]{0,38})/([a-zA-Z0-9_.-]+)@(v?\d+\.\d+\.\d+[-._a-zA-Z0-9]*|latest|prerelease)$"
        match_result = re.match(VERSION_PATTERN, value)
        if match_result is None:
            raise argparse.ArgumentTypeError(f"Invalid version string: {value}")

        return match_result.group(1), match_result.group(2), match_result.group(3)

    @classmethod
    def init_frontend_unsafe(
        cls, version_string: str, provider: Optional[FrontEndProvider] = None
    ) -> str:
        """
        Initializes the frontend for the specified version.

        Args:
            version_string (str): The version string.
            provider (FrontEndProvider, optional): The provider to use. Defaults to None.

        Returns:
            str: The path to the initialized frontend.

        Raises:
            Exception: If there is an error during the initialization process.
            main error source might be request timeout or invalid URL.
        """
        if version_string == DEFAULT_VERSION_STRING:
            check_frontend_version()
            return cls.default_frontend_path()

        repo_owner, repo_name, version = cls.parse_version_string(version_string)

        # Explicit "vX.Y.Z" tags are immutable, so a previous download can
        # be reused without hitting the GitHub API at all.
        if version.startswith("v"):
            expected_path = str(
                Path(cls.CUSTOM_FRONTENDS_ROOT)
                / f"{repo_owner}_{repo_name}"
                / version.lstrip("v")
            )
            if os.path.exists(expected_path):
                logging.info(
                    f"Using existing copy of specific frontend version tag: {repo_owner}/{repo_name}@{version}"
                )
                return expected_path

        logging.info(
            f"Initializing frontend: {repo_owner}/{repo_name}@{version}, requesting version details from GitHub..."
        )

        provider = provider or FrontEndProvider(repo_owner, repo_name)
        release = provider.get_release(version)

        # Cache key uses the release's canonical tag, not the user input.
        semantic_version = release["tag_name"].lstrip("v")
        web_root = str(
            Path(cls.CUSTOM_FRONTENDS_ROOT) / provider.folder_name / semantic_version
        )
        if not os.path.exists(web_root):
            try:
                os.makedirs(web_root, exist_ok=True)
                logging.info(
                    "Downloading frontend(%s) version(%s) to (%s)",
                    provider.folder_name,
                    semantic_version,
                    web_root,
                )
                logging.debug(release)
                download_release_asset_zip(release, destination_path=web_root)
            finally:
                # Clean up the directory if it is empty, i.e. the download failed
                if not os.listdir(web_root):
                    os.rmdir(web_root)

        return web_root

    @classmethod
    def init_frontend(cls, version_string: str) -> str:
        """
        Initializes the frontend with the specified version string.

        Falls back to the default pip-package frontend on any error.

        Args:
            version_string (str): The version string to initialize the frontend with.

        Returns:
            str: The path of the initialized frontend.
        """
        try:
            return cls.init_frontend_unsafe(version_string)
        except Exception as e:
            logging.error("Failed to initialize frontend: %s", e)
            logging.info("Falling back to the default frontend.")
            check_frontend_version()
            return cls.default_frontend_path()
diff --git a/ComfyUI/app/logger.py b/ComfyUI/app/logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d26d98fe28005a3ed8b6deb7da34823b98b5696
--- /dev/null
+++ b/ComfyUI/app/logger.py
@@ -0,0 +1,98 @@
+from collections import deque
+from datetime import datetime
+import io
+import logging
+import sys
+import threading
+
# Ring buffer of recent log entries ({"t": iso timestamp, "m": message});
# created by setup_logger(). None until then.
logs = None
# LogInterceptor instances installed over sys.stdout / sys.stderr by
# setup_logger(); None until then.
stdout_interceptor = None
stderr_interceptor = None
+
+
class LogInterceptor(io.TextIOWrapper):
    """TextIOWrapper that mirrors everything written to the wrapped stream
    into the module-level `logs` ring buffer and notifies flush callbacks.
    Installed over sys.stdout/sys.stderr by setup_logger()."""

    def __init__(self, stream, *args, **kwargs):
        buffer = stream.buffer
        encoding = stream.encoding
        super().__init__(buffer, *args, **kwargs, encoding=encoding, line_buffering=stream.line_buffering)
        self._lock = threading.Lock()
        self._flush_callbacks = []
        # Entries written since the last flush(); delivered to callbacks.
        self._logs_since_flush = []

    def write(self, data):
        entry = {"t": datetime.now().isoformat(), "m": data}
        with self._lock:
            self._logs_since_flush.append(entry)

            # Simple handling for cr to overwrite the last output if it isnt a full line
            # else logs just get full of progress messages
            # `logs` truthiness guard added: the original indexed logs[-1]
            # unconditionally and raised IndexError when the very first
            # write started with "\r".
            if isinstance(data, str) and data.startswith("\r") and logs and not logs[-1]["m"].endswith("\n"):
                logs.pop()
            logs.append(entry)
        super().write(data)

    def flush(self):
        super().flush()
        # Swap the pending list under the lock so entries appended by a
        # concurrent write() between iteration and reset cannot be lost
        # (the original read and cleared the list without the lock).
        with self._lock:
            pending = self._logs_since_flush
            self._logs_since_flush = []
        for cb in self._flush_callbacks:
            cb(pending)

    def on_flush(self, callback):
        """Register a callback invoked with the entries of each flush."""
        self._flush_callbacks.append(callback)
+
+
def get_logs():
    """Return the global in-memory log deque (None before setup_logger())."""
    return logs
+
+
def on_flush(callback):
    """Register `callback` on both stream interceptors; silently skips any
    interceptor that has not been installed yet (see setup_logger)."""
    for interceptor in (stdout_interceptor, stderr_interceptor):
        if interceptor is not None:
            interceptor.on_flush(callback)
+
def setup_logger(log_level: str = 'INFO', capacity: int = 300, use_stdout: bool = False):
    """Install stdout/stderr interception and configure the root logger.

    Idempotent: returns immediately once the `logs` buffer exists.

    Args:
        log_level: Root logger level name (e.g. 'INFO').
        capacity: Maximum number of intercepted entries kept in memory.
        use_stdout: When True, records below ERROR go to stdout and only
            ERROR and above go to stderr.
    """
    global logs
    if logs:
        return

    # Override output streams and log to buffer
    logs = deque(maxlen=capacity)

    global stdout_interceptor
    global stderr_interceptor
    stdout_interceptor = sys.stdout = LogInterceptor(sys.stdout)
    stderr_interceptor = sys.stderr = LogInterceptor(sys.stderr)

    # Setup default global logger
    logger = logging.getLogger()
    logger.setLevel(log_level)

    # Default handler writes to the (now intercepted) stderr.
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(logging.Formatter("%(message)s"))

    if use_stdout:
        # Only errors and critical to stderr
        stream_handler.addFilter(lambda record: not record.levelno < logging.ERROR)

        # Lesser to stdout
        stdout_handler = logging.StreamHandler(sys.stdout)
        stdout_handler.setFormatter(logging.Formatter("%(message)s"))
        stdout_handler.addFilter(lambda record: record.levelno < logging.ERROR)
        logger.addHandler(stdout_handler)

    logger.addHandler(stream_handler)
+
+
# Warnings emitted before the frontend could display them; replayed later.
STARTUP_WARNINGS = []


def log_startup_warning(msg):
    """Log a warning immediately and queue it for replay after startup."""
    logging.warning(msg)
    STARTUP_WARNINGS.append(msg)


def print_startup_warnings():
    """Re-log every queued startup warning, then empty the queue."""
    for queued_msg in STARTUP_WARNINGS:
        logging.warning(queued_msg)
    STARTUP_WARNINGS.clear()
diff --git a/ComfyUI/app/model_manager.py b/ComfyUI/app/model_manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..74d942fb85c560dd85903e1cc82ef64968b74316
--- /dev/null
+++ b/ComfyUI/app/model_manager.py
@@ -0,0 +1,184 @@
+from __future__ import annotations
+
+import os
+import base64
+import json
+import time
+import logging
+import folder_paths
+import glob
+import comfy.utils
+from aiohttp import web
+from PIL import Image
+from io import BytesIO
+from folder_paths import map_legacy, filter_files_extensions, filter_files_content_types
+
+
class ModelFileManager:
    """Scans model folders, caches directory listings by mtime, and serves
    the experimental /experiment/models HTTP endpoints (listings and
    preview images)."""

    def __init__(self) -> None:
        # folder path -> (file entries, {dir path: mtime}, scan timestamp)
        self.cache: dict[str, tuple[list[dict], dict[str, float], float]] = {}

    def get_cache(self, key: str, default=None) -> tuple[list[dict], dict[str, float], float] | None:
        """Return the cached listing for `key`, or `default` when absent."""
        return self.cache.get(key, default)

    def set_cache(self, key: str, value: tuple[list[dict], dict[str, float], float]):
        """Store a listing tuple under `key`."""
        self.cache[key] = value

    def clear_cache(self):
        """Drop every cached listing."""
        self.cache.clear()

    def add_routes(self, routes):
        """Register the experimental model-listing/preview endpoints."""

        # NOTE: This is an experiment to replace `/models`
        @routes.get("/experiment/models")
        async def get_model_folders(request):
            """List model folder types and their on-disk locations."""
            model_types = list(folder_paths.folder_names_and_paths.keys())
            folder_black_list = ["configs", "custom_nodes"]
            output_folders: list[dict] = []
            for folder in model_types:
                if folder in folder_black_list:
                    continue
                output_folders.append({"name": folder, "folders": folder_paths.get_folder_paths(folder)})
            return web.json_response(output_folders)

        # NOTE: This is an experiment to replace `/models/{folder}`
        @routes.get("/experiment/models/{folder}")
        async def get_all_models(request):
            """List every model file of one folder type."""
            folder = request.match_info.get("folder", None)
            if folder not in folder_paths.folder_names_and_paths:
                return web.Response(status=404)
            files = self.get_model_file_list(folder)
            return web.json_response(files)

        @routes.get("/experiment/models/preview/{folder}/{path_index}/{filename:.*}")
        async def get_model_preview(request):
            """Serve a model's first preview image, re-encoded as WEBP."""
            folder_name = request.match_info.get("folder", None)
            path_index = int(request.match_info.get("path_index", None))
            filename = request.match_info.get("filename", None)

            if folder_name not in folder_paths.folder_names_and_paths:
                return web.Response(status=404)

            folders = folder_paths.folder_names_and_paths[folder_name]
            folder = folders[0][path_index]
            full_filename = os.path.join(folder, filename)

            previews = self.get_model_previews(full_filename)
            default_preview = previews[0] if len(previews) > 0 else None
            if default_preview is None or (isinstance(default_preview, str) and not os.path.isfile(default_preview)):
                return web.Response(status=404)

            try:
                with Image.open(default_preview) as img:
                    img_bytes = BytesIO()
                    img.save(img_bytes, format="WEBP")
                    img_bytes.seek(0)
                    return web.Response(body=img_bytes.getvalue(), content_type="image/webp")
            except Exception:
                # Was a bare `except:`; keep the 404 fallback but stop
                # swallowing KeyboardInterrupt/SystemExit.
                return web.Response(status=404)

    def get_model_file_list(self, folder_name: str):
        """Return the (possibly cached) flat file listing for a folder type."""
        folder_name = map_legacy(folder_name)
        folders = folder_paths.folder_names_and_paths[folder_name]
        output_list: list[dict] = []

        for index, folder in enumerate(folders[0]):
            if not os.path.isdir(folder):
                continue
            out = self.cache_model_file_list_(folder)
            if out is None:
                out = self.recursive_search_models_(folder, index)
                self.set_cache(folder, out)
            output_list.extend(out[0])

        return output_list

    def cache_model_file_list_(self, folder: str):
        """Return the cached listing for `folder` if still fresh, else None.

        Fix: the original compared the root folder's mtime (a float) with
        the whole dir-mtime dict (`cache[1]`), which is never equal, so the
        cache could never hit. The root's mtime is now stored inside that
        dict (see recursive_search_models_) and every entry — root
        included — is validated individually.
        """
        model_file_list_cache = self.get_cache(folder)

        if model_file_list_cache is None:
            return None
        if not os.path.isdir(folder):
            return None

        try:
            for dir_path, cached_mtime in model_file_list_cache[1].items():
                if os.path.getmtime(dir_path) != cached_mtime:
                    return None
        except OSError:
            # A cached directory disappeared since the scan: treat as stale.
            return None

        return model_file_list_cache

    def recursive_search_models_(self, directory: str, pathIndex: int) -> tuple[list[str], dict[str, float], float]:
        """Walk `directory` and return (file entries, dir mtimes, timestamp).

        File entries are {"name": relative path, "pathIndex": pathIndex}.
        """
        if not os.path.isdir(directory):
            return [], {}, time.perf_counter()

        excluded_dir_names = [".git"]
        # TODO use settings
        include_hidden_files = False

        result: list[str] = []
        # Include the root itself so cache invalidation notices files
        # added or removed directly under `directory`.
        dirs: dict[str, float] = {directory: os.path.getmtime(directory)}

        for dirpath, subdirs, filenames in os.walk(directory, followlinks=True, topdown=True):
            # Prune excluded (and optionally hidden) dirs in place so
            # os.walk never descends into them.
            subdirs[:] = [d for d in subdirs if d not in excluded_dir_names]
            if not include_hidden_files:
                subdirs[:] = [d for d in subdirs if not d.startswith(".")]
                filenames = [f for f in filenames if not f.startswith(".")]

            filenames = filter_files_extensions(filenames, folder_paths.supported_pt_extensions)

            for file_name in filenames:
                try:
                    relative_path = os.path.relpath(os.path.join(dirpath, file_name), directory)
                    result.append(relative_path)
                except OSError:
                    # Was a bare `except:`; only filesystem errors belong here.
                    logging.warning(f"Warning: Unable to access {file_name}. Skipping this file.")
                    continue

            for d in subdirs:
                path: str = os.path.join(dirpath, d)
                try:
                    dirs[path] = os.path.getmtime(path)
                except FileNotFoundError:
                    logging.warning(f"Warning: Unable to access {path}. Skipping this path.")
                    continue

        return [{"name": f, "pathIndex": pathIndex} for f in result], dirs, time.perf_counter()

    def get_model_previews(self, filepath: str) -> list[str | BytesIO]:
        """Collect preview images for a model file: sibling images named
        `<model>.<ext>` / `<model>.preview.<ext>`, plus cover images
        embedded in safetensors metadata (ssmd_cover_images)."""
        dirname = os.path.dirname(filepath)

        if not os.path.exists(dirname):
            return []

        basename = os.path.splitext(filepath)[0]
        # glob.escape: model names may legally contain glob metacharacters
        # ([, ], *, ?), which previously corrupted the pattern.
        match_files = glob.glob(f"{glob.escape(basename)}.*", recursive=False)
        image_files = filter_files_content_types(match_files, "image")
        safetensors_file = next(filter(lambda x: x.endswith(".safetensors"), match_files), None)
        safetensors_metadata = {}

        result: list[str | BytesIO] = []

        for filename in image_files:
            _basename = os.path.splitext(filename)[0]
            if _basename == basename:
                result.append(filename)
            if _basename == f"{basename}.preview":
                result.append(filename)

        if safetensors_file:
            safetensors_filepath = os.path.join(dirname, safetensors_file)
            header = comfy.utils.safetensors_header(safetensors_filepath, max_size=8*1024*1024)
            if header:
                safetensors_metadata = json.loads(header)
            safetensors_images = safetensors_metadata.get("__metadata__", {}).get("ssmd_cover_images", None)
            if safetensors_images:
                safetensors_images = json.loads(safetensors_images)
                for image in safetensors_images:
                    result.append(BytesIO(base64.b64decode(image)))

        return result

    def __enter__(self):
        # Added so the pre-existing __exit__ makes the class usable as a
        # context manager (the protocol requires both methods).
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.clear_cache()
diff --git a/ComfyUI/app/user_manager.py b/ComfyUI/app/user_manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..d31da5b9b89dda908e900041ef41b082dbac38eb
--- /dev/null
+++ b/ComfyUI/app/user_manager.py
@@ -0,0 +1,436 @@
+from __future__ import annotations
+import json
+import os
+import re
+import uuid
+import glob
+import shutil
+import logging
+from aiohttp import web
+from urllib import parse
+from comfy.cli_args import args
+import folder_paths
+from .app_settings import AppSettings
+from typing import TypedDict
+
# User id used when multi-user mode (--multi-user) is disabled.
default_user = "default"
+
+
class FileInfo(TypedDict):
    """Listing entry produced by get_file_info() for the userdata endpoints."""
    # Path relative to the listing root, always with forward slashes.
    path: str
    # File size in bytes (os.path.getsize).
    size: int
    # Last-modified Unix timestamp; os.path.getmtime returns a float,
    # so the original `int` annotation was wrong.
    modified: float
+
+
+def get_file_info(path: str, relative_to: str) -> FileInfo:
+ return {
+ "path": os.path.relpath(path, relative_to).replace(os.sep, '/'),
+ "size": os.path.getsize(path),
+ "modified": os.path.getmtime(path)
+ }
+
+
+class UserManager():
+ def __init__(self):
+ user_directory = folder_paths.get_user_directory()
+
+ self.settings = AppSettings(self)
+ if not os.path.exists(user_directory):
+ os.makedirs(user_directory, exist_ok=True)
+ if not args.multi_user:
+ logging.warning("****** User settings have been changed to be stored on the server instead of browser storage. ******")
+ logging.warning("****** For multi-user setups add the --multi-user CLI argument to enable multiple user profiles. ******")
+
+ if args.multi_user:
+ if os.path.isfile(self.get_users_file()):
+ with open(self.get_users_file()) as f:
+ self.users = json.load(f)
+ else:
+ self.users = {}
+ else:
+ self.users = {"default": "default"}
+
    def get_users_file(self):
        """Return the path of the users.json registry inside the user directory."""
        return os.path.join(folder_paths.get_user_directory(), "users.json")
+
+ def get_request_user_id(self, request):
+ user = "default"
+ if args.multi_user and "comfy-user" in request.headers:
+ user = request.headers["comfy-user"]
+
+ if user not in self.users:
+ raise KeyError("Unknown user: " + user)
+
+ return user
+
    def get_request_user_filepath(self, request, file, type="userdata", create_dir=True):
        """Resolve `file` to an absolute path inside the request user's root.

        Returns None when the resolved path would escape the allowed tree
        (path-traversal guard). Raises KeyError for an unknown user or an
        unknown `type`.

        Args:
            request: aiohttp request, used to identify the user.
            file: Relative file path (possibly URL-encoded); None returns
                the user's root directory.
            type: Storage area; only "userdata" is supported.
            create_dir: Create the result's parent directory when missing.
        """
        user_directory = folder_paths.get_user_directory()

        if type == "userdata":
            root_dir = user_directory
        else:
            raise KeyError("Unknown filepath type:" + type)

        user = self.get_request_user_id(request)
        path = user_root = os.path.abspath(os.path.join(root_dir, user))

        # prevent leaving /{type}
        if os.path.commonpath((root_dir, user_root)) != root_dir:
            return None

        if file is not None:
            # Check if filename is url encoded
            if "%" in file:
                file = parse.unquote(file)

            # prevent leaving /{type}/{user}
            path = os.path.abspath(os.path.join(user_root, file))
            if os.path.commonpath((user_root, path)) != user_root:
                return None

        parent = os.path.split(path)[0]

        if create_dir and not os.path.exists(parent):
            os.makedirs(parent, exist_ok=True)

        return path
+
+ def add_user(self, name):
+ name = name.strip()
+ if not name:
+ raise ValueError("username not provided")
+ user_id = re.sub("[^a-zA-Z0-9-_]+", '-', name)
+ user_id = user_id + "_" + str(uuid.uuid4())
+
+ self.users[user_id] = name
+
+ with open(self.get_users_file(), "w") as f:
+ json.dump(self.users, f)
+
+ return user_id
+
+ def add_routes(self, routes):
+ self.settings.add_routes(routes)
+
+ @routes.get("/users")
+ async def get_users(request):
+ if args.multi_user:
+ return web.json_response({"storage": "server", "users": self.users})
+ else:
+ user_dir = self.get_request_user_filepath(request, None, create_dir=False)
+ return web.json_response({
+ "storage": "server",
+ "migrated": os.path.exists(user_dir)
+ })
+
+ @routes.post("/users")
+ async def post_users(request):
+ body = await request.json()
+ username = body["username"]
+ if username in self.users.values():
+ return web.json_response({"error": "Duplicate username."}, status=400)
+
+ user_id = self.add_user(username)
+ return web.json_response(user_id)
+
+ @routes.get("/userdata")
+ async def listuserdata(request):
+ """
+ List user data files in a specified directory.
+
+ This endpoint allows listing files in a user's data directory, with options for recursion,
+ full file information, and path splitting.
+
+ Query Parameters:
+ - dir (required): The directory to list files from.
+ - recurse (optional): If "true", recursively list files in subdirectories.
+ - full_info (optional): If "true", return detailed file information (path, size, modified time).
+ - split (optional): If "true", split file paths into components (only applies when full_info is false).
+
+ Returns:
+ - 400: If 'dir' parameter is missing.
+ - 403: If the requested path is not allowed.
+ - 404: If the requested directory does not exist.
+ - 200: JSON response with the list of files or file information.
+
+ The response format depends on the query parameters:
+ - Default: List of relative file paths.
+ - full_info=true: List of dictionaries with file details.
+ - split=true (and full_info=false): List of lists, each containing path components.
+ """
+ directory = request.rel_url.query.get('dir', '')
+ if not directory:
+ return web.Response(status=400, text="Directory not provided")
+
+ path = self.get_request_user_filepath(request, directory)
+ if not path:
+ return web.Response(status=403, text="Invalid directory")
+
+ if not os.path.exists(path):
+ return web.Response(status=404, text="Directory not found")
+
+ recurse = request.rel_url.query.get('recurse', '').lower() == "true"
+ full_info = request.rel_url.query.get('full_info', '').lower() == "true"
+ split_path = request.rel_url.query.get('split', '').lower() == "true"
+
+ # Use different patterns based on whether we're recursing or not
+ if recurse:
+ pattern = os.path.join(glob.escape(path), '**', '*')
+ else:
+ pattern = os.path.join(glob.escape(path), '*')
+
+ def process_full_path(full_path: str) -> FileInfo | str | list[str]:
+ if full_info:
+ return get_file_info(full_path, path)
+
+ rel_path = os.path.relpath(full_path, path).replace(os.sep, '/')
+ if split_path:
+ return [rel_path] + rel_path.split('/')
+
+ return rel_path
+
+ results = [
+ process_full_path(full_path)
+ for full_path in glob.glob(pattern, recursive=recurse)
+ if os.path.isfile(full_path)
+ ]
+
+ return web.json_response(results)
+
@routes.get("/v2/userdata")
async def list_userdata_v2(request):
    """
    List files and directories in a user's data directory.

    This endpoint provides a structured listing of contents within a specified
    subdirectory of the user's data storage.

    Query Parameters:
    - path (optional): The relative path within the user's data directory
                       to list. Defaults to the root ('').

    Returns:
    - 400: If the requested path is invalid, outside the user's data directory, or is not a directory.
    - 404: If the requested path does not exist.
    - 403: If the user is invalid.
    - 500: If there is an error reading the directory contents.
    - 200: JSON response containing a list of file and directory objects.
           Each object includes:
           - name: The name of the file or directory.
           - type: 'file' or 'directory'.
           - path: The relative path from the user's data root.
           - size (for files): The size in bytes.
           - modified (for files): The last modified timestamp (Unix epoch).
    """
    requested_rel_path = request.rel_url.query.get('path', '')

    # URL-decode the path parameter (clients may percent-encode separators).
    try:
        requested_rel_path = parse.unquote(requested_rel_path)
    except Exception as e:
        logging.warning(f"Failed to decode path parameter: {requested_rel_path}, Error: {e}")
        return web.Response(status=400, text="Invalid characters in path parameter")

    # Check user validity and get the absolute path for the requested directory.
    try:
        base_user_path = self.get_request_user_filepath(request, None, create_dir=False)

        if requested_rel_path:
            target_abs_path = self.get_request_user_filepath(request, requested_rel_path, create_dir=False)
        else:
            target_abs_path = base_user_path

    except KeyError as e:
        # Invalid user detected by get_request_user_id inside get_request_user_filepath
        logging.warning(f"Access denied for user: {e}")
        return web.Response(status=403, text="Invalid user specified in request")

    if not target_abs_path:
        # Path traversal or other issue detected by get_request_user_filepath
        return web.Response(status=400, text="Invalid path requested")

    # Handle cases where the user directory or target path doesn't exist.
    if not os.path.exists(target_abs_path):
        # Check if it's the base user directory that's missing (new user case)
        if target_abs_path == base_user_path:
            # It's okay if the base user directory doesn't exist yet, return empty list
            return web.json_response([])
        else:
            # A specific subdirectory was requested but doesn't exist
            return web.Response(status=404, text="Requested path not found")

    if not os.path.isdir(target_abs_path):
        return web.Response(status=400, text="Requested path is not a directory")

    results = []
    try:
        # Recursive walk: every entry's "path" is relative to the user's root,
        # not to the requested subdirectory, so clients get stable identifiers.
        for root, dirs, files in os.walk(target_abs_path, topdown=True):
            # Process directories
            for dir_name in dirs:
                dir_path = os.path.join(root, dir_name)
                rel_path = os.path.relpath(dir_path, base_user_path).replace(os.sep, '/')
                results.append({
                    "name": dir_name,
                    "path": rel_path,
                    "type": "directory"
                })

            # Process files
            for file_name in files:
                file_path = os.path.join(root, file_name)
                rel_path = os.path.relpath(file_path, base_user_path).replace(os.sep, '/')
                entry_info = {
                    "name": file_name,
                    "path": rel_path,
                    "type": "file"
                }
                try:
                    stats = os.stat(file_path) # Use os.stat for potentially better performance with os.walk
                    entry_info["size"] = stats.st_size
                    entry_info["modified"] = stats.st_mtime
                except OSError as stat_error:
                    # Best-effort: a file that vanished or is unreadable is still
                    # listed, just without size/modified metadata.
                    logging.warning(f"Could not stat file {file_path}: {stat_error}")
                    pass # Include file with available info
                results.append(entry_info)
    except OSError as e:
        logging.error(f"Error listing directory {target_abs_path}: {e}")
        return web.Response(status=500, text="Error reading directory contents")

    # Sort results alphabetically, directories first then files.
    results.sort(key=lambda x: (x['type'] != 'directory', x['name'].lower()))

    return web.json_response(results)
+
def get_user_data_path(request, check_exists = False, param = "file"):
    """Resolve a match-info parameter to an absolute user-data path.

    Returns the absolute path string on success, or an aiohttp error
    Response (400 missing param, 403 disallowed path, 404 missing file
    when check_exists is True). Callers discriminate via isinstance(str).
    """
    requested = request.match_info.get(param, None)
    if not requested:
        return web.Response(status=400)

    resolved = self.get_request_user_filepath(request, requested)
    if not resolved:
        # Path escaped the user's directory or the user is invalid.
        return web.Response(status=403)

    if check_exists and not os.path.exists(resolved):
        return web.Response(status=404)

    return resolved
+
@routes.get("/userdata/{file}")
async def getuserdata(request):
    """Serve the raw contents of a single user data file."""
    resolved = get_user_data_path(request, check_exists=True)
    if isinstance(resolved, str):
        return web.FileResponse(resolved)
    # Anything other than a string is already an error Response (400/403/404).
    return resolved
+
@routes.post("/userdata/{file}")
async def post_userdata(request):
    """
    Upload or update a user data file.

    The raw request body is written verbatim as the file contents.

    Query Parameters:
    - overwrite (optional): If "false", refuse to replace an existing file.
      Defaults to "true".
    - full_info (optional): If "true", return detailed file information
      (path, size, modified time); otherwise return only the relative path.

    Path Parameters:
    - file: The target file path (URL encoded if necessary).

    Returns:
    - 400: If 'file' parameter is missing.
    - 403: If the requested path is not allowed.
    - 409: If overwrite=false and the file already exists.
    - 200: JSON response with the file info or the relative file path.
    """
    target = get_user_data_path(request)
    if not isinstance(target, str):
        return target  # already an error Response

    allow_overwrite = request.query.get("overwrite", 'true') != "false"
    want_full_info = request.query.get('full_info', 'false').lower() == "true"

    if not allow_overwrite and os.path.exists(target):
        return web.Response(status=409, text="File already exists")

    contents = await request.read()
    with open(target, "wb") as f:
        f.write(contents)

    user_root = self.get_request_user_filepath(request, None)
    if want_full_info:
        payload = get_file_info(target, user_root)
    else:
        payload = os.path.relpath(target, user_root)

    return web.json_response(payload)
+
@routes.delete("/userdata/{file}")
async def delete_userdata(request):
    """Delete a single user data file; responds 204 on success."""
    target = get_user_data_path(request, check_exists=True)
    if not isinstance(target, str):
        # Error Response from the path helper (400/403/404).
        return target

    os.remove(target)
    return web.Response(status=204)
+
@routes.post("/userdata/{file}/move/{dest}")
async def move_userdata(request):
    """
    Move or rename a user data file.

    This endpoint handles moving or renaming files within a user's data directory, with options for
    controlling overwrite behavior and response format.

    Path Parameters:
    - file: The source file path (URL encoded if necessary)
    - dest: The destination file path (URL encoded if necessary)

    Query Parameters:
    - overwrite (optional): If "false", prevents overwriting existing files. Defaults to "true".
    - full_info (optional): If "true", returns detailed file information (path, size, modified time).
                            If "false", returns only the relative file path.

    Returns:
    - 400: If either 'file' or 'dest' parameter is missing
    - 403: If either requested path is not allowed
    - 404: If the source file does not exist
    - 409: If overwrite=false and the destination file already exists
    - 200: JSON response with either:
           - Full file information (if full_info=true)
           - Relative file path (if full_info=false)
    """
    source = get_user_data_path(request, check_exists=True)
    if not isinstance(source, str):
        return source

    dest = get_user_data_path(request, check_exists=False, param="dest")
    # Bug fix: this previously re-checked `source`, so an error Response for
    # the destination (400 missing / 403 disallowed) slipped through and was
    # passed to shutil.move as if it were a valid path.
    if not isinstance(dest, str):
        return dest

    overwrite = request.query.get("overwrite", 'true') != "false"
    full_info = request.query.get('full_info', 'false').lower() == "true"

    if not overwrite and os.path.exists(dest):
        return web.Response(status=409, text="File already exists")

    logging.info(f"moving '{source}' -> '{dest}'")
    shutil.move(source, dest)

    user_path = self.get_request_user_filepath(request, None)
    if full_info:
        resp = get_file_info(dest, user_path)
    else:
        # Report the destination path relative to the user's data root.
        resp = os.path.relpath(dest, user_path)

    return web.json_response(resp)
diff --git a/ComfyUI/comfy/checkpoint_pickle.py b/ComfyUI/comfy/checkpoint_pickle.py
new file mode 100644
index 0000000000000000000000000000000000000000..206551d3c1cf0d654c907534629a800196ba138b
--- /dev/null
+++ b/ComfyUI/comfy/checkpoint_pickle.py
@@ -0,0 +1,13 @@
+import pickle
+
load = pickle.load  # plain pickle.load, re-exported for callers wanting the default loader


class Empty:
    """Inert stand-in returned instead of blocked classes during unpickling."""


class Unpickler(pickle.Unpickler):
    """pickle.Unpickler that refuses to materialize pytorch_lightning classes."""

    def find_class(self, module, name):
        #TODO: safe unpickle
        blocked = module.startswith("pytorch_lightning")
        return Empty if blocked else super().find_class(module, name)
diff --git a/ComfyUI/comfy/cli_args.py b/ComfyUI/comfy/cli_args.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d760d524d07cc072a0687bd35791decdf4a0106
--- /dev/null
+++ b/ComfyUI/comfy/cli_args.py
@@ -0,0 +1,237 @@
+import argparse
+import enum
+import os
+import comfy.options
+
+
class EnumAction(argparse.Action):
    """
    Argparse action that accepts an Enum's values on the command line and
    stores the corresponding Enum member on the namespace.
    """
    def __init__(self, **kwargs):
        # The Enum class arrives via the standard "type" keyword.
        enum_type = kwargs.pop("type", None)

        if enum_type is None:
            raise ValueError("type must be assigned an Enum when using EnumAction")
        if not issubclass(enum_type, enum.Enum):
            raise TypeError("type must be an Enum when using EnumAction")

        # Derive choices/metavar from the Enum values unless caller overrode them.
        choices = tuple(e.value for e in enum_type)
        kwargs.setdefault("choices", choices)
        kwargs.setdefault("metavar", f"[{','.join(list(choices))}]")

        super().__init__(**kwargs)
        self._enum = enum_type

    def __call__(self, parser, namespace, values, option_string=None):
        # Map the raw string back to its Enum member before storing it.
        setattr(namespace, self.dest, self._enum(values))
+
+
parser = argparse.ArgumentParser()

# --- Network / server options ---
parser.add_argument("--listen", type=str, default="127.0.0.1", metavar="IP", nargs="?", const="0.0.0.0,::", help="Specify the IP address to listen on (default: 127.0.0.1). You can give a list of ip addresses by separating them with a comma like: 127.2.2.2,127.3.3.3 If --listen is provided without an argument, it defaults to 0.0.0.0,:: (listens on all ipv4 and ipv6)")
parser.add_argument("--port", type=int, default=8188, help="Set the listen port.")
parser.add_argument("--tls-keyfile", type=str, help="Path to TLS (SSL) key file. Enables TLS, makes app accessible at https://... requires --tls-certfile to function")
parser.add_argument("--tls-certfile", type=str, help="Path to TLS (SSL) certificate file. Enables TLS, makes app accessible at https://... requires --tls-keyfile to function")
parser.add_argument("--enable-cors-header", type=str, default=None, metavar="ORIGIN", nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.")
parser.add_argument("--max-upload-size", type=float, default=100, help="Set the maximum upload size in MB.")

# --- Filesystem layout overrides ---
parser.add_argument("--base-directory", type=str, default=None, help="Set the ComfyUI base directory for models, custom_nodes, input, output, temp, and user directories.")
parser.add_argument("--extra-model-paths-config", type=str, default=None, metavar="PATH", nargs='+', action='append', help="Load one or more extra_model_paths.yaml files.")
parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory. Overrides --base-directory.")
parser.add_argument("--temp-directory", type=str, default=None, help="Set the ComfyUI temp directory (default is in the ComfyUI directory). Overrides --base-directory.")
parser.add_argument("--input-directory", type=str, default=None, help="Set the ComfyUI input directory. Overrides --base-directory.")
parser.add_argument("--auto-launch", action="store_true", help="Automatically launch ComfyUI in the default browser.")
parser.add_argument("--disable-auto-launch", action="store_true", help="Disable auto launching the browser.")
# --- Device selection ---
parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use. All other devices will not be visible.")
parser.add_argument("--default-device", type=int, default=None, metavar="DEFAULT_DEVICE_ID", help="Set the id of the default device, all other devices will stay visible.")
cm_group = parser.add_mutually_exclusive_group()
cm_group.add_argument("--cuda-malloc", action="store_true", help="Enable cudaMallocAsync (enabled by default for torch 2.0 and up).")
cm_group.add_argument("--disable-cuda-malloc", action="store_true", help="Disable cudaMallocAsync.")


# --- Precision overrides (one mutually exclusive group per model component) ---
fp_group = parser.add_mutually_exclusive_group()
fp_group.add_argument("--force-fp32", action="store_true", help="Force fp32 (If this makes your GPU work better please report it).")
fp_group.add_argument("--force-fp16", action="store_true", help="Force fp16.")

# Diffusion model (unet) precision.
fpunet_group = parser.add_mutually_exclusive_group()
fpunet_group.add_argument("--fp32-unet", action="store_true", help="Run the diffusion model in fp32.")
fpunet_group.add_argument("--fp64-unet", action="store_true", help="Run the diffusion model in fp64.")
fpunet_group.add_argument("--bf16-unet", action="store_true", help="Run the diffusion model in bf16.")
fpunet_group.add_argument("--fp16-unet", action="store_true", help="Run the diffusion model in fp16")
fpunet_group.add_argument("--fp8_e4m3fn-unet", action="store_true", help="Store unet weights in fp8_e4m3fn.")
fpunet_group.add_argument("--fp8_e5m2-unet", action="store_true", help="Store unet weights in fp8_e5m2.")
fpunet_group.add_argument("--fp8_e8m0fnu-unet", action="store_true", help="Store unet weights in fp8_e8m0fnu.")

# VAE precision / placement.
fpvae_group = parser.add_mutually_exclusive_group()
fpvae_group.add_argument("--fp16-vae", action="store_true", help="Run the VAE in fp16, might cause black images.")
fpvae_group.add_argument("--fp32-vae", action="store_true", help="Run the VAE in full precision fp32.")
fpvae_group.add_argument("--bf16-vae", action="store_true", help="Run the VAE in bf16.")

parser.add_argument("--cpu-vae", action="store_true", help="Run the VAE on the CPU.")

# Text encoder precision.
fpte_group = parser.add_mutually_exclusive_group()
fpte_group.add_argument("--fp8_e4m3fn-text-enc", action="store_true", help="Store text encoder weights in fp8 (e4m3fn variant).")
fpte_group.add_argument("--fp8_e5m2-text-enc", action="store_true", help="Store text encoder weights in fp8 (e5m2 variant).")
fpte_group.add_argument("--fp16-text-enc", action="store_true", help="Store text encoder weights in fp16.")
fpte_group.add_argument("--fp32-text-enc", action="store_true", help="Store text encoder weights in fp32.")
fpte_group.add_argument("--bf16-text-enc", action="store_true", help="Store text encoder weights in bf16.")

parser.add_argument("--force-channels-last", action="store_true", help="Force channels last format when inferencing the models.")

parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE", const=-1, help="Use torch-directml.")

parser.add_argument("--oneapi-device-selector", type=str, default=None, metavar="SELECTOR_STRING", help="Sets the oneAPI device(s) this instance will use.")
parser.add_argument("--disable-ipex-optimize", action="store_true", help="Disables ipex.optimize default when loading models with Intel's Extension for Pytorch.")
parser.add_argument("--supports-fp8-compute", action="store_true", help="ComfyUI will act like if the device supports fp8 compute.")
+
# Preview strategies for sampler progress images; values are the CLI strings
# accepted by --preview-method (via EnumAction).
class LatentPreviewMethod(enum.Enum):
    NoPreviews = "none"
    Auto = "auto"
    Latent2RGB = "latent2rgb"
    TAESD = "taesd"
+
parser.add_argument("--preview-method", type=LatentPreviewMethod, default=LatentPreviewMethod.NoPreviews, help="Default preview method for sampler nodes.", action=EnumAction)

parser.add_argument("--preview-size", type=int, default=512, help="Sets the maximum preview size for sampler nodes.")

# --- Node-result caching strategy ---
cache_group = parser.add_mutually_exclusive_group()
cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.")
cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.")
cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.")

# --- Attention implementation selection ---
attn_group = parser.add_mutually_exclusive_group()
attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization. Ignored when xformers is used.")
attn_group.add_argument("--use-quad-cross-attention", action="store_true", help="Use the sub-quadratic cross attention optimization . Ignored when xformers is used.")
attn_group.add_argument("--use-pytorch-cross-attention", action="store_true", help="Use the new pytorch 2.0 cross attention function.")
attn_group.add_argument("--use-sage-attention", action="store_true", help="Use sage attention.")
attn_group.add_argument("--use-flash-attention", action="store_true", help="Use FlashAttention.")

parser.add_argument("--disable-xformers", action="store_true", help="Disable xformers.")

upcast = parser.add_mutually_exclusive_group()
upcast.add_argument("--force-upcast-attention", action="store_true", help="Force enable attention upcasting, please report if it fixes black images.")
upcast.add_argument("--dont-upcast-attention", action="store_true", help="Disable all upcasting of attention. Should be unnecessary except for debugging.")


# --- VRAM management strategy ---
vram_group = parser.add_mutually_exclusive_group()
vram_group.add_argument("--gpu-only", action="store_true", help="Store and run everything (text encoders/CLIP models, etc... on the GPU).")
vram_group.add_argument("--highvram", action="store_true", help="By default models will be unloaded to CPU memory after being used. This option keeps them in GPU memory.")
vram_group.add_argument("--normalvram", action="store_true", help="Used to force normal vram use if lowvram gets automatically enabled.")
vram_group.add_argument("--lowvram", action="store_true", help="Split the unet in parts to use less vram.")
vram_group.add_argument("--novram", action="store_true", help="When lowvram isn't enough.")
vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).")

parser.add_argument("--reserve-vram", type=float, default=None, help="Set the amount of vram in GB you want to reserve for use by your OS/other software. By default some amount is reserved depending on your OS.")

parser.add_argument("--async-offload", action="store_true", help="Use async weight offloading.")

parser.add_argument("--default-hashing-function", type=str, choices=['md5', 'sha1', 'sha256', 'sha512'], default='sha256', help="Allows you to choose the hash function to use for duplicate filename / contents comparison. Default is sha256.")

parser.add_argument("--disable-smart-memory", action="store_true", help="Force ComfyUI to agressively offload to regular ram instead of keeping models in vram when it can.")
parser.add_argument("--deterministic", action="store_true", help="Make pytorch use slower deterministic algorithms when it can. Note that this might not make images deterministic in all cases.")
+
# Opt-in performance features toggled via --fast; values are the CLI strings.
class PerformanceFeature(enum.Enum):
    Fp16Accumulation = "fp16_accumulation"
    Fp8MatrixMultiplication = "fp8_matrix_mult"
    CublasOps = "cublas_ops"
+
parser.add_argument("--fast", nargs="*", type=PerformanceFeature, help="Enable some untested and potentially quality deteriorating optimizations. --fast with no arguments enables everything. You can pass a list specific optimizations if you only want to enable specific ones. Current valid optimizations: fp16_accumulation fp8_matrix_mult cublas_ops")

parser.add_argument("--mmap-torch-files", action="store_true", help="Use mmap when loading ckpt/pt files.")
parser.add_argument("--disable-mmap", action="store_true", help="Don't use mmap when loading safetensors.")

parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.")
parser.add_argument("--quick-test-for-ci", action="store_true", help="Quick test for CI.")
parser.add_argument("--windows-standalone-build", action="store_true", help="Windows standalone build: Enable convenient things that most people using the standalone windows build will probably enjoy (like auto opening the page on startup).")

parser.add_argument("--disable-metadata", action="store_true", help="Disable saving prompt metadata in files.")
parser.add_argument("--disable-all-custom-nodes", action="store_true", help="Disable loading all custom nodes.")
parser.add_argument("--whitelist-custom-nodes", type=str, nargs='+', default=[], help="Specify custom node folders to load even when --disable-all-custom-nodes is enabled.")
parser.add_argument("--disable-api-nodes", action="store_true", help="Disable loading all api nodes.")

parser.add_argument("--multi-user", action="store_true", help="Enables per-user storage.")

# --verbose doubles as a log-level selector: a bare --verbose means DEBUG.
parser.add_argument("--verbose", default='INFO', const='DEBUG', nargs="?", choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Set the logging level')
parser.add_argument("--log-stdout", action="store_true", help="Send normal process output to stdout instead of stderr (default).")

# The default built-in provider hosted under web/
DEFAULT_VERSION_STRING = "comfyanonymous/ComfyUI@latest"

parser.add_argument(
    "--front-end-version",
    type=str,
    default=DEFAULT_VERSION_STRING,
    help="""
    Specifies the version of the frontend to be used. This command needs internet connectivity to query and
    download available frontend implementations from GitHub releases.

    The version string should be in the format of:
    [repoOwner]/[repoName]@[version]
    where version is one of: "latest" or a valid version number (e.g. "1.0.0")
    """,
)
+
def is_valid_directory(path: str) -> str:
    """Argparse type hook: ensure *path* is an existing, readable directory.

    Raises argparse.ArgumentTypeError on the first failing check and returns
    the path unchanged when all checks pass.
    """
    checks = (
        (os.path.exists(path), f"The path '{path}' does not exist."),
        (os.path.isdir(path), f"'{path}' is not a directory."),
        (os.access(path, os.R_OK), f"You do not have read permissions for '{path}'."),
    )
    for ok, message in checks:
        if not ok:
            raise argparse.ArgumentTypeError(message)
    return path
+
parser.add_argument(
    "--front-end-root",
    type=is_valid_directory,
    default=None,
    help="The local filesystem path to the directory where the frontend is located. Overrides --front-end-version.",
)

parser.add_argument("--user-directory", type=is_valid_directory, default=None, help="Set the ComfyUI user directory with an absolute path. Overrides --base-directory.")

parser.add_argument("--enable-compress-response-body", action="store_true", help="Enable compressing response body.")

parser.add_argument(
    "--comfy-api-base",
    type=str,
    default="https://api.comfy.org",
    help="Set the base URL for the ComfyUI API. (default: https://api.comfy.org)",
)

# Default sqlite database location: <repo root>/user/comfyui.db (relative to
# this file's parent directory).
database_default_path = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "user", "comfyui.db")
)
parser.add_argument("--database-url", type=str, default=f"sqlite:///{database_default_path}", help="Specify the database URL, e.g. for an in-memory database you can use 'sqlite:///:memory:'.")
+
# Only consume sys.argv when the embedding application enabled CLI parsing
# (comfy.options toggles this); otherwise fall back to pure defaults so that
# importing this module from a library context never touches the command line.
if comfy.options.args_parsing:
    args = parser.parse_args()
else:
    args = parser.parse_args([])

# The Windows standalone build auto-opens the browser unless explicitly disabled.
if args.windows_standalone_build:
    args.auto_launch = True

if args.disable_auto_launch:
    args.auto_launch = False

# --force-fp16 implies running the diffusion model (unet) in fp16.
if args.force_fp16:
    args.fp16_unet = True


# '--fast' is not provided, use an empty set
if args.fast is None:
    args.fast = set()
# '--fast' is provided with an empty list, enable all optimizations
elif args.fast == []:
    args.fast = set(PerformanceFeature)
# '--fast' is provided with a list of performance features, use that list
else:
    args.fast = set(args.fast)
diff --git a/ComfyUI/comfy/clip_config_bigg.json b/ComfyUI/comfy/clip_config_bigg.json
new file mode 100644
index 0000000000000000000000000000000000000000..35261deef14a68fcc6c5b1fc32914b5c102781a9
--- /dev/null
+++ b/ComfyUI/comfy/clip_config_bigg.json
@@ -0,0 +1,23 @@
+{
+ "architectures": [
+ "CLIPTextModel"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 0,
+ "dropout": 0.0,
+ "eos_token_id": 49407,
+ "hidden_act": "gelu",
+ "hidden_size": 1280,
+ "initializer_factor": 1.0,
+ "initializer_range": 0.02,
+ "intermediate_size": 5120,
+ "layer_norm_eps": 1e-05,
+ "max_position_embeddings": 77,
+ "model_type": "clip_text_model",
+ "num_attention_heads": 20,
+ "num_hidden_layers": 32,
+ "pad_token_id": 1,
+ "projection_dim": 1280,
+ "torch_dtype": "float32",
+ "vocab_size": 49408
+}
diff --git a/ComfyUI/comfy/clip_model.py b/ComfyUI/comfy/clip_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8294d4832e06adb3611a3b9311c1bca06297df7
--- /dev/null
+++ b/ComfyUI/comfy/clip_model.py
@@ -0,0 +1,244 @@
+import torch
+from comfy.ldm.modules.attention import optimized_attention_for_device
+import comfy.ops
+
class CLIPAttention(torch.nn.Module):
    """Multi-head self-attention with separate q/k/v/out projections.

    The attention kernel itself is injected per call via the
    ``optimized_attention`` argument of ``forward``.
    """

    def __init__(self, embed_dim, heads, dtype, device, operations):
        super().__init__()

        self.heads = heads
        # All four projections share the same shape/configuration.
        def make_proj():
            return operations.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device)

        self.q_proj = make_proj()
        self.k_proj = make_proj()
        self.v_proj = make_proj()
        self.out_proj = make_proj()

    def forward(self, x, mask=None, optimized_attention=None):
        attended = optimized_attention(
            self.q_proj(x), self.k_proj(x), self.v_proj(x), self.heads, mask
        )
        return self.out_proj(attended)
+
def _quick_gelu(a):
    # x * sigmoid(1.702 * x): the sigmoid-based GELU approximation used by CLIP.
    return a * torch.sigmoid(1.702 * a)


def _gelu_tanh(a):
    return torch.nn.functional.gelu(a, approximate="tanh")


# Maps config "hidden_act" strings to activation callables.
ACTIVATIONS = {
    "quick_gelu": _quick_gelu,
    "gelu": torch.nn.functional.gelu,
    "gelu_pytorch_tanh": _gelu_tanh,
}
+
class CLIPMLP(torch.nn.Module):
    """Two-layer feed-forward block with a configurable activation.

    ``activation`` is a key into the module-level ACTIVATIONS table.
    """

    def __init__(self, embed_dim, intermediate_size, activation, dtype, device, operations):
        super().__init__()
        self.fc1 = operations.Linear(embed_dim, intermediate_size, bias=True, dtype=dtype, device=device)
        self.activation = ACTIVATIONS[activation]
        self.fc2 = operations.Linear(intermediate_size, embed_dim, bias=True, dtype=dtype, device=device)

    def forward(self, x):
        return self.fc2(self.activation(self.fc1(x)))
+
class CLIPLayer(torch.nn.Module):
    # Standard pre-norm transformer block: self-attention then MLP, each
    # wrapped in a residual connection.
    def __init__(self, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations):
        super().__init__()
        self.layer_norm1 = operations.LayerNorm(embed_dim, dtype=dtype, device=device)
        self.self_attn = CLIPAttention(embed_dim, heads, dtype, device, operations)
        self.layer_norm2 = operations.LayerNorm(embed_dim, dtype=dtype, device=device)
        self.mlp = CLIPMLP(embed_dim, intermediate_size, intermediate_activation, dtype, device, operations)

    def forward(self, x, mask=None, optimized_attention=None):
        # NOTE: in-place adds (+=) mutate the incoming tensor for the residual
        # accumulation — callers must not rely on x being unchanged.
        x += self.self_attn(self.layer_norm1(x), mask, optimized_attention)
        x += self.mlp(self.layer_norm2(x))
        return x
+
+
class CLIPEncoder(torch.nn.Module):
    """Stack of CLIPLayer blocks with optional intermediate-layer capture."""

    def __init__(self, num_layers, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations):
        super().__init__()
        blocks = [
            CLIPLayer(embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations)
            for _ in range(num_layers)
        ]
        self.layers = torch.nn.ModuleList(blocks)

    def forward(self, x, mask=None, intermediate_output=None):
        # Pick the best attention kernel once for this device/mask combination.
        attention = optimized_attention_for_device(x.device, mask=mask is not None, small_input=True)

        # Normalize a negative layer index (e.g. -1 == last layer).
        if intermediate_output is not None and intermediate_output < 0:
            intermediate_output = len(self.layers) + intermediate_output

        intermediate = None
        for idx, block in enumerate(self.layers):
            x = block(x, mask, attention)
            if idx == intermediate_output:
                # Clone so later layers' in-place ops cannot mutate the capture.
                intermediate = x.clone()
        return x, intermediate
+
class CLIPEmbeddings(torch.nn.Module):
    """Token plus learned absolute position embeddings for the text tower."""

    def __init__(self, embed_dim, vocab_size=49408, num_positions=77, dtype=None, device=None, operations=None):
        super().__init__()
        self.token_embedding = operations.Embedding(vocab_size, embed_dim, dtype=dtype, device=device)
        self.position_embedding = operations.Embedding(num_positions, embed_dim, dtype=dtype, device=device)

    def forward(self, input_tokens, dtype=torch.float32):
        tokens = self.token_embedding(input_tokens, out_dtype=dtype)
        positions = comfy.ops.cast_to(self.position_embedding.weight, dtype=dtype, device=input_tokens.device)
        return tokens + positions
+
+
class CLIPTextModel_(torch.nn.Module):
    # Core CLIP text transformer: embeddings -> causal encoder -> final norm.
    # forward() returns (last_hidden, intermediate_hidden_or_None, pooled_output).
    def __init__(self, config_dict, dtype, device, operations):
        num_layers = config_dict["num_hidden_layers"]
        embed_dim = config_dict["hidden_size"]
        heads = config_dict["num_attention_heads"]
        intermediate_size = config_dict["intermediate_size"]
        intermediate_activation = config_dict["hidden_act"]
        num_positions = config_dict["max_position_embeddings"]
        self.eos_token_id = config_dict["eos_token_id"]

        super().__init__()
        self.embeddings = CLIPEmbeddings(embed_dim, num_positions=num_positions, dtype=dtype, device=device, operations=operations)
        self.encoder = CLIPEncoder(num_layers, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations)
        self.final_layer_norm = operations.LayerNorm(embed_dim, dtype=dtype, device=device)

    def forward(self, input_tokens=None, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=torch.float32):
        # Accept either precomputed embeddings (position embeddings are added
        # here) or raw token ids.
        if embeds is not None:
            x = embeds + comfy.ops.cast_to(self.embeddings.position_embedding.weight, dtype=dtype, device=embeds.device)
        else:
            x = self.embeddings(input_tokens, dtype=dtype)

        mask = None
        if attention_mask is not None:
            # Expand the (batch, seq) padding mask to (batch, 1, seq, seq) and
            # convert masked positions into large negative additive biases.
            mask = 1.0 - attention_mask.to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])).expand(attention_mask.shape[0], 1, attention_mask.shape[-1], attention_mask.shape[-1])
            mask = mask.masked_fill(mask.to(torch.bool), -torch.finfo(x.dtype).max)

        # Causal mask: tokens may only attend to themselves and earlier tokens.
        causal_mask = torch.full((x.shape[1], x.shape[1]), -torch.finfo(x.dtype).max, dtype=x.dtype, device=x.device).triu_(1)

        if mask is not None:
            mask += causal_mask
        else:
            mask = causal_mask

        x, i = self.encoder(x, mask=mask, intermediate_output=intermediate_output)
        x = self.final_layer_norm(x)
        if i is not None and final_layer_norm_intermediate:
            i = self.final_layer_norm(i)

        if num_tokens is not None:
            # Pool at the last real token of each sequence (num_tokens holds
            # per-sample sequence lengths).
            pooled_output = x[list(range(x.shape[0])), list(map(lambda a: a - 1, num_tokens))]
        else:
            # Pool at the first EOS-token position found in each sequence.
            pooled_output = x[torch.arange(x.shape[0], device=x.device), (torch.round(input_tokens).to(dtype=torch.int, device=x.device) == self.eos_token_id).int().argmax(dim=-1),]
        return x, i, pooled_output
+
class CLIPTextModel(torch.nn.Module):
    """Wraps CLIPTextModel_ and adds the learned text projection head."""

    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        self.num_layers = config_dict["num_hidden_layers"]
        self.text_model = CLIPTextModel_(config_dict, dtype, device, operations)
        embed_dim = config_dict["hidden_size"]
        self.text_projection = operations.Linear(embed_dim, embed_dim, bias=False, dtype=dtype, device=device)
        self.dtype = dtype

    def get_input_embeddings(self):
        return self.text_model.embeddings.token_embedding

    def set_input_embeddings(self, embeddings):
        self.text_model.embeddings.token_embedding = embeddings

    def forward(self, *args, **kwargs):
        hidden, intermediate, pooled = self.text_model(*args, **kwargs)
        projected = self.text_projection(pooled)
        # Keep both the projected and the raw pooled output for callers.
        return (hidden, intermediate, projected, pooled)
+
+
class CLIPVisionEmbeddings(torch.nn.Module):
    # Patchify an image with a strided conv and add learned position
    # embeddings. SigLIP variants have no class token and use a biased
    # patch projection.
    def __init__(self, embed_dim, num_channels=3, patch_size=14, image_size=224, model_type="", dtype=None, device=None, operations=None):
        super().__init__()

        num_patches = (image_size // patch_size) ** 2
        if model_type == "siglip_vision_model":
            self.class_embedding = None
            patch_bias = True
        else:
            # One extra position for the prepended class token.
            num_patches = num_patches + 1
            self.class_embedding = torch.nn.Parameter(torch.empty(embed_dim, dtype=dtype, device=device))
            patch_bias = False

        self.patch_embedding = operations.Conv2d(
            in_channels=num_channels,
            out_channels=embed_dim,
            kernel_size=patch_size,
            stride=patch_size,
            bias=patch_bias,
            dtype=dtype,
            device=device
        )

        self.position_embedding = operations.Embedding(num_patches, embed_dim, dtype=dtype, device=device)

    def forward(self, pixel_values):
        # (B, C, H, W) -> (B, num_patches, embed_dim)
        embeds = self.patch_embedding(pixel_values).flatten(2).transpose(1, 2)
        if self.class_embedding is not None:
            # Prepend the class token (cast to match the patch embeddings).
            embeds = torch.cat([comfy.ops.cast_to_input(self.class_embedding, embeds).expand(pixel_values.shape[0], 1, -1), embeds], dim=1)
        return embeds + comfy.ops.cast_to_input(self.position_embedding.weight, embeds)
+
+
class CLIPVision(torch.nn.Module):
    # CLIP/SigLIP vision tower: patch embeddings -> encoder -> pooled output.
    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        num_layers = config_dict["num_hidden_layers"]
        embed_dim = config_dict["hidden_size"]
        heads = config_dict["num_attention_heads"]
        intermediate_size = config_dict["intermediate_size"]
        intermediate_activation = config_dict["hidden_act"]
        model_type = config_dict["model_type"]

        self.embeddings = CLIPVisionEmbeddings(embed_dim, config_dict["num_channels"], config_dict["patch_size"], config_dict["image_size"], model_type=model_type, dtype=dtype, device=device, operations=operations)
        if model_type == "siglip_vision_model":
            # SigLIP has no pre-layernorm; an identity keeps the call site uniform.
            self.pre_layrnorm = lambda a: a
            self.output_layernorm = True
        else:
            # NOTE: "layrnorm" misspelling kept — the attribute name determines
            # state-dict keys, so correcting it would break checkpoint loading.
            self.pre_layrnorm = operations.LayerNorm(embed_dim)
            self.output_layernorm = False
        self.encoder = CLIPEncoder(num_layers, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations)
        self.post_layernorm = operations.LayerNorm(embed_dim)

    def forward(self, pixel_values, attention_mask=None, intermediate_output=None):
        x = self.embeddings(pixel_values)
        x = self.pre_layrnorm(x)
        #TODO: attention_mask?
        x, i = self.encoder(x, mask=None, intermediate_output=intermediate_output)
        if self.output_layernorm:
            # SigLIP: normalize the whole sequence; pooled output is the sequence.
            x = self.post_layernorm(x)
            pooled_output = x
        else:
            # CLIP: pool the class token (position 0) through the final norm.
            pooled_output = self.post_layernorm(x[:, 0, :])
        return x, i, pooled_output
+
class LlavaProjector(torch.nn.Module):
    """Two-layer GELU MLP mapping vision features into the LLM embedding space."""

    def __init__(self, in_dim, out_dim, dtype, device, operations):
        super().__init__()
        self.linear_1 = operations.Linear(in_dim, out_dim, bias=True, device=device, dtype=dtype)
        self.linear_2 = operations.Linear(out_dim, out_dim, bias=True, device=device, dtype=dtype)

    def forward(self, x):
        # Drop the class token (index 0) before projecting patch features.
        projected = self.linear_1(x[:, 1:])
        return self.linear_2(torch.nn.functional.gelu(projected))
+
class CLIPVisionModelProjection(torch.nn.Module):
    """Vision tower plus optional visual projection and llava multi-modal projector."""

    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        self.vision_model = CLIPVision(config_dict, dtype, device, operations)
        if "projection_dim" in config_dict:
            self.visual_projection = operations.Linear(config_dict["hidden_size"], config_dict["projection_dim"], bias=False)
        else:
            # No projection head configured: pass the pooled output through.
            self.visual_projection = lambda a: a

        if "llava3" == config_dict.get("projector_type", None):
            self.multi_modal_projector = LlavaProjector(config_dict["hidden_size"], 4096, dtype, device, operations)
        else:
            self.multi_modal_projector = None

    def forward(self, *args, **kwargs):
        hidden, intermediate, pooled = self.vision_model(*args, **kwargs)
        projected_pooled = self.visual_projection(pooled)

        multimodal = None
        if self.multi_modal_projector is not None:
            # The llava projector consumes the captured intermediate layer.
            multimodal = self.multi_modal_projector(intermediate)

        return (hidden, intermediate, projected_pooled, multimodal)
diff --git a/ComfyUI/comfy/clip_vision.py b/ComfyUI/comfy/clip_vision.py
new file mode 100644
index 0000000000000000000000000000000000000000..00aab9164e5ec2060e34f22df9d1098cfe7b7e47
--- /dev/null
+++ b/ComfyUI/comfy/clip_vision.py
@@ -0,0 +1,148 @@
+from .utils import load_torch_file, transformers_convert, state_dict_prefix_replace
+import os
+import torch
+import json
+import logging
+
+import comfy.ops
+import comfy.model_patcher
+import comfy.model_management
+import comfy.utils
+import comfy.clip_model
+import comfy.image_encoders.dino2
+
class Output:
    """Minimal attribute bag that also supports dict-style item access."""

    def __getitem__(self, key):
        return getattr(self, key)

    def __setitem__(self, key, item):
        setattr(self, key, item)
+
def clip_preprocess(image, size=224, mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711], crop=True):
    """Resize (and optionally center-crop), quantize to 8-bit levels and
    normalize a [B, H, W, C] image batch for a CLIP-style vision encoder."""
    if image.shape[3] > 3:
        image = image[:, :, :, :3]  # drop alpha / extra channels
    mean = torch.tensor(mean, device=image.device, dtype=image.dtype)
    std = torch.tensor(std, device=image.device, dtype=image.dtype)
    image = image.movedim(-1, 1)  # BHWC -> BCHW
    if not (image.shape[2] == size and image.shape[3] == size):
        if crop:
            # Scale so the short side equals `size`, then center-crop.
            scale = size / min(image.shape[2], image.shape[3])
            scale_size = (round(scale * image.shape[2]), round(scale * image.shape[3]))
        else:
            scale_size = (size, size)  # plain squash-resize, no cropping

        image = torch.nn.functional.interpolate(image, size=scale_size, mode="bicubic", antialias=True)
        h = (image.shape[2] - size) // 2
        w = (image.shape[3] - size) // 2
        image = image[:, :, h:h + size, w:w + size]
    # Snap values to discrete 8-bit levels before normalizing.
    image = torch.clip((255. * image), 0, 255).round() / 255.0
    return (image - mean.view([3, 1, 1])) / std.view([3, 1, 1])
+
# Maps the "model_type" field of an image-encoder config JSON to the class that
# implements it. SigLIP reuses the CLIP projection wrapper.
IMAGE_ENCODERS = {
    "clip_vision_model": comfy.clip_model.CLIPVisionModelProjection,
    "siglip_vision_model": comfy.clip_model.CLIPVisionModelProjection,
    "dinov2": comfy.image_encoders.dino2.Dinov2Model,
}
+
class ClipVisionModel():
    """Loads an image-encoder config JSON, instantiates the matching model on
    the text-encoder device/dtype, and exposes state-dict and encode helpers."""

    def __init__(self, json_config):
        with open(json_config) as f:
            config = json.load(f)

        # Preprocessing defaults match OpenAI CLIP; configs may override them.
        self.image_size = config.get("image_size", 224)
        self.image_mean = config.get("image_mean", [0.48145466, 0.4578275, 0.40821073])
        self.image_std = config.get("image_std", [0.26862954, 0.26130258, 0.27577711])
        model_class = IMAGE_ENCODERS.get(config.get("model_type", "clip_vision_model"))
        self.load_device = comfy.model_management.text_encoder_device()
        offload_device = comfy.model_management.text_encoder_offload_device()
        self.dtype = comfy.model_management.text_encoder_dtype(self.load_device)
        # Weights are created on the offload device; manual_cast handles runtime dtype.
        self.model = model_class(config, self.dtype, offload_device, comfy.ops.manual_cast)
        self.model.eval()

        self.patcher = comfy.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)

    def load_sd(self, sd):
        # Non-strict load; returns (missing_keys, unexpected_keys).
        return self.model.load_state_dict(sd, strict=False)

    def get_sd(self):
        return self.model.state_dict()

    def encode_image(self, image, crop=True):
        """Preprocess and encode a [B, H, W, C] image batch.

        Returns an Output with last/penultimate hidden states, image embeds and
        the optional multimodal projection (may be None).
        """
        comfy.model_management.load_model_gpu(self.patcher)
        pixel_values = clip_preprocess(image.to(self.load_device), size=self.image_size, mean=self.image_mean, std=self.image_std, crop=crop).float()
        # intermediate_output=-2 makes the model also return the penultimate layer.
        out = self.model(pixel_values=pixel_values, intermediate_output=-2)

        outputs = Output()
        outputs["last_hidden_state"] = out[0].to(comfy.model_management.intermediate_device())
        outputs["image_embeds"] = out[2].to(comfy.model_management.intermediate_device())
        outputs["penultimate_hidden_states"] = out[1].to(comfy.model_management.intermediate_device())
        outputs["mm_projected"] = out[3]
        return outputs
+
def convert_to_transformers(sd, prefix):
    """Convert an OpenAI-layout CLIP vision state dict (keys under `prefix`) to
    the transformers layout; for other layouts just strip the prefix."""
    keys = sd.keys()  # live view: reflects pops below
    if "{}transformer.resblocks.0.attn.in_proj_weight".format(prefix) in keys:
        renames = {
            "{}class_embedding".format(prefix): "vision_model.embeddings.class_embedding",
            "{}conv1.weight".format(prefix): "vision_model.embeddings.patch_embedding.weight",
            "{}positional_embedding".format(prefix): "vision_model.embeddings.position_embedding.weight",
            "{}ln_post.bias".format(prefix): "vision_model.post_layernorm.bias",
            "{}ln_post.weight".format(prefix): "vision_model.post_layernorm.weight",
            "{}ln_pre.bias".format(prefix): "vision_model.pre_layrnorm.bias",
            "{}ln_pre.weight".format(prefix): "vision_model.pre_layrnorm.weight",
        }

        for old_key, new_key in renames.items():
            if old_key in keys:
                sd[new_key] = sd.pop(old_key)

        proj_key = "{}proj".format(prefix)
        if proj_key in keys:
            # The projection is stored transposed relative to nn.Linear's layout.
            sd['visual_projection.weight'] = sd.pop(proj_key).transpose(0, 1)

        sd = transformers_convert(sd, prefix, "vision_model.", 48)
    else:
        sd = state_dict_prefix_replace(sd, {prefix: ""})
    return sd
+
def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
    """Detect the vision-encoder variant from state-dict keys, construct the
    matching ClipVisionModel and load the weights into it.

    Returns the loaded ClipVisionModel, or None when no known architecture
    matches. Keys consumed by the model are removed from `sd` in place.
    """
    if convert_keys:
        sd = convert_to_transformers(sd, prefix)

    base = os.path.dirname(os.path.realpath(__file__))
    json_config = None
    if "vision_model.encoder.layers.47.layer_norm1.weight" in sd:
        json_config = os.path.join(base, "clip_vision_config_g.json")
    elif "vision_model.encoder.layers.30.layer_norm1.weight" in sd:
        json_config = os.path.join(base, "clip_vision_config_h.json")
    elif "vision_model.encoder.layers.22.layer_norm1.weight" in sd:
        embed_shape = sd["vision_model.embeddings.position_embedding.weight"].shape[0]
        if sd["vision_model.encoder.layers.0.layer_norm1.weight"].shape[0] == 1152:
            # SigLIP: distinguish 384px vs 512px variants by position count.
            if embed_shape == 729:
                json_config = os.path.join(base, "clip_vision_siglip_384.json")
            elif embed_shape == 1024:
                json_config = os.path.join(base, "clip_vision_siglip_512.json")
            # Other SigLIP embed shapes fall through to the unsupported case below.
        elif embed_shape == 577:
            if "multi_modal_projector.linear_1.bias" in sd:
                json_config = os.path.join(base, "clip_vision_config_vitl_336_llava.json")
            else:
                json_config = os.path.join(base, "clip_vision_config_vitl_336.json")
        else:
            json_config = os.path.join(base, "clip_vision_config_vitl.json")
    elif "embeddings.patch_embeddings.projection.weight" in sd:
        json_config = os.path.join(base, "image_encoders", "dino2_giant.json")

    if json_config is None:
        # Fix: an unrecognized SigLIP position-embedding size previously raised
        # UnboundLocalError here; every unknown layout now returns None.
        return None

    clip = ClipVisionModel(json_config)
    m, u = clip.load_sd(sd)
    if len(m) > 0:
        logging.warning("missing clip vision: {}".format(m))
    # Strip consumed keys from the caller's dict, keeping only unexpected ones.
    u = set(u)
    for k in list(sd.keys()):
        if k not in u:
            sd.pop(k)
    return clip
+
def load(ckpt_path):
    """Load a clip-vision checkpoint from disk, converting OpenAI-format keys
    when the checkpoint uses the `visual.` layout."""
    sd = load_torch_file(ckpt_path)
    is_openai_format = "visual.transformer.resblocks.0.attn.in_proj_weight" in sd
    if is_openai_format:
        return load_clipvision_from_sd(sd, prefix="visual.", convert_keys=True)
    return load_clipvision_from_sd(sd)
diff --git a/ComfyUI/comfy/clip_vision_config_g.json b/ComfyUI/comfy/clip_vision_config_g.json
new file mode 100644
index 0000000000000000000000000000000000000000..708e7e21ac3513a719d6a49e88e756f5ef7e2c8d
--- /dev/null
+++ b/ComfyUI/comfy/clip_vision_config_g.json
@@ -0,0 +1,18 @@
+{
+ "attention_dropout": 0.0,
+ "dropout": 0.0,
+ "hidden_act": "gelu",
+ "hidden_size": 1664,
+ "image_size": 224,
+ "initializer_factor": 1.0,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "layer_norm_eps": 1e-05,
+ "model_type": "clip_vision_model",
+ "num_attention_heads": 16,
+ "num_channels": 3,
+ "num_hidden_layers": 48,
+ "patch_size": 14,
+ "projection_dim": 1280,
+ "torch_dtype": "float32"
+}
diff --git a/ComfyUI/comfy/clip_vision_config_h.json b/ComfyUI/comfy/clip_vision_config_h.json
new file mode 100644
index 0000000000000000000000000000000000000000..bb71be419a4be0ad5c8c157850de032a65593cb9
--- /dev/null
+++ b/ComfyUI/comfy/clip_vision_config_h.json
@@ -0,0 +1,18 @@
+{
+ "attention_dropout": 0.0,
+ "dropout": 0.0,
+ "hidden_act": "gelu",
+ "hidden_size": 1280,
+ "image_size": 224,
+ "initializer_factor": 1.0,
+ "initializer_range": 0.02,
+ "intermediate_size": 5120,
+ "layer_norm_eps": 1e-05,
+ "model_type": "clip_vision_model",
+ "num_attention_heads": 16,
+ "num_channels": 3,
+ "num_hidden_layers": 32,
+ "patch_size": 14,
+ "projection_dim": 1024,
+ "torch_dtype": "float32"
+}
diff --git a/ComfyUI/comfy/clip_vision_config_vitl.json b/ComfyUI/comfy/clip_vision_config_vitl.json
new file mode 100644
index 0000000000000000000000000000000000000000..c59b8ed5a4c1f41fbcc9e6811d2c7dfe44273de7
--- /dev/null
+++ b/ComfyUI/comfy/clip_vision_config_vitl.json
@@ -0,0 +1,18 @@
+{
+ "attention_dropout": 0.0,
+ "dropout": 0.0,
+ "hidden_act": "quick_gelu",
+ "hidden_size": 1024,
+ "image_size": 224,
+ "initializer_factor": 1.0,
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "layer_norm_eps": 1e-05,
+ "model_type": "clip_vision_model",
+ "num_attention_heads": 16,
+ "num_channels": 3,
+ "num_hidden_layers": 24,
+ "patch_size": 14,
+ "projection_dim": 768,
+ "torch_dtype": "float32"
+}
diff --git a/ComfyUI/comfy/clip_vision_config_vitl_336.json b/ComfyUI/comfy/clip_vision_config_vitl_336.json
new file mode 100644
index 0000000000000000000000000000000000000000..f26945273d99e88f207d64dcec78feee63b4b625
--- /dev/null
+++ b/ComfyUI/comfy/clip_vision_config_vitl_336.json
@@ -0,0 +1,18 @@
+{
+ "attention_dropout": 0.0,
+ "dropout": 0.0,
+ "hidden_act": "quick_gelu",
+ "hidden_size": 1024,
+ "image_size": 336,
+ "initializer_factor": 1.0,
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "layer_norm_eps": 1e-5,
+ "model_type": "clip_vision_model",
+ "num_attention_heads": 16,
+ "num_channels": 3,
+ "num_hidden_layers": 24,
+ "patch_size": 14,
+ "projection_dim": 768,
+ "torch_dtype": "float32"
+}
diff --git a/ComfyUI/comfy/clip_vision_config_vitl_336_llava.json b/ComfyUI/comfy/clip_vision_config_vitl_336_llava.json
new file mode 100644
index 0000000000000000000000000000000000000000..f23a50d8b77fa29de2af621fb50c5825cf9b1a86
--- /dev/null
+++ b/ComfyUI/comfy/clip_vision_config_vitl_336_llava.json
@@ -0,0 +1,19 @@
+{
+ "attention_dropout": 0.0,
+ "dropout": 0.0,
+ "hidden_act": "quick_gelu",
+ "hidden_size": 1024,
+ "image_size": 336,
+ "initializer_factor": 1.0,
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "layer_norm_eps": 1e-5,
+ "model_type": "clip_vision_model",
+ "num_attention_heads": 16,
+ "num_channels": 3,
+ "num_hidden_layers": 24,
+ "patch_size": 14,
+ "projection_dim": 768,
+ "projector_type": "llava3",
+ "torch_dtype": "float32"
+}
diff --git a/ComfyUI/comfy/clip_vision_siglip_384.json b/ComfyUI/comfy/clip_vision_siglip_384.json
new file mode 100644
index 0000000000000000000000000000000000000000..532e03ac181d8849a7202445d42565f01441177b
--- /dev/null
+++ b/ComfyUI/comfy/clip_vision_siglip_384.json
@@ -0,0 +1,13 @@
+{
+ "num_channels": 3,
+ "hidden_act": "gelu_pytorch_tanh",
+ "hidden_size": 1152,
+ "image_size": 384,
+ "intermediate_size": 4304,
+ "model_type": "siglip_vision_model",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 27,
+ "patch_size": 14,
+ "image_mean": [0.5, 0.5, 0.5],
+ "image_std": [0.5, 0.5, 0.5]
+}
diff --git a/ComfyUI/comfy/clip_vision_siglip_512.json b/ComfyUI/comfy/clip_vision_siglip_512.json
new file mode 100644
index 0000000000000000000000000000000000000000..7fb93ce15e6da3ce7653a7a91ab278d707a096b4
--- /dev/null
+++ b/ComfyUI/comfy/clip_vision_siglip_512.json
@@ -0,0 +1,13 @@
+{
+ "num_channels": 3,
+ "hidden_act": "gelu_pytorch_tanh",
+ "hidden_size": 1152,
+ "image_size": 512,
+ "intermediate_size": 4304,
+ "model_type": "siglip_vision_model",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 27,
+ "patch_size": 16,
+ "image_mean": [0.5, 0.5, 0.5],
+ "image_std": [0.5, 0.5, 0.5]
+}
diff --git a/ComfyUI/comfy/conds.py b/ComfyUI/comfy/conds.py
new file mode 100644
index 0000000000000000000000000000000000000000..2af2a43a3630e50daa357f401a5e4157610a5d65
--- /dev/null
+++ b/ComfyUI/comfy/conds.py
@@ -0,0 +1,130 @@
+import torch
+import math
+import comfy.utils
+
+
class CONDRegular:
    """Plain conditioning tensor: concatenable only when shapes match exactly."""

    def __init__(self, cond):
        self.cond = cond

    def _copy_with(self, cond):
        # Instantiate via self.__class__ so subclasses clone as themselves.
        return self.__class__(cond)

    def process_cond(self, batch_size, device, **kwargs):
        expanded = comfy.utils.repeat_to_batch_size(self.cond, batch_size)
        return self._copy_with(expanded.to(device))

    def can_concat(self, other):
        return self.cond.shape == other.cond.shape

    def concat(self, others):
        tensors = [self.cond] + [o.cond for o in others]
        return torch.cat(tensors)

    def size(self):
        return list(self.cond.size())
+
+
class CONDNoiseShape(CONDRegular):
    """Conditioning cropped to the spatial area currently being denoised."""

    def process_cond(self, batch_size, device, area, **kwargs):
        data = self.cond
        if area is not None:
            # area = (*sizes, *offsets); spatial dimensions start at index 2.
            dims = len(area) // 2
            for axis in range(dims):
                data = data.narrow(axis + 2, area[axis + dims], area[axis])
        return self._copy_with(comfy.utils.repeat_to_batch_size(data, batch_size).to(device))
+
+
class CONDCrossAttn(CONDRegular):
    """Cross-attention conditioning; allows concatenation of different token
    counts by repeating each sequence up to their least common multiple."""

    def can_concat(self, other):
        s1, s2 = self.cond.shape, other.cond.shape
        if s1 != s2:
            if s1[0] != s2[0] or s1[2] != s2[2]:  # these 2 cases should not happen
                return False
            lcm_len = math.lcm(s1[1], s2[1])
            # Arbitrary limit on the padding because it's probably going to
            # impact performance negatively if it's too much.
            if lcm_len // min(s1[1], s2[1]) > 4:
                return False
        return True

    def concat(self, others):
        tensors = [self.cond]
        max_len = self.cond.shape[1]
        for other in others:
            tensors.append(other.cond)
            max_len = math.lcm(max_len, other.cond.shape[1])

        padded = []
        for t in tensors:
            if t.shape[1] < max_len:
                # Padding with repeat doesn't change the attention result.
                t = t.repeat(1, max_len // t.shape[1], 1)
            padded.append(t)
        return torch.cat(padded)
+
+
class CONDConstant(CONDRegular):
    """Conditioning that wraps a constant (non-tensor) value."""

    def __init__(self, cond):
        self.cond = cond

    def process_cond(self, batch_size, device, **kwargs):
        # Constants are batch- and device-independent.
        return self._copy_with(self.cond)

    def can_concat(self, other):
        return self.cond == other.cond

    def concat(self, others):
        return self.cond

    def size(self):
        return [1]
+
+
class CONDList(CONDRegular):
    """Conditioning made of a list of tensors, concatenated element-wise."""

    def __init__(self, cond):
        self.cond = cond

    def process_cond(self, batch_size, device, **kwargs):
        processed = [comfy.utils.repeat_to_batch_size(t, batch_size).to(device) for t in self.cond]
        return self._copy_with(processed)

    def can_concat(self, other):
        if len(self.cond) != len(other.cond):
            return False
        return all(a.shape == b.shape for a, b in zip(self.cond, other.cond))

    def concat(self, others):
        out = []
        for i in range(len(self.cond)):
            group = [self.cond[i]] + [x.cond[i] for x in others]
            out.append(torch.cat(group))
        return out

    def size(self):  # hackish implementation to make the mem estimation work
        total = 0
        c = 1
        # NOTE: `c` is deliberately reused as the loop variable; after the loop
        # it holds the channel count of the last multi-dim tensor (or falls back).
        for c in self.cond:
            shape = c.size()
            total += math.prod(shape)
            if len(shape) > 1:
                c = shape[1]
        return [1, c, total // c]
diff --git a/ComfyUI/comfy/controlnet.py b/ComfyUI/comfy/controlnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a47b86f2f5d4f9275a754f24b04b64cabd60f15
--- /dev/null
+++ b/ComfyUI/comfy/controlnet.py
@@ -0,0 +1,858 @@
+"""
+ This file is part of ComfyUI.
+ Copyright (C) 2024 Comfy
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program. If not, see <https://www.gnu.org/licenses/>.
+"""
+
+
+import torch
+from enum import Enum
+import math
+import os
+import logging
+import comfy.utils
+import comfy.model_management
+import comfy.model_detection
+import comfy.model_patcher
+import comfy.ops
+import comfy.latent_formats
+
+import comfy.cldm.cldm
+import comfy.t2i_adapter.adapter
+import comfy.ldm.cascade.controlnet
+import comfy.cldm.mmdit
+import comfy.ldm.hydit.controlnet
+import comfy.ldm.flux.controlnet
+import comfy.cldm.dit_embedder
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+ from comfy.hooks import HookGroup
+
+
def broadcast_image_to(tensor, target_batch_size, batched_number):
    """Repeat/trim `tensor` along dim 0 so it matches the model batch.

    `batched_number` is how many cond/uncond groups make up the target batch.
    A single-image tensor broadcasts implicitly and is returned untouched.
    """
    if tensor.shape[0] == 1:
        return tensor

    per_batch = target_batch_size // batched_number
    tensor = tensor[:per_batch]

    if per_batch > tensor.shape[0]:
        # Tile whole copies, then top up with a partial slice.
        repeats = per_batch // tensor.shape[0]
        remainder = per_batch % tensor.shape[0]
        tensor = torch.cat([tensor] * repeats + [tensor[:remainder]], dim=0)

    if tensor.shape[0] == target_batch_size:
        return tensor
    return torch.cat([tensor] * batched_number, dim=0)
+
class StrengthType(Enum):
    # How controlnet strength is applied in ControlBase.control_merge:
    # CONSTANT: every control output is multiplied by `strength`.
    # LINEAR_UP: output i of n is multiplied by strength ** (n - i).
    CONSTANT = 1
    LINEAR_UP = 2
+
class ControlBase:
    """Shared state and logic for controlnet-like models: hint handling,
    strength/timestep scheduling, chaining via previous_controlnet, and merging
    of control outputs."""

    def __init__(self):
        self.cond_hint_original = None  # hint as supplied by the user
        self.cond_hint = None  # processed hint cached for the current resolution
        self.strength = 1.0
        self.timestep_percent_range = (0.0, 1.0)
        self.latent_format = None  # set when the hint must be VAE-encoded
        self.vae = None
        self.global_average_pooling = False
        self.timestep_range = None  # concrete (start, end) timesteps, set in pre_run
        self.compression_ratio = 8
        self.upscale_algorithm = 'nearest-exact'
        self.extra_args = {}
        self.previous_controlnet = None
        self.extra_conds = []
        self.strength_type = StrengthType.CONSTANT
        self.concat_mask = False
        self.extra_concat_orig = []
        self.extra_concat = None
        self.extra_hooks: HookGroup = None
        self.preprocess_image = lambda a: a

    def set_cond_hint(self, cond_hint, strength=1.0, timestep_percent_range=(0.0, 1.0), vae=None, extra_concat=[]):
        """Configure the control hint, strength and active percent range.

        Returns self for chaining. `extra_concat` is copied, so the mutable
        default is not shared between calls.
        """
        self.cond_hint_original = cond_hint
        self.strength = strength
        self.timestep_percent_range = timestep_percent_range
        if self.latent_format is not None:
            if vae is None:
                logging.warning("WARNING: no VAE provided to the controlnet apply node when this controlnet requires one.")
        self.vae = vae
        self.extra_concat_orig = extra_concat.copy()
        if self.concat_mask and len(self.extra_concat_orig) == 0:
            # Default to an all-ones mask channel when one is required.
            self.extra_concat_orig.append(torch.tensor([[[[1.0]]]]))
        return self

    def pre_run(self, model, percent_to_timestep_function):
        # Resolve the percent range into concrete sampler timesteps.
        self.timestep_range = (percent_to_timestep_function(self.timestep_percent_range[0]), percent_to_timestep_function(self.timestep_percent_range[1]))
        if self.previous_controlnet is not None:
            self.previous_controlnet.pre_run(model, percent_to_timestep_function)

    def set_previous_controlnet(self, controlnet):
        # Chain another controlnet whose outputs get merged with this one's.
        self.previous_controlnet = controlnet
        return self

    def cleanup(self):
        # Drop cached per-run state, recursively down the chain.
        if self.previous_controlnet is not None:
            self.previous_controlnet.cleanup()

        self.cond_hint = None
        self.extra_concat = None
        self.timestep_range = None

    def get_models(self):
        # Collect patchable models from the whole chain (subclasses append theirs).
        out = []
        if self.previous_controlnet is not None:
            out += self.previous_controlnet.get_models()
        return out

    def get_extra_hooks(self):
        out = []
        if self.extra_hooks is not None:
            out.append(self.extra_hooks)
        if self.previous_controlnet is not None:
            out += self.previous_controlnet.get_extra_hooks()
        return out

    def copy_to(self, c):
        """Copy shared configuration (not per-run caches) onto instance `c`."""
        c.cond_hint_original = self.cond_hint_original
        c.strength = self.strength
        c.timestep_percent_range = self.timestep_percent_range
        c.global_average_pooling = self.global_average_pooling
        c.compression_ratio = self.compression_ratio
        c.upscale_algorithm = self.upscale_algorithm
        c.latent_format = self.latent_format
        c.extra_args = self.extra_args.copy()
        c.vae = self.vae
        c.extra_conds = self.extra_conds.copy()
        c.strength_type = self.strength_type
        c.concat_mask = self.concat_mask
        c.extra_concat_orig = self.extra_concat_orig.copy()
        c.extra_hooks = self.extra_hooks.clone() if self.extra_hooks else None
        c.preprocess_image = self.preprocess_image

    def inference_memory_requirements(self, dtype):
        # Base class holds no weights; delegate down the chain.
        if self.previous_controlnet is not None:
            return self.previous_controlnet.inference_memory_requirements(dtype)
        return 0

    def control_merge(self, control, control_prev, output_dtype):
        """Apply pooling/strength/dtype to `control` outputs and merge them with
        the previous controlnet's already-merged outputs `control_prev`."""
        out = {'input':[], 'middle':[], 'output': []}

        for key in control:
            control_output = control[key]
            applied_to = set()
            for i in range(len(control_output)):
                x = control_output[i]
                if x is not None:
                    if self.global_average_pooling:
                        # Mean over dims 2,3 broadcast back to the spatial size.
                        x = torch.mean(x, dim=(2, 3), keepdim=True).repeat(1, 1, x.shape[2], x.shape[3])

                    if x not in applied_to: #memory saving strategy, allow shared tensors and only apply strength to shared tensors once
                        applied_to.add(x)
                        if self.strength_type == StrengthType.CONSTANT:
                            x *= self.strength
                        elif self.strength_type == StrengthType.LINEAR_UP:
                            # Earlier outputs get higher powers of strength.
                            x *= (self.strength ** float(len(control_output) - i))

                    if output_dtype is not None and x.dtype != output_dtype:
                        x = x.to(output_dtype)

                out[key].append(x)

        if control_prev is not None:
            for x in ['input', 'middle', 'output']:
                o = out[x]
                for i in range(len(control_prev[x])):
                    prev_val = control_prev[x][i]
                    if i >= len(o):
                        o.append(prev_val)
                    elif prev_val is not None:
                        if o[i] is None:
                            o[i] = prev_val
                        else:
                            if o[i].shape[0] < prev_val.shape[0]:
                                o[i] = prev_val + o[i]
                            else:
                                o[i] = prev_val + o[i] #TODO: change back to inplace add if shared tensors stop being an issue
        return out

    def set_extra_arg(self, argument, value=None):
        # Extra keyword forwarded to the control model at every call.
        self.extra_args[argument] = value
+
+
class ControlNet(ControlBase):
    """Standard controlnet: wraps a concrete control model and computes its
    per-step residual outputs for the sampler."""

    def __init__(self, control_model=None, global_average_pooling=False, compression_ratio=8, latent_format=None, load_device=None, manual_cast_dtype=None, extra_conds=["y"], strength_type=StrengthType.CONSTANT, concat_mask=False, preprocess_image=lambda a: a):
        super().__init__()
        self.control_model = control_model
        self.load_device = load_device
        if control_model is not None:
            # Wrap so model management can load/offload it like other models.
            self.control_model_wrapped = comfy.model_patcher.ModelPatcher(self.control_model, load_device=load_device, offload_device=comfy.model_management.unet_offload_device())

        self.compression_ratio = compression_ratio
        self.global_average_pooling = global_average_pooling
        self.model_sampling_current = None
        self.manual_cast_dtype = manual_cast_dtype
        self.latent_format = latent_format
        self.extra_conds += extra_conds
        self.strength_type = strength_type
        self.concat_mask = concat_mask
        self.preprocess_image = preprocess_image

    def get_control(self, x_noisy, t, cond, batched_number, transformer_options):
        """Compute this controlnet's outputs for the current step, merged with
        the chained controlnet's; returns None when outside the timestep range."""
        control_prev = None
        if self.previous_controlnet is not None:
            control_prev = self.previous_controlnet.get_control(x_noisy, t, cond, batched_number, transformer_options)

        if self.timestep_range is not None:
            # Active only while timestep_range[0] >= t[0] >= timestep_range[1].
            if t[0] > self.timestep_range[0] or t[0] < self.timestep_range[1]:
                if control_prev is not None:
                    return control_prev
                else:
                    return None

        dtype = self.control_model.dtype
        if self.manual_cast_dtype is not None:
            dtype = self.manual_cast_dtype

        # (Re)build the cached hint whenever the sampling resolution changes.
        if self.cond_hint is None or x_noisy.shape[2] * self.compression_ratio != self.cond_hint.shape[2] or x_noisy.shape[3] * self.compression_ratio != self.cond_hint.shape[3]:
            if self.cond_hint is not None:
                del self.cond_hint
                self.cond_hint = None
            compression_ratio = self.compression_ratio
            if self.vae is not None:
                compression_ratio *= self.vae.downscale_ratio
            else:
                if self.latent_format is not None:
                    raise ValueError("This Controlnet needs a VAE but none was provided, please use a ControlNetApply node with a VAE input and connect it.")
            self.cond_hint = comfy.utils.common_upscale(self.cond_hint_original, x_noisy.shape[3] * compression_ratio, x_noisy.shape[2] * compression_ratio, self.upscale_algorithm, "center")
            self.cond_hint = self.preprocess_image(self.cond_hint)
            if self.vae is not None:
                # Encode through the VAE, then restore the previously loaded models.
                loaded_models = comfy.model_management.loaded_models(only_currently_used=True)
                self.cond_hint = self.vae.encode(self.cond_hint.movedim(1, -1))
                comfy.model_management.load_models_gpu(loaded_models)
            if self.latent_format is not None:
                self.cond_hint = self.latent_format.process_in(self.cond_hint)
            if len(self.extra_concat_orig) > 0:
                # Append extra channels (e.g. inpaint mask) resized to the hint.
                to_concat = []
                for c in self.extra_concat_orig:
                    c = c.to(self.cond_hint.device)
                    c = comfy.utils.common_upscale(c, self.cond_hint.shape[3], self.cond_hint.shape[2], self.upscale_algorithm, "center")
                    to_concat.append(comfy.utils.repeat_to_batch_size(c, self.cond_hint.shape[0]))
                self.cond_hint = torch.cat([self.cond_hint] + to_concat, dim=1)

            self.cond_hint = self.cond_hint.to(device=x_noisy.device, dtype=dtype)
        if x_noisy.shape[0] != self.cond_hint.shape[0]:
            self.cond_hint = broadcast_image_to(self.cond_hint, x_noisy.shape[0], batched_number)

        context = cond.get('crossattn_controlnet', cond['c_crossattn'])
        extra = self.extra_args.copy()
        for c in self.extra_conds:
            temp = cond.get(c, None)
            if temp is not None:
                extra[c] = temp.to(dtype)

        timestep = self.model_sampling_current.timestep(t)
        x_noisy = self.model_sampling_current.calculate_input(t, x_noisy)

        control = self.control_model(x=x_noisy.to(dtype), hint=self.cond_hint, timesteps=timestep.to(dtype), context=context.to(dtype), **extra)
        return self.control_merge(control, control_prev, output_dtype=None)

    def copy(self):
        # Shallow copy sharing the control model and its patcher wrapper.
        c = ControlNet(None, global_average_pooling=self.global_average_pooling, load_device=self.load_device, manual_cast_dtype=self.manual_cast_dtype)
        c.control_model = self.control_model
        c.control_model_wrapped = self.control_model_wrapped
        self.copy_to(c)
        return c

    def get_models(self):
        out = super().get_models()
        out.append(self.control_model_wrapped)
        return out

    def pre_run(self, model, percent_to_timestep_function):
        super().pre_run(model, percent_to_timestep_function)
        # Needed in get_control to map t -> model timestep / scaled input.
        self.model_sampling_current = model.model_sampling

    def cleanup(self):
        self.model_sampling_current = None
        super().cleanup()
+
class ControlLoraOps:
    """Operation mixin whose Linear/Conv2d layers hold no registered parameters.

    Weights are attached externally (base-model weight plus an optional LoRA
    up/down pair, see ControlLora.pre_run) and combined at forward time.
    """

    class Linear(torch.nn.Module, comfy.ops.CastWeightBiasOp):
        def __init__(self, in_features: int, out_features: int, bias: bool = True,
                     device=None, dtype=None) -> None:
            super().__init__()
            self.in_features = in_features
            self.out_features = out_features
            # Set externally; plain attributes (not nn.Parameters), so they are
            # excluded from state_dict and optimizers.
            self.weight = None
            self.up = None
            self.down = None
            self.bias = None

        def forward(self, input):
            weight, bias = comfy.ops.cast_bias_weight(self, input)
            if self.up is not None:
                # Apply the low-rank delta: W + up @ down reshaped to W's shape.
                return torch.nn.functional.linear(input, weight + (torch.mm(self.up.flatten(start_dim=1), self.down.flatten(start_dim=1))).reshape(self.weight.shape).type(input.dtype), bias)
            else:
                return torch.nn.functional.linear(input, weight, bias)

    class Conv2d(torch.nn.Module, comfy.ops.CastWeightBiasOp):
        def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride=1,
            padding=0,
            dilation=1,
            groups=1,
            bias=True,
            padding_mode='zeros',
            device=None,
            dtype=None
        ):
            super().__init__()
            self.in_channels = in_channels
            self.out_channels = out_channels
            self.kernel_size = kernel_size
            self.stride = stride
            self.padding = padding
            self.dilation = dilation
            self.transposed = False
            self.output_padding = 0
            self.groups = groups
            self.padding_mode = padding_mode

            # Set externally, same convention as Linear above.
            self.weight = None
            self.bias = None
            self.up = None
            self.down = None


        def forward(self, input):
            weight, bias = comfy.ops.cast_bias_weight(self, input)
            if self.up is not None:
                # Low-rank delta applied to the flattened conv kernel.
                return torch.nn.functional.conv2d(input, weight + (torch.mm(self.up.flatten(start_dim=1), self.down.flatten(start_dim=1))).reshape(self.weight.shape).type(input.dtype), bias, self.stride, self.padding, self.dilation, self.groups)
            else:
                return torch.nn.functional.conv2d(input, weight, bias, self.stride, self.padding, self.dilation, self.groups)
+
+
class ControlLora(ControlNet):
    """Controlnet stored as a low-rank (LoRA-style) diff against the base
    diffusion model; the full control model is materialized in pre_run from the
    base model's weights plus the stored up/down matrices, and discarded in
    cleanup."""

    def __init__(self, control_weights, global_average_pooling=False, model_options={}): #TODO? model_options
        ControlBase.__init__(self)
        self.control_weights = control_weights
        self.global_average_pooling = global_average_pooling
        self.extra_conds += ["y"]

    def pre_run(self, model, percent_to_timestep_function):
        super().pre_run(model, percent_to_timestep_function)
        controlnet_config = model.model_config.unet_config.copy()
        controlnet_config.pop("out_channels")
        controlnet_config["hint_channels"] = self.control_weights["input_hint_block.0.weight"].shape[1]
        self.manual_cast_dtype = model.manual_cast_dtype
        dtype = model.get_dtype()
        if self.manual_cast_dtype is None:
            class control_lora_ops(ControlLoraOps, comfy.ops.disable_weight_init):
                pass
        else:
            class control_lora_ops(ControlLoraOps, comfy.ops.manual_cast):
                pass
            dtype = self.manual_cast_dtype

        controlnet_config["operations"] = control_lora_ops
        controlnet_config["dtype"] = dtype
        self.control_model = comfy.cldm.cldm.ControlNet(**controlnet_config)
        self.control_model.to(comfy.model_management.get_torch_device())
        diffusion_model = model.diffusion_model
        sd = diffusion_model.state_dict()

        # Seed the control model with the base model's weights; keys the control
        # model does not have are skipped on purpose.
        for k in sd:
            weight = sd[k]
            try:
                comfy.utils.set_attr_param(self.control_model, k, weight)
            except Exception:
                # Fix: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit; only attribute mismatches are expected.
                pass

        # Overlay the stored LoRA up/down matrices and full weight overrides.
        for k in self.control_weights:
            if (k not in {"lora_controlnet"}):
                if (k.endswith(".up") or k.endswith(".down") or k.endswith(".weight") or k.endswith(".bias")) and ("__" not in k):
                    comfy.utils.set_attr_param(self.control_model, k, self.control_weights[k].to(dtype).to(comfy.model_management.get_torch_device()))

    def copy(self):
        c = ControlLora(self.control_weights, global_average_pooling=self.global_average_pooling)
        self.copy_to(c)
        return c

    def cleanup(self):
        # Drop the materialized model; it is rebuilt on the next pre_run.
        del self.control_model
        self.control_model = None
        super().cleanup()

    def get_models(self):
        # The control model is built on demand, so only the chain's models are reported.
        out = ControlBase.get_models(self)
        return out

    def inference_memory_requirements(self, dtype):
        return comfy.utils.calculate_parameters(self.control_weights) * comfy.model_management.dtype_size(dtype) + ControlBase.inference_memory_requirements(self, dtype)
+
def controlnet_config(sd, model_options={}):
    """Resolve model config, operations class, devices and dtypes for a
    controlnet state dict."""
    model_config = comfy.model_detection.model_config_from_unet(sd, "", True)
    load_device = comfy.model_management.get_torch_device()
    offload_device = comfy.model_management.unet_offload_device()

    unet_dtype = model_options.get("dtype", None)
    if unet_dtype is None:
        # Pick a dtype from the weights and the dtypes the config supports.
        weight_dtype = comfy.utils.weight_dtype(sd)
        unet_dtype = comfy.model_management.unet_dtype(
            model_params=-1,
            supported_dtypes=list(model_config.supported_inference_dtypes),
            weight_dtype=weight_dtype)

    manual_cast_dtype = comfy.model_management.unet_manual_cast(unet_dtype, load_device)

    operations = model_options.get("custom_operations", None)
    if operations is None:
        operations = comfy.ops.pick_operations(unet_dtype, manual_cast_dtype, disable_fast_fp8=True)

    return model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device
+
def controlnet_load_state_dict(control_model, sd):
    """Load `sd` into `control_model` non-strictly, logging key mismatches."""
    missing, unexpected = control_model.load_state_dict(sd, strict=False)

    if missing:
        logging.warning("missing controlnet keys: {}".format(missing))
    if unexpected:
        logging.debug("unexpected controlnet keys: {}".format(unexpected))
    return control_model
+
+
def load_controlnet_mmdit(sd, model_options={}):
    """Build an SD3/MMDiT controlnet from a diffusers-format state dict."""
    new_sd = comfy.model_detection.convert_diffusers_mmdit(sd, "")
    model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(new_sd, model_options=model_options)
    num_blocks = comfy.model_detection.count_blocks(new_sd, 'joint_blocks.{}.')
    # Also carry the original (unconverted) keys alongside the converted ones.
    for k in sd:
        new_sd[k] = sd[k]

    concat_mask = False
    control_latent_channels = new_sd.get("pos_embed_input.proj.weight").shape[1]
    if control_latent_channels == 17: #inpaint controlnet
        concat_mask = True

    control_model = comfy.cldm.mmdit.ControlNet(num_blocks=num_blocks, control_latent_channels=control_latent_channels, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config)
    control_model = controlnet_load_state_dict(control_model, new_sd)

    latent_format = comfy.latent_formats.SD3()
    latent_format.shift_factor = 0 #SD3 controlnet weirdness
    control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, concat_mask=concat_mask, load_device=load_device, manual_cast_dtype=manual_cast_dtype)
    return control
+
+
class ControlNetSD35(ControlNet):
    """SD3.5 controlnet: before a run it copies the main model's embedder
    weights into the control model (y- or x-embedder depending on the variant)."""

    def pre_run(self, model, percent_to_timestep_function):
        # NOTE(review): missing/unexpected results are intentionally ignored here.
        if self.control_model.double_y_emb:
            missing, unexpected = self.control_model.orig_y_embedder.load_state_dict(model.diffusion_model.y_embedder.state_dict(), strict=False)
        else:
            missing, unexpected = self.control_model.x_embedder.load_state_dict(model.diffusion_model.x_embedder.state_dict(), strict=False)
        super().pre_run(model, percent_to_timestep_function)

    def copy(self):
        # Same as ControlNet.copy but constructs the SD35 subclass.
        c = ControlNetSD35(None, global_average_pooling=self.global_average_pooling, load_device=self.load_device, manual_cast_dtype=self.manual_cast_dtype)
        c.control_model = self.control_model
        c.control_model_wrapped = self.control_model_wrapped
        self.copy_to(c)
        return c
+
def load_controlnet_sd35(sd, model_options={}):
    """Load a Stability SD3.5 controlnet state dict into a ControlNetSD35.

    Derives the model geometry from the y_embedder weight shape and picks an
    image preprocessor based on the stored "control_type" scalar
    (1 = canny, 2 = depth; 0/-1 leave the hint untouched).
    """
    control_type = -1
    if "control_type" in sd:
        control_type = round(sd.pop("control_type").item())

    # blur_cnet = control_type == 0
    canny_cnet = control_type == 1
    depth_cnet = control_type == 2

    # Remap the basic MMDiT key names, then carry over everything else.
    new_sd = {}
    for k in comfy.utils.MMDIT_MAP_BASIC:
        if k[1] in sd:
            new_sd[k[0]] = sd.pop(k[1])
    for k in sd:
        new_sd[k] = sd[k]
    sd = new_sd

    # Geometry inferred from the y embedder MLP width; assumes 64-dim
    # attention heads so depth doubles as the head count — TODO confirm.
    y_emb_shape = sd["y_embedder.mlp.0.weight"].shape
    depth = y_emb_shape[0] // 64
    hidden_size = 64 * depth
    num_heads = depth
    head_dim = hidden_size // num_heads
    num_blocks = comfy.model_detection.count_blocks(new_sd, 'transformer_blocks.{}.')

    load_device = comfy.model_management.get_torch_device()
    offload_device = comfy.model_management.unet_offload_device()
    unet_dtype = comfy.model_management.unet_dtype(model_params=-1)

    manual_cast_dtype = comfy.model_management.unet_manual_cast(unet_dtype, load_device)

    operations = model_options.get("custom_operations", None)
    if operations is None:
        operations = comfy.ops.pick_operations(unet_dtype, manual_cast_dtype, disable_fast_fp8=True)

    control_model = comfy.cldm.dit_embedder.ControlNetEmbedder(img_size=None,
                                                               patch_size=2,
                                                               in_chans=16,
                                                               num_layers=num_blocks,
                                                               main_model_double=depth,
                                                               double_y_emb=y_emb_shape[0] == y_emb_shape[1],
                                                               attention_head_dim=head_dim,
                                                               num_attention_heads=num_heads,
                                                               adm_in_channels=2048,
                                                               device=offload_device,
                                                               dtype=unet_dtype,
                                                               operations=operations)

    control_model = controlnet_load_state_dict(control_model, sd)

    latent_format = comfy.latent_formats.SD3()
    preprocess_image = lambda a: a
    if canny_cnet:
        # NOTE(review): rescales the canny hint; the exact range convention
        # comes from the upstream model — confirm against its training code.
        preprocess_image = lambda a: (a * 255 * 0.5 + 0.5)
    elif depth_cnet:
        preprocess_image = lambda a: 1.0 - a  # invert the depth hint

    control = ControlNetSD35(control_model, compression_ratio=1, latent_format=latent_format, load_device=load_device, manual_cast_dtype=manual_cast_dtype, preprocess_image=preprocess_image)
    return control
+
+
+
def load_controlnet_hunyuandit(controlnet_data, model_options={}):
    """Load a Hunyuan-DiT controlnet state dict into a ControlNet wrapper."""
    model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(controlnet_data, model_options=model_options)

    control_model = comfy.ldm.hydit.controlnet.HunYuanControlNet(operations=operations, device=offload_device, dtype=unet_dtype)
    control_model = controlnet_load_state_dict(control_model, controlnet_data)

    # Uses the SDXL latent format; extra_conds lists the additional
    # conditioning keys this control model presumably consumes — confirm
    # against the ControlNet base class.
    latent_format = comfy.latent_formats.SDXL()
    extra_conds = ['text_embedding_mask', 'encoder_hidden_states_t5', 'text_embedding_mask_t5', 'image_meta_size', 'style', 'cos_cis_img', 'sin_cis_img']
    control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds, strength_type=StrengthType.CONSTANT)
    return control
+
def load_controlnet_flux_xlabs_mistoline(sd, mistoline=False, model_options={}):
    """Load an XLabs-style Flux controlnet (MistoLine variant when mistoline=True)."""
    model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(sd, model_options=model_options)
    control_model = comfy.ldm.flux.controlnet.ControlNetFlux(mistoline=mistoline, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config)
    control_model = controlnet_load_state_dict(control_model, sd)
    # Extra conditioning keys forwarded to the control model at sample time.
    extra_conds = ['y', 'guidance']
    control = ControlNet(control_model, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
    return control
+
def load_controlnet_flux_instantx(sd, model_options={}):
    """Load an InstantX-style Flux controlnet (diffusers key layout)."""
    new_sd = comfy.model_detection.convert_diffusers_mmdit(sd, "")
    model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(new_sd, model_options=model_options)
    for k in sd:
        new_sd[k] = sd[k]

    # Union controlnets ship a mode embedder; its row count gives the number
    # of supported control modes.
    num_union_modes = 0
    union_cnet = "controlnet_mode_embedder.weight"
    if union_cnet in new_sd:
        num_union_modes = new_sd[union_cnet].shape[0]

    # // 4 presumably undoes input patchification — TODO confirm.  17 latent
    # channels marks an inpaint variant carrying a concatenated mask.
    control_latent_channels = new_sd.get("pos_embed_input.weight").shape[1] // 4
    concat_mask = False
    if control_latent_channels == 17:
        concat_mask = True

    control_model = comfy.ldm.flux.controlnet.ControlNetFlux(latent_input=True, num_union_modes=num_union_modes, control_latent_channels=control_latent_channels, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config)
    control_model = controlnet_load_state_dict(control_model, new_sd)

    latent_format = comfy.latent_formats.Flux()
    extra_conds = ['y', 'guidance']
    control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, concat_mask=concat_mask, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
    return control
+
def convert_mistoline(sd):
    """Rename MistoLine's single-block prefix to the ControlNetFlux convention."""
    prefix_map = {"single_controlnet_blocks.": "controlnet_single_blocks."}
    return comfy.utils.state_dict_prefix_replace(sd, prefix_map)
+
+
def load_controlnet_state_dict(state_dict, model=None, model_options={}):
    """Detect which controlnet type ``state_dict`` contains and load it.

    Dispatches to the dedicated loaders for DiT-family controlnets
    (Hunyuan-DiT, SD3/SD3.5 MMDiT, Flux variants), handles ControlLora and
    t2i-adapters, converts diffusers-format SD controlnets in place, and
    otherwise builds a classic cldm ControlNet.  Returns None when nothing
    recognizable is found.  ``model`` is only needed for "difference"
    controlnets whose weights are stored as a diff against the base model.
    """
    controlnet_data = state_dict
    if 'after_proj_list.18.bias' in controlnet_data.keys(): #Hunyuan DiT
        return load_controlnet_hunyuandit(controlnet_data, model_options=model_options)

    if "lora_controlnet" in controlnet_data:
        return ControlLora(controlnet_data, model_options=model_options)

    controlnet_config = None
    supported_inference_dtypes = None

    if "controlnet_cond_embedding.conv_in.weight" in controlnet_data: #diffusers format
        # Build a diffusers->comfy key mapping: unet keys plus the
        # controlnet-specific tensors (mid block, zero convs, hint block).
        controlnet_config = comfy.model_detection.unet_config_from_diffusers_unet(controlnet_data)
        diffusers_keys = comfy.utils.unet_to_diffusers(controlnet_config)
        diffusers_keys["controlnet_mid_block.weight"] = "middle_block_out.0.weight"
        diffusers_keys["controlnet_mid_block.bias"] = "middle_block_out.0.bias"

        # Map controlnet_down_blocks.N -> zero_convs.N.0 until keys run out.
        count = 0
        loop = True
        while loop:
            suffix = [".weight", ".bias"]
            for s in suffix:
                k_in = "controlnet_down_blocks.{}{}".format(count, s)
                k_out = "zero_convs.{}.0{}".format(count, s)
                if k_in not in controlnet_data:
                    loop = False
                    break
                diffusers_keys[k_in] = k_out
            count += 1

        # Map the hint-embedding convs; the final conv_out takes the last slot.
        count = 0
        loop = True
        while loop:
            suffix = [".weight", ".bias"]
            for s in suffix:
                if count == 0:
                    k_in = "controlnet_cond_embedding.conv_in{}".format(s)
                else:
                    k_in = "controlnet_cond_embedding.blocks.{}{}".format(count - 1, s)
                k_out = "input_hint_block.{}{}".format(count * 2, s)
                if k_in not in controlnet_data:
                    k_in = "controlnet_cond_embedding.conv_out{}".format(s)
                    loop = False
                diffusers_keys[k_in] = k_out
            count += 1

        new_sd = {}
        for k in diffusers_keys:
            if k in controlnet_data:
                new_sd[diffusers_keys[k]] = controlnet_data.pop(k)

        if "control_add_embedding.linear_1.bias" in controlnet_data: #Union Controlnet
            controlnet_config["union_controlnet_num_control_type"] = controlnet_data["task_embedding"].shape[0]
            for k in list(controlnet_data.keys()):
                new_k = k.replace('.attn.in_proj_', '.attn.in_proj.')
                new_sd[new_k] = controlnet_data.pop(k)

        leftover_keys = controlnet_data.keys()
        if len(leftover_keys) > 0:
            logging.warning("leftover keys: {}".format(leftover_keys))
        controlnet_data = new_sd
    elif "controlnet_blocks.0.weight" in controlnet_data:
        # DiT-family controlnets, distinguished by characteristic keys.
        if "double_blocks.0.img_attn.norm.key_norm.scale" in controlnet_data:
            return load_controlnet_flux_xlabs_mistoline(controlnet_data, model_options=model_options)
        elif "pos_embed_input.proj.weight" in controlnet_data:
            if "transformer_blocks.0.adaLN_modulation.1.bias" in controlnet_data:
                return load_controlnet_sd35(controlnet_data, model_options=model_options) #Stability sd3.5 format
            else:
                return load_controlnet_mmdit(controlnet_data, model_options=model_options) #SD3 diffusers controlnet
        elif "controlnet_x_embedder.weight" in controlnet_data:
            return load_controlnet_flux_instantx(controlnet_data, model_options=model_options)
        elif "controlnet_blocks.0.linear.weight" in controlnet_data: #mistoline flux
            return load_controlnet_flux_xlabs_mistoline(convert_mistoline(controlnet_data), mistoline=True, model_options=model_options)

    # Classic SD controlnet: keys either carry a "control_model." prefix
    # (.pth style) or not; anything else is tried as a t2i adapter.
    pth_key = 'control_model.zero_convs.0.0.weight'
    pth = False
    key = 'zero_convs.0.0.weight'
    if pth_key in controlnet_data:
        pth = True
        key = pth_key
        prefix = "control_model."
    elif key in controlnet_data:
        prefix = ""
    else:
        net = load_t2i_adapter(controlnet_data, model_options=model_options)
        if net is None:
            logging.error("error could not detect control model type.")
        return net

    if controlnet_config is None:
        model_config = comfy.model_detection.model_config_from_unet(controlnet_data, prefix, True)
        supported_inference_dtypes = list(model_config.supported_inference_dtypes)
        controlnet_config = model_config.unet_config

    unet_dtype = model_options.get("dtype", None)
    if unet_dtype is None:
        weight_dtype = comfy.utils.weight_dtype(controlnet_data)

        if supported_inference_dtypes is None:
            supported_inference_dtypes = [comfy.model_management.unet_dtype()]

        unet_dtype = comfy.model_management.unet_dtype(model_params=-1, supported_dtypes=supported_inference_dtypes, weight_dtype=weight_dtype)

    load_device = comfy.model_management.get_torch_device()

    manual_cast_dtype = comfy.model_management.unet_manual_cast(unet_dtype, load_device)
    operations = model_options.get("custom_operations", None)
    if operations is None:
        operations = comfy.ops.pick_operations(unet_dtype, manual_cast_dtype)

    controlnet_config["operations"] = operations
    controlnet_config["dtype"] = unet_dtype
    controlnet_config["device"] = comfy.model_management.unet_offload_device()
    controlnet_config.pop("out_channels")
    # Hint channel count comes straight from the first hint-block conv.
    controlnet_config["hint_channels"] = controlnet_data["{}input_hint_block.0.weight".format(prefix)].shape[1]
    control_model = comfy.cldm.cldm.ControlNet(**controlnet_config)

    if pth:
        if 'difference' in controlnet_data:
            # Diff controlnet: weights are stored relative to the base model,
            # so add the corresponding base weights before loading.
            if model is not None:
                comfy.model_management.load_models_gpu([model])
                model_sd = model.model_state_dict()
                for x in controlnet_data:
                    c_m = "control_model."
                    if x.startswith(c_m):
                        sd_key = "diffusion_model.{}".format(x[len(c_m):])
                        if sd_key in model_sd:
                            cd = controlnet_data[x]
                            cd += model_sd[sd_key].type(cd.dtype).to(cd.device)
            else:
                logging.warning("WARNING: Loaded a diff controlnet without a model. It will very likely not work.")

        # Wrapper module so the "control_model." key prefix lines up with
        # the state dict during load_state_dict.
        class WeightsLoader(torch.nn.Module):
            pass
        w = WeightsLoader()
        w.control_model = control_model
        missing, unexpected = w.load_state_dict(controlnet_data, strict=False)
    else:
        missing, unexpected = control_model.load_state_dict(controlnet_data, strict=False)

    if len(missing) > 0:
        logging.warning("missing controlnet keys: {}".format(missing))

    if len(unexpected) > 0:
        logging.debug("unexpected controlnet keys: {}".format(unexpected))

    global_average_pooling = model_options.get("global_average_pooling", False)
    control = ControlNet(control_model, global_average_pooling=global_average_pooling, load_device=load_device, manual_cast_dtype=manual_cast_dtype)
    return control
+
def load_controlnet(ckpt_path, model=None, model_options={}):
    """Load a controlnet/t2i-adapter checkpoint from ``ckpt_path``.

    ``model`` is forwarded for diff controlnets; returns None (after
    logging an error) when the file holds no recognizable control data.
    """
    model_options = model_options.copy()
    if "global_average_pooling" not in model_options:
        base_name = os.path.splitext(ckpt_path)[0]
        #TODO: smarter way of enabling global_average_pooling
        if base_name.endswith(("_shuffle", "_shuffle_fp16")):
            model_options["global_average_pooling"] = True

    state_dict = comfy.utils.load_torch_file(ckpt_path, safe_load=True)
    cnet = load_controlnet_state_dict(state_dict, model=model, model_options=model_options)
    if cnet is None:
        logging.error("error checkpoint does not contain controlnet or t2i adapter data {}".format(ckpt_path))
    return cnet
+
class T2IAdapter(ControlBase):
    """ControlBase implementation wrapping a t2i-adapter model.

    Unlike a controlnet, the adapter is evaluated once per hint image (not
    per sampling step); its outputs are cached in ``control_input`` and
    cloned into each step's control dict.
    """
    def __init__(self, t2i_model, channels_in, compression_ratio, upscale_algorithm, device=None):
        super().__init__()
        self.t2i_model = t2i_model
        self.channels_in = channels_in  # hint channels the adapter expects
        self.control_input = None  # cached adapter outputs
        self.compression_ratio = compression_ratio  # latent-to-pixel scale
        self.upscale_algorithm = upscale_algorithm
        if device is None:
            device = comfy.model_management.get_torch_device()
        self.device = device

    def scale_image_to(self, width, height):
        # Round both dimensions up to a multiple of the model's unshuffle
        # amount so its pixel-unshuffle divides evenly.
        unshuffle_amount = self.t2i_model.unshuffle_amount
        width = math.ceil(width / unshuffle_amount) * unshuffle_amount
        height = math.ceil(height / unshuffle_amount) * unshuffle_amount
        return width, height

    def get_control(self, x_noisy, t, cond, batched_number, transformer_options):
        # Chain to any previously applied control first.
        control_prev = None
        if self.previous_controlnet is not None:
            control_prev = self.previous_controlnet.get_control(x_noisy, t, cond, batched_number, transformer_options)

        # Outside the configured timestep window this adapter is inactive.
        if self.timestep_range is not None:
            if t[0] > self.timestep_range[0] or t[0] < self.timestep_range[1]:
                if control_prev is not None:
                    return control_prev
                else:
                    return None

        # (Re)build the hint when missing or when the latent size changed.
        if self.cond_hint is None or x_noisy.shape[2] * self.compression_ratio != self.cond_hint.shape[2] or x_noisy.shape[3] * self.compression_ratio != self.cond_hint.shape[3]:
            if self.cond_hint is not None:
                del self.cond_hint
            self.control_input = None
            self.cond_hint = None
            width, height = self.scale_image_to(x_noisy.shape[3] * self.compression_ratio, x_noisy.shape[2] * self.compression_ratio)
            self.cond_hint = comfy.utils.common_upscale(self.cond_hint_original, width, height, self.upscale_algorithm, "center").float().to(self.device)
            if self.channels_in == 1 and self.cond_hint.shape[1] > 1:
                # Single-channel adapters get a channel-averaged hint.
                self.cond_hint = torch.mean(self.cond_hint, 1, keepdim=True)
        if x_noisy.shape[0] != self.cond_hint.shape[0]:
            self.cond_hint = broadcast_image_to(self.cond_hint, x_noisy.shape[0], batched_number)
        if self.control_input is None:
            # Run the adapter once and cache its outputs; move it back to
            # CPU afterwards to free device memory.
            self.t2i_model.to(x_noisy.dtype)
            self.t2i_model.to(self.device)
            self.control_input = self.t2i_model(self.cond_hint.to(x_noisy.dtype))
            self.t2i_model.cpu()

        # Hand out clones so downstream merging cannot corrupt the cache.
        control_input = {}
        for k in self.control_input:
            control_input[k] = list(map(lambda a: None if a is None else a.clone(), self.control_input[k]))

        return self.control_merge(control_input, control_prev, x_noisy.dtype)

    def copy(self):
        c = T2IAdapter(self.t2i_model, self.channels_in, self.compression_ratio, self.upscale_algorithm)
        self.copy_to(c)
        return c
+
def load_t2i_adapter(t2i_data, model_options={}): #TODO: model_options
    """Build a T2IAdapter from a state dict, detecting the adapter family.

    Handles diffusers and original t2i-adapter layouts plus the Cascade
    controlnet backbones; returns None for unrecognized state dicts.
    """
    compression_ratio = 8
    upscale_algorithm = 'nearest-exact'

    if 'adapter' in t2i_data:
        t2i_data = t2i_data['adapter']
    if 'adapter.body.0.resnets.0.block1.weight' in t2i_data: #diffusers format
        # Flatten the diffusers "adapter.body.i.resnets.j" nesting into the
        # sequential "body.N" numbering used by the comfy adapter models.
        prefix_replace = {}
        for i in range(4):
            for j in range(2):
                prefix_replace["adapter.body.{}.resnets.{}.".format(i, j)] = "body.{}.".format(i * 2 + j)
            prefix_replace["adapter.body.{}.".format(i, )] = "body.{}.".format(i * 2)
        prefix_replace["adapter."] = ""
        t2i_data = comfy.utils.state_dict_prefix_replace(t2i_data, prefix_replace)
    keys = t2i_data.keys()

    if "body.0.in_conv.weight" in keys:
        cin = t2i_data['body.0.in_conv.weight'].shape[1]
        model_ad = comfy.t2i_adapter.adapter.Adapter_light(cin=cin, channels=[320, 640, 1280, 1280], nums_rb=4)
    elif 'conv_in.weight' in keys:
        # Full adapter: infer channel layout and kernel size from weights.
        cin = t2i_data['conv_in.weight'].shape[1]
        channel = t2i_data['conv_in.weight'].shape[0]
        ksize = t2i_data['body.0.block2.weight'].shape[2]
        use_conv = False
        down_opts = list(filter(lambda a: a.endswith("down_opt.op.weight"), keys))
        if len(down_opts) > 0:
            use_conv = True
        xl = False
        if cin == 256 or cin == 768:
            xl = True  # presumably the SDXL adapter variant — confirm
        model_ad = comfy.t2i_adapter.adapter.Adapter(cin=cin, channels=[channel, channel*2, channel*4, channel*4][:4], nums_rb=2, ksize=ksize, sk=True, use_conv=use_conv, xl=xl)
    elif "backbone.0.0.weight" in keys:
        model_ad = comfy.ldm.cascade.controlnet.ControlNet(c_in=t2i_data['backbone.0.0.weight'].shape[1], proj_blocks=[0, 4, 8, 12, 51, 55, 59, 63])
        compression_ratio = 32
        upscale_algorithm = 'bilinear'
    elif "backbone.10.blocks.0.weight" in keys:
        model_ad = comfy.ldm.cascade.controlnet.ControlNet(c_in=t2i_data['backbone.0.weight'].shape[1], bottleneck_mode="large", proj_blocks=[0, 4, 8, 12, 51, 55, 59, 63])
        compression_ratio = 1
        upscale_algorithm = 'nearest-exact'
    else:
        return None

    # NOTE(review): strict load here (unlike the controlnet loaders), so
    # unexpected keys raise instead of being logged — confirm intended.
    missing, unexpected = model_ad.load_state_dict(t2i_data)
    if len(missing) > 0:
        logging.warning("t2i missing {}".format(missing))

    if len(unexpected) > 0:
        logging.debug("t2i unexpected {}".format(unexpected))

    return T2IAdapter(model_ad, model_ad.input_channels, compression_ratio, upscale_algorithm)
diff --git a/ComfyUI/comfy/diffusers_convert.py b/ComfyUI/comfy/diffusers_convert.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb9495348704c4f58b537f524ff6194d572d9fad
--- /dev/null
+++ b/ComfyUI/comfy/diffusers_convert.py
@@ -0,0 +1,189 @@
+import re
+import torch
+import logging
+
+# conversion code from https://github.com/huggingface/diffusers/blob/main/scripts/convert_diffusers_to_original_stable_diffusion.py
+
+# ================#
+# VAE Conversion #
+# ================#
+
# Substring substitutions applied to every VAE key when converting from the
# HF diffusers layout back to the original stable-diffusion layout.
vae_conversion_map = [
    # (stable-diffusion, HF Diffusers)
    ("nin_shortcut", "conv_shortcut"),
    ("norm_out", "conv_norm_out"),
    ("mid.attn_1.", "mid_block.attentions.0."),
]

for i in range(4):
    # down_blocks have two resnets
    for j in range(2):
        hf_down_prefix = f"encoder.down_blocks.{i}.resnets.{j}."
        sd_down_prefix = f"encoder.down.{i}.block.{j}."
        vae_conversion_map.append((sd_down_prefix, hf_down_prefix))

    if i < 3:
        # only the first three resolution levels have samplers
        hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0."
        sd_downsample_prefix = f"down.{i}.downsample."
        vae_conversion_map.append((sd_downsample_prefix, hf_downsample_prefix))

        hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0."
        sd_upsample_prefix = f"up.{3 - i}.upsample."
        vae_conversion_map.append((sd_upsample_prefix, hf_upsample_prefix))

    # up_blocks have three resnets
    # also, up blocks in hf are numbered in reverse from sd
    for j in range(3):
        hf_up_prefix = f"decoder.up_blocks.{i}.resnets.{j}."
        sd_up_prefix = f"decoder.up.{3 - i}.block.{j}."
        vae_conversion_map.append((sd_up_prefix, hf_up_prefix))

# this part accounts for mid blocks in both the encoder and the decoder
for i in range(2):
    hf_mid_res_prefix = f"mid_block.resnets.{i}."
    sd_mid_res_prefix = f"mid.block_{i + 1}."
    vae_conversion_map.append((sd_mid_res_prefix, hf_mid_res_prefix))

# Attention-layer renames, applied only to keys containing "attentions"
# (see convert_vae_state_dict).
vae_conversion_map_attn = [
    # (stable-diffusion, HF Diffusers)
    ("norm.", "group_norm."),
    ("q.", "query."),
    ("k.", "key."),
    ("v.", "value."),
    ("q.", "to_q."),
    ("k.", "to_k."),
    ("v.", "to_v."),
    ("proj_out.", "to_out.0."),
    ("proj_out.", "proj_attn."),
]
+
+
def reshape_weight_for_sd(w, conv3d=False):
    """Append singleton spatial dims so an HF linear weight matches an SD conv kernel.

    A 2-D (out, in) weight becomes (out, in, 1, 1), or (out, in, 1, 1, 1)
    when ``conv3d`` is set.
    """
    tail = (1, 1, 1) if conv3d else (1, 1)
    return w.reshape(tuple(w.shape) + tail)
+
+
def convert_vae_state_dict(vae_state_dict):
    """Convert an HF diffusers VAE state dict to the original SD key layout.

    Also reshapes the mid-block attention q/k/v/proj weights from linear to
    conv form (conv3d when the source conv weights are 5-D).
    """
    # Start from an identity key mapping, then rewrite each key by applying
    # the (sd, hf) substitutions.
    mapping = {k: k for k in vae_state_dict.keys()}
    conv3d = False
    for k, v in mapping.items():
        for sd_part, hf_part in vae_conversion_map:
            v = v.replace(hf_part, sd_part)
        if v.endswith(".conv.weight"):
            if not conv3d and vae_state_dict[k].ndim == 5:
                conv3d = True  # 5-D conv weights: presumably a 3d/video VAE — confirm
        mapping[k] = v
    # Attention-specific renames only apply to attention keys.
    for k, v in mapping.items():
        if "attentions" in k:
            for sd_part, hf_part in vae_conversion_map_attn:
                v = v.replace(hf_part, sd_part)
            mapping[k] = v
    new_state_dict = {v: vae_state_dict[k] for k, v in mapping.items()}
    weights_to_convert = ["q", "k", "v", "proj_out"]
    for k, v in new_state_dict.items():
        for weight_name in weights_to_convert:
            if f"mid.attn_1.{weight_name}.weight" in k:
                logging.debug(f"Reshaping {k} for SD format")
                new_state_dict[k] = reshape_weight_for_sd(v, conv3d=conv3d)
    return new_state_dict
+
+
+# =========================#
+# Text Encoder Conversion #
+# =========================#
+
+
# Substring substitutions for text-encoder keys when converting from the HF
# diffusers layout to the original stable-diffusion layout.
textenc_conversion_lst = [
    # (stable-diffusion, HF Diffusers)
    ("resblocks.", "text_model.encoder.layers."),
    ("ln_1", "layer_norm1"),
    ("ln_2", "layer_norm2"),
    (".c_fc.", ".fc1."),
    (".c_proj.", ".fc2."),
    (".attn", ".self_attn"),
    ("ln_final.", "transformer.text_model.final_layer_norm."),
    ("token_embedding.weight", "transformer.text_model.embeddings.token_embedding.weight"),
    ("positional_embedding", "transformer.text_model.embeddings.position_embedding.weight"),
]
# Regex matching any HF substring above; matches are rewritten to the SD
# spelling via the `protected` lookup (see convert_text_enc_state_dict_v20).
protected = {re.escape(x[1]): x[0] for x in textenc_conversion_lst}
textenc_pattern = re.compile("|".join(protected.keys()))

# Ordering is from https://github.com/pytorch/pytorch/blob/master/test/cpp/api/modules.cpp
code2idx = {"q": 0, "k": 1, "v": 2}
+
+
# This function exists because at the time of writing torch.cat can't do fp8 with cuda
def cat_tensors(tensors):
    """Concatenate ``tensors`` along dim 0 via a preallocated buffer copy."""
    total_rows = sum(t.shape[0] for t in tensors)
    first = tensors[0]
    out = torch.empty([total_rows] + list(first.shape[1:]), device=first.device, dtype=first.dtype)

    offset = 0
    for t in tensors:
        rows = t.shape[0]
        out[offset:offset + rows] = t
        offset += rows

    return out
+
+
def convert_text_enc_state_dict_v20(text_enc_dict, prefix=""):
    """Convert an HF CLIP text-encoder state dict to the original SD v2 layout.

    Per-layer q/k/v projection weights and biases are fused into single
    ``in_proj_weight``/``in_proj_bias`` tensors; other keys are renamed via
    ``textenc_pattern``.  Keys not starting with ``prefix`` are skipped.

    Raises:
        Exception: if any of a layer's q/k/v tensors is missing.
    """
    new_state_dict = {}
    capture_qkv_weight = {}
    capture_qkv_bias = {}
    for k, v in text_enc_dict.items():
        if not k.startswith(prefix):
            continue
        if (
            k.endswith(".self_attn.q_proj.weight")
            or k.endswith(".self_attn.k_proj.weight")
            or k.endswith(".self_attn.v_proj.weight")
        ):
            # Collect q/k/v weights per layer; fused after the loop.
            k_pre = k[: -len(".q_proj.weight")]
            k_code = k[-len("q_proj.weight")]  # the 'q'/'k'/'v' character
            if k_pre not in capture_qkv_weight:
                capture_qkv_weight[k_pre] = [None, None, None]
            capture_qkv_weight[k_pre][code2idx[k_code]] = v
            continue

        if (
            k.endswith(".self_attn.q_proj.bias")
            or k.endswith(".self_attn.k_proj.bias")
            or k.endswith(".self_attn.v_proj.bias")
        ):
            k_pre = k[: -len(".q_proj.bias")]
            k_code = k[-len("q_proj.bias")]
            if k_pre not in capture_qkv_bias:
                capture_qkv_bias[k_pre] = [None, None, None]
            capture_qkv_bias[k_pre][code2idx[k_code]] = v
            continue

        text_proj = "transformer.text_projection.weight"
        if k.endswith(text_proj):
            # The projection is stored transposed in the SD layout.
            new_state_dict[k.replace(text_proj, "text_projection")] = v.transpose(0, 1).contiguous()
        else:
            relabelled_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], k)
            new_state_dict[relabelled_key] = v

    # Fuse the collected q/k/v tensors in (q, k, v) order.
    for k_pre, tensors in capture_qkv_weight.items():
        if None in tensors:
            raise Exception("CORRUPTED MODEL: one of the q-k-v values for the text encoder was missing")
        relabelled_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], k_pre)
        new_state_dict[relabelled_key + ".in_proj_weight"] = cat_tensors(tensors)

    for k_pre, tensors in capture_qkv_bias.items():
        if None in tensors:
            raise Exception("CORRUPTED MODEL: one of the q-k-v values for the text encoder was missing")
        relabelled_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], k_pre)
        new_state_dict[relabelled_key + ".in_proj_bias"] = cat_tensors(tensors)

    return new_state_dict
+
+
def convert_text_enc_state_dict(text_enc_dict):
    """Return ``text_enc_dict`` unchanged (non-v2 encoders need no remapping here)."""
    return text_enc_dict
diff --git a/ComfyUI/comfy/diffusers_load.py b/ComfyUI/comfy/diffusers_load.py
new file mode 100644
index 0000000000000000000000000000000000000000..56e63a7565f083eb4e3bc484a3a9f90103306a2f
--- /dev/null
+++ b/ComfyUI/comfy/diffusers_load.py
@@ -0,0 +1,36 @@
+import os
+
+import comfy.sd
+
def first_file(path, filenames):
    """Return the first existing file among ``filenames`` inside ``path``, or None."""
    candidates = (os.path.join(path, name) for name in filenames)
    return next((candidate for candidate in candidates if os.path.exists(candidate)), None)
+
def load_diffusers(model_path, output_vae=True, output_clip=True, embedding_directory=None):
    """Load a diffusers-format model directory as a (unet, clip, vae) tuple.

    Looks for the usual fp16/fp32 safetensors/bin filenames inside the
    "unet", "vae", "text_encoder" and "text_encoder_2" subfolders.  The
    clip/vae slots are returned as None when the corresponding output flag
    is False.
    """
    diffusion_model_names = ["diffusion_pytorch_model.fp16.safetensors", "diffusion_pytorch_model.safetensors", "diffusion_pytorch_model.fp16.bin", "diffusion_pytorch_model.bin"]
    unet_path = first_file(os.path.join(model_path, "unet"), diffusion_model_names)
    vae_path = first_file(os.path.join(model_path, "vae"), diffusion_model_names)

    text_encoder_model_names = ["model.fp16.safetensors", "model.safetensors", "pytorch_model.fp16.bin", "pytorch_model.bin"]
    text_encoder1_path = first_file(os.path.join(model_path, "text_encoder"), text_encoder_model_names)
    text_encoder2_path = first_file(os.path.join(model_path, "text_encoder_2"), text_encoder_model_names)

    # The second text encoder is optional (present in two-encoder models).
    text_encoder_paths = [text_encoder1_path]
    if text_encoder2_path is not None:
        text_encoder_paths.append(text_encoder2_path)

    unet = comfy.sd.load_diffusion_model(unet_path)

    clip = None
    if output_clip:
        clip = comfy.sd.load_clip(text_encoder_paths, embedding_directory=embedding_directory)

    vae = None
    if output_vae:
        # NOTE(review): comfy.utils is referenced but only comfy.sd is
        # imported at the top of this file; this works only if comfy.sd
        # itself imports comfy.utils — confirm.
        sd = comfy.utils.load_torch_file(vae_path)
        vae = comfy.sd.VAE(sd=sd)

    return (unet, clip, vae)
diff --git a/ComfyUI/comfy/float.py b/ComfyUI/comfy/float.py
new file mode 100644
index 0000000000000000000000000000000000000000..521316fd2facaab90583da8487029a365aefd9e7
--- /dev/null
+++ b/ComfyUI/comfy/float.py
@@ -0,0 +1,67 @@
+import torch
+
+def calc_mantissa(abs_x, exponent, normal_mask, MANTISSA_BITS, EXPONENT_BIAS, generator=None):
+ mantissa_scaled = torch.where(
+ normal_mask,
+ (abs_x / (2.0 ** (exponent - EXPONENT_BIAS)) - 1.0) * (2**MANTISSA_BITS),
+ (abs_x / (2.0 ** (-EXPONENT_BIAS + 1 - MANTISSA_BITS)))
+ )
+
+ mantissa_scaled += torch.rand(mantissa_scaled.size(), dtype=mantissa_scaled.dtype, layout=mantissa_scaled.layout, device=mantissa_scaled.device, generator=generator)
+ return mantissa_scaled.floor() / (2**MANTISSA_BITS)
+
#Not 100% sure about this
def manual_stochastic_round_to_float8(x, dtype, generator=None):
    """Stochastically round ``x`` to a float8 format's value grid.

    Decomposes |x| into exponent and mantissa for the target format,
    quantizes the mantissa with calc_mantissa's random rounding, then
    reassembles the signed value and clamps it to the dtype's finite
    range.  Note the returned tensor still has x's (half) dtype; the
    caller performs the final cast.
    """
    if dtype == torch.float8_e4m3fn:
        EXPONENT_BITS, MANTISSA_BITS, EXPONENT_BIAS = 4, 3, 7
    elif dtype == torch.float8_e5m2:
        EXPONENT_BITS, MANTISSA_BITS, EXPONENT_BIAS = 5, 2, 15
    else:
        raise ValueError("Unsupported dtype")

    x = x.half()
    sign = torch.sign(x)
    abs_x = x.abs()
    sign = torch.where(abs_x == 0, 0, sign)  # zero inputs stay exactly zero

    # Combine exponent calculation and clamping
    exponent = torch.clamp(
        torch.floor(torch.log2(abs_x)) + EXPONENT_BIAS,
        0, 2**EXPONENT_BITS - 1
    )

    # Combine mantissa calculation and rounding
    normal_mask = ~(exponent == 0)  # biased exponent 0 => subnormal encoding

    # In-place: abs_x now holds the quantized mantissa fraction.
    abs_x[:] = calc_mantissa(abs_x, exponent, normal_mask, MANTISSA_BITS, EXPONENT_BIAS, generator=generator)

    # Reassemble sign * 2^e * (1 + m) for normals, sign * 2^(1-bias) * m
    # for subnormals.
    sign *= torch.where(
        normal_mask,
        (2.0 ** (exponent - EXPONENT_BIAS)) * (1.0 + abs_x),
        (2.0 ** (-EXPONENT_BIAS + 1)) * abs_x
    )

    # Clamp to the target dtype's finite range (finfo min/max).
    inf = torch.finfo(dtype)
    torch.clamp(sign, min=inf.min, max=inf.max, out=sign)
    return sign
+
+
+
+def stochastic_rounding(value, dtype, seed=0):
+ if dtype == torch.float32:
+ return value.to(dtype=torch.float32)
+ if dtype == torch.float16:
+ return value.to(dtype=torch.float16)
+ if dtype == torch.bfloat16:
+ return value.to(dtype=torch.bfloat16)
+ if dtype == torch.float8_e4m3fn or dtype == torch.float8_e5m2:
+ generator = torch.Generator(device=value.device)
+ generator.manual_seed(seed)
+ output = torch.empty_like(value, dtype=dtype)
+ num_slices = max(1, (value.numel() / (4096 * 4096)))
+ slice_size = max(1, round(value.shape[0] / num_slices))
+ for i in range(0, value.shape[0], slice_size):
+ output[i:i+slice_size].copy_(manual_stochastic_round_to_float8(value[i:i+slice_size], dtype, generator=generator))
+ return output
+
+ return value.to(dtype=dtype)
diff --git a/ComfyUI/comfy/gligen.py b/ComfyUI/comfy/gligen.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d7b6c2f4cd1009ac3c52bc5f68442a91b417048
--- /dev/null
+++ b/ComfyUI/comfy/gligen.py
@@ -0,0 +1,299 @@
+import math
+import torch
+from torch import nn
+from .ldm.modules.attention import CrossAttention, FeedForward
+import comfy.ops
+ops = comfy.ops.manual_cast
+
+
class GatedCrossAttentionDense(nn.Module):
    """Gated cross-attention block (GLIGEN): visual tokens attend to grounding tokens."""

    def __init__(self, query_dim, context_dim, n_heads, d_head):
        super().__init__()
        self.attn = CrossAttention(
            query_dim=query_dim,
            context_dim=context_dim,
            heads=n_heads,
            dim_head=d_head,
            operations=ops)
        self.ff = FeedForward(query_dim, glu=True)
        self.norm1 = ops.LayerNorm(query_dim)
        self.norm2 = ops.LayerNorm(query_dim)
        # Gates start at zero, so the freshly initialized block is an identity mapping.
        self.register_parameter('alpha_attn', nn.Parameter(torch.tensor(0.)))
        self.register_parameter('alpha_dense', nn.Parameter(torch.tensor(0.)))
        # Externally adjustable magnitude for tanh(alpha); setting it to 0 makes the
        # whole block behave like the original (ungated) model.
        self.scale = 1

    def forward(self, x, objs):
        gate_attn = self.scale * torch.tanh(self.alpha_attn)
        gate_dense = self.scale * torch.tanh(self.alpha_dense)
        x = x + gate_attn * self.attn(self.norm1(x), objs, objs)
        x = x + gate_dense * self.ff(self.norm2(x))
        return x
+
+
class GatedSelfAttentionDense(nn.Module):
    """Gated self-attention over concatenated visual + grounding tokens (GLIGEN fuser)."""

    def __init__(self, query_dim, context_dim, n_heads, d_head):
        super().__init__()
        # Project grounding features into the visual token dimension so the two
        # token sets can be concatenated for self-attention.
        self.linear = ops.Linear(context_dim, query_dim)
        self.attn = CrossAttention(
            query_dim=query_dim,
            context_dim=query_dim,
            heads=n_heads,
            dim_head=d_head,
            operations=ops)
        self.ff = FeedForward(query_dim, glu=True)
        self.norm1 = ops.LayerNorm(query_dim)
        self.norm2 = ops.LayerNorm(query_dim)
        # Zero-initialized gates: the block is a no-op until trained.
        self.register_parameter('alpha_attn', nn.Parameter(torch.tensor(0.)))
        self.register_parameter('alpha_dense', nn.Parameter(torch.tensor(0.)))
        # Externally adjustable magnitude for tanh(alpha); 0 disables the block entirely.
        self.scale = 1

    def forward(self, x, objs):
        n_visual = x.shape[1]
        objs = self.linear(objs)
        tokens = torch.cat([x, objs], dim=1)
        # Self-attend over all tokens, then keep only the visual positions.
        attended = self.attn(self.norm1(tokens))[:, :n_visual, :]
        x = x + self.scale * torch.tanh(self.alpha_attn) * attended
        x = x + self.scale * torch.tanh(self.alpha_dense) * self.ff(self.norm2(x))
        return x
+
+
class GatedSelfAttentionDense2(nn.Module):
    """Variant of GatedSelfAttentionDense that resamples the attended grounding
    tokens onto the visual grid via bicubic interpolation instead of slicing."""

    def __init__(self, query_dim, context_dim, n_heads, d_head):
        super().__init__()

        # we need a linear projection since we need cat visual feature and obj
        # feature
        self.linear = ops.Linear(context_dim, query_dim)

        # NOTE(review): n_heads is accepted but not forwarded here, so CrossAttention
        # uses its default head count — confirm this is intentional.
        self.attn = CrossAttention(
            query_dim=query_dim, context_dim=query_dim, dim_head=d_head, operations=ops)
        self.ff = FeedForward(query_dim, glu=True)

        self.norm1 = ops.LayerNorm(query_dim)
        self.norm2 = ops.LayerNorm(query_dim)

        # Zero-initialized gates: block starts as an identity mapping.
        self.register_parameter('alpha_attn', nn.Parameter(torch.tensor(0.)))
        self.register_parameter('alpha_dense', nn.Parameter(torch.tensor(0.)))

        # this can be useful: we can externally change magnitude of tanh(alpha)
        # for example, when it is set to 0, then the entire model is same as
        # original one
        self.scale = 1

    def forward(self, x, objs):
        # x: visual tokens (B, N_visual, C); objs: grounding tokens (B, N_ground, ctx_dim).
        B, N_visual, _ = x.shape
        B, N_ground, _ = objs.shape

        objs = self.linear(objs)

        # sanity check: both token counts must correspond to square spatial grids
        size_v = math.sqrt(N_visual)
        size_g = math.sqrt(N_ground)
        assert int(size_v) == size_v, "Visual tokens must be square rootable"
        assert int(size_g) == size_g, "Grounding tokens must be square rootable"
        size_v = int(size_v)
        size_g = int(size_g)

        # select grounding token and resize it to visual token size as residual
        out = self.attn(self.norm1(torch.cat([x, objs], dim=1)))[
            :, N_visual:, :]
        out = out.permute(0, 2, 1).reshape(B, -1, size_g, size_g)
        out = torch.nn.functional.interpolate(
            out, (size_v, size_v), mode='bicubic')
        residual = out.reshape(B, -1, N_visual).permute(0, 2, 1)

        # add residual to visual feature
        x = x + self.scale * torch.tanh(self.alpha_attn) * residual
        x = x + self.scale * \
            torch.tanh(self.alpha_dense) * self.ff(self.norm2(x))

        return x
+
+
class FourierEmbedder:
    """Sin/cos (Fourier) feature embedder with a temperature-scaled frequency ladder."""

    def __init__(self, num_freqs=64, temperature=100):
        self.num_freqs = num_freqs
        self.temperature = temperature
        # Geometric frequencies: temperature ** (k / num_freqs) for k = 0..num_freqs-1.
        self.freq_bands = temperature ** (torch.arange(num_freqs) / num_freqs)

    @torch.no_grad()
    def __call__(self, x, cat_dim=-1):
        """Embed tensor `x` of arbitrary shape; features are concatenated along cat_dim."""
        # For each frequency band, emit sin then cos of the scaled input.
        features = [fn(freq * x) for freq in self.freq_bands for fn in (torch.sin, torch.cos)]
        return torch.cat(features, cat_dim)
+
+
class PositionNet(nn.Module):
    """Encodes grounding boxes (xyxy) plus text embeddings into GLIGEN object tokens."""

    def __init__(self, in_dim, out_dim, fourier_freqs=8):
        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim

        self.fourier_embedder = FourierEmbedder(num_freqs=fourier_freqs)
        self.position_dim = fourier_freqs * 2 * 4  # 2 is sin&cos, 4 is xyxy

        self.linears = nn.Sequential(
            ops.Linear(self.in_dim + self.position_dim, 512),
            nn.SiLU(),
            ops.Linear(512, 512),
            nn.SiLU(),
            ops.Linear(512, out_dim),
        )

        # Learned embeddings substituted for padded (masked-out) slots.
        self.null_positive_feature = torch.nn.Parameter(torch.zeros([self.in_dim]))
        self.null_position_feature = torch.nn.Parameter(torch.zeros([self.position_dim]))

    def forward(self, boxes, masks, positive_embeddings):
        B, N, _ = boxes.shape
        masks = masks.unsqueeze(-1)

        # Fourier-embed box coordinates: B*N*4 -> B*N*position_dim.
        # Padding slots are embedded too, then replaced below.
        xyxy_embedding = self.fourier_embedder(boxes)

        null_pos = self.null_positive_feature.to(device=boxes.device, dtype=boxes.dtype).view(1, 1, -1)
        null_xyxy = self.null_position_feature.to(device=boxes.device, dtype=boxes.dtype).view(1, 1, -1)

        # Where the mask is zero (padding), swap in the learned null embeddings.
        positive_embeddings = positive_embeddings * masks + (1 - masks) * null_pos
        xyxy_embedding = xyxy_embedding * masks + (1 - masks) * null_xyxy

        objs = self.linears(torch.cat([positive_embeddings, xyxy_embedding], dim=-1))
        assert objs.shape == torch.Size([B, N, self.out_dim])
        return objs
+
+
class Gligen(nn.Module):
    """Wraps GLIGEN fuser modules + position net; builds patch functions for sampling."""

    def __init__(self, modules, position_net, key_dim):
        super().__init__()
        self.module_list = nn.ModuleList(modules)
        self.position_net = position_net
        self.key_dim = key_dim
        # Fixed number of grounding slots; unused ones are zero-padded.
        self.max_objs = 30
        self.current_device = torch.device("cpu")

    def _set_position(self, boxes, masks, positive_embeddings):
        objs = self.position_net(boxes, masks, positive_embeddings)

        def func(x, extra_options):
            # One fuser module per transformer block, selected by index.
            module = self.module_list[extra_options["transformer_index"]]
            return module(x, objs.to(device=x.device, dtype=x.dtype))
        return func

    def set_position(self, latent_image_shape, position_params, device):
        batch, c, h, w = latent_image_shape
        masks = torch.zeros([self.max_objs], device="cpu")
        boxes = []
        positive_embeddings = []
        for idx, p in enumerate(position_params):
            # p = (embedding, height, width, y, x) in latent units; normalize to 0..1 xyxy.
            x1 = (p[4]) / w
            y1 = (p[3]) / h
            x2 = (p[4] + p[2]) / w
            y2 = (p[3] + p[1]) / h
            masks[idx] = 1.0
            boxes.append(torch.tensor((x1, y1, x2, y2)).unsqueeze(0))
            positive_embeddings.append(p[0])

        # Zero-pad remaining slots up to max_objs.
        pad = self.max_objs - len(boxes)
        append_boxes = []
        append_conds = []
        if pad > 0:
            append_boxes = [torch.zeros([pad, 4], device="cpu")]
            append_conds = [torch.zeros([pad, self.key_dim], device="cpu")]

        box_out = torch.cat(boxes + append_boxes).unsqueeze(0).repeat(batch, 1, 1)
        masks = masks.unsqueeze(0).repeat(batch, 1)
        conds = torch.cat(positive_embeddings + append_conds).unsqueeze(0).repeat(batch, 1, 1)
        return self._set_position(box_out.to(device), masks.to(device), conds.to(device))

    def set_empty(self, latent_image_shape, device):
        batch, c, h, w = latent_image_shape
        # All-zero masks mean every slot falls back to the learned null embeddings.
        masks = torch.zeros([self.max_objs], device="cpu").repeat(batch, 1)
        box_out = torch.zeros([self.max_objs, 4], device="cpu").repeat(batch, 1, 1)
        conds = torch.zeros([self.max_objs, self.key_dim], device="cpu").repeat(batch, 1, 1)
        return self._set_position(box_out.to(device), masks.to(device), conds.to(device))
+
+
def load_gligen(sd):
    """Build a Gligen wrapper from a GLIGEN state dict `sd` (fuser + position net weights)."""
    keys = sd.keys()
    output_list = []
    key_dim = 768
    for block_name in ["input_blocks", "middle_block", "output_blocks"]:
        for block_idx in range(20):
            prefix = "{}.{}.".format(block_name, block_idx)
            # Collect this block's fuser weights, keyed by the suffix after ".fuser.".
            fuser_sd = {}
            for k in keys:
                if prefix in k and ".fuser." in k:
                    fuser_sd[k.split(".fuser.")[-1]] = sd[k]
            if not fuser_sd:
                continue
            query_dim = fuser_sd["linear.weight"].shape[0]
            key_dim = fuser_sd["linear.weight"].shape[1]

            if key_dim == 768:  # SD1.x
                n_heads = 8
                d_head = query_dim // n_heads
            else:
                d_head = 64
                n_heads = query_dim // d_head

            gated = GatedSelfAttentionDense(query_dim, key_dim, n_heads, d_head)
            gated.load_state_dict(fuser_sd, strict=False)
            output_list.append(gated)

    if "position_net.null_positive_feature" in keys:
        in_dim = sd["position_net.null_positive_feature"].shape[0]
        out_dim = sd["position_net.linears.4.weight"].shape[0]

        # Thin Module shell so load_state_dict resolves "position_net.*" keys.
        class WeightsLoader(torch.nn.Module):
            pass
        w = WeightsLoader()
        w.position_net = PositionNet(in_dim, out_dim)
        w.load_state_dict(sd, strict=False)

    gligen = Gligen(output_list, w.position_net, key_dim)
    return gligen
diff --git a/ComfyUI/comfy/hooks.py b/ComfyUI/comfy/hooks.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d07310729020a033f705395433c318152432935
--- /dev/null
+++ b/ComfyUI/comfy/hooks.py
@@ -0,0 +1,785 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING, Callable
+import enum
+import math
+import torch
+import numpy as np
+import itertools
+import logging
+
+if TYPE_CHECKING:
+ from comfy.model_patcher import ModelPatcher, PatcherInjection
+ from comfy.model_base import BaseModel
+ from comfy.sd import CLIP
+import comfy.lora
+import comfy.model_management
+import comfy.patcher_extension
+from node_helpers import conditioning_set_values
+
+# #######################################################################################################
+# Hooks explanation
+# -------------------
+# The purpose of hooks is to allow conds to influence sampling without the need for ComfyUI core code to
+# make explicit special cases like it does for ControlNet and GLIGEN.
+#
+# This is necessary for nodes/features that are intended for use with masked or scheduled conds, or those
+# that should run special code when a 'marked' cond is used in sampling.
+# #######################################################################################################
+
class EnumHookMode(enum.Enum):
    '''
    Trade-off between hook memory use and speed, mostly relevant to WeightHooks.

    MinVram: no caching is performed for any hook-related operation.
    MaxSpeed: spare VRAM (then RAM, once VRAM is sufficiently depleted) caches hook weights when switching hook groups.
    '''
    MinVram = "minvram"
    MaxSpeed = "maxspeed"
+
class EnumHookType(enum.Enum):
    '''
    Hook types, each of which has different expected behavior.
    '''
    Weight = "weight"                           # WeightHook: weight patches for model/clip
    ObjectPatch = "object_patch"                # ObjectPatchHook (not yet supported)
    AdditionalModels = "add_models"             # AdditionalModelsHook: extra models to load
    TransformerOptions = "transformer_options"  # TransformerOptionsHook: wrappers/callbacks/patches
    Injections = "add_injections"               # InjectionsHook (not yet supported)
+
class EnumWeightTarget(enum.Enum):
    # Which component a WeightHook's patches apply to.
    Model = "model"
    Clip = "clip"
+
class EnumHookScope(enum.Enum):
    '''
    Determines whether a hook's influence over sampling is limited.

    AllConditioning: the hook affects every cond used in sampling.
    HookedOnly: the hook only affects the conds it was attached to.
    '''
    AllConditioning = "all_conditioning"
    HookedOnly = "hooked_only"
+
+
class _HookRef:
    # Identity-only sentinel: clones of the same logical hook share one _HookRef,
    # which Hook.__eq__/__hash__ use to treat them as equal.
    pass
+
+
def default_should_register(hook: Hook, model: ModelPatcher, model_options: dict, target_dict: dict[str], registered: HookGroup):
    '''Default custom_should_register implementation: always allow registration.

    Also serves as an example of the expected signature for custom overrides.
    '''
    return True
+
+
+def create_target_dict(target: EnumWeightTarget=None, **kwargs) -> dict[str]:
+ '''Creates base dictionary for use with Hooks' target param.'''
+ d = {}
+ if target is not None:
+ d['target'] = target
+ d.update(kwargs)
+ return d
+
+
class Hook:
    """Base class for hooks that influence sampling for the conds they accompany."""

    def __init__(self, hook_type: EnumHookType=None, hook_ref: _HookRef=None, hook_id: str=None,
                 hook_keyframe: HookKeyframeGroup=None, hook_scope=EnumHookScope.AllConditioning):
        # General class of this hook.
        self.hook_type = hook_type
        # Reference shared between clones of the same logical hook; should NOT be modified.
        self.hook_ref = hook_ref if hook_ref else _HookRef()
        # Optional string ID; useful to consolidate duplicates at registration time.
        self.hook_id = hook_id
        # Keyframe storage providing the strength for the current sampling step.
        self.hook_keyframe = hook_keyframe if hook_keyframe else HookKeyframeGroup()
        # Scope of which conds this hook applies to during a sampling run.
        self.hook_scope = hook_scope
        # Override with a compatible function to customize registration without subclassing.
        self.custom_should_register = default_should_register

    @property
    def strength(self):
        # Strength comes from whichever keyframe is currently active.
        return self.hook_keyframe.strength

    def initialize_timesteps(self, model: BaseModel):
        self.reset()
        self.hook_keyframe.initialize_timesteps(model)

    def reset(self):
        self.hook_keyframe.reset()

    def clone(self):
        # Clones share hook_ref and keyframes, so they compare equal via __eq__.
        c: Hook = self.__class__()
        for attr in ("hook_type", "hook_ref", "hook_id", "hook_keyframe",
                     "hook_scope", "custom_should_register"):
            setattr(c, attr, getattr(self, attr))
        return c

    def should_register(self, model: ModelPatcher, model_options: dict, target_dict: dict[str], registered: HookGroup):
        return self.custom_should_register(self, model, model_options, target_dict, registered)

    def add_hook_patches(self, model: ModelPatcher, model_options: dict, target_dict: dict[str], registered: HookGroup):
        raise NotImplementedError("add_hook_patches should be defined for Hook subclasses")

    def __eq__(self, other: Hook):
        # Equality is identity of the shared hook_ref within the same class.
        return self.__class__ == other.__class__ and self.hook_ref == other.hook_ref

    def __hash__(self):
        return hash(self.hook_ref)
+
class WeightHook(Hook):
    '''
    Hook responsible for tracking weights to be applied to some model/clip.

    Note, value of hook_scope is ignored and is treated as HookedOnly.
    '''
    def __init__(self, strength_model=1.0, strength_clip=1.0):
        super().__init__(hook_type=EnumHookType.Weight, hook_scope=EnumHookScope.HookedOnly)
        self.weights: dict = None
        self.weights_clip: dict = None
        # When True, raw weights must still be converted into lora patches at registration.
        self.need_weight_init = True
        self._strength_model = strength_model
        self._strength_clip = strength_clip

    @property
    def strength_model(self):
        # Base model strength scaled by the active keyframe strength.
        return self._strength_model * self.strength

    @property
    def strength_clip(self):
        # Clip strength scaled by the active keyframe strength.
        return self._strength_clip * self.strength

    def add_hook_patches(self, model: ModelPatcher, model_options: dict, target_dict: dict[str], registered: HookGroup):
        if not self.should_register(model, model_options, target_dict, registered):
            return False
        is_clip = target_dict.get('target', None) == EnumWeightTarget.Clip
        strength = self._strength_clip if is_clip else self._strength_model

        if self.need_weight_init:
            # Convert raw lora weights via the appropriate key map for the target.
            key_map = {}
            if is_clip:
                key_map = comfy.lora.model_lora_keys_clip(model.model, key_map)
            else:
                key_map = comfy.lora.model_lora_keys_unet(model.model, key_map)
            weights = comfy.lora.load_lora(self.weights, key_map, log_missing=False)
        else:
            weights = self.weights_clip if is_clip else self.weights
        model.add_hook_patches(hook=self, patches=weights, strength_patch=strength)
        registered.add(self)
        return True
        # TODO: add logs about any keys that were not applied

    def clone(self):
        c: WeightHook = super().clone()
        c.weights = self.weights
        c.weights_clip = self.weights_clip
        c.need_weight_init = self.need_weight_init
        c._strength_model = self._strength_model
        c._strength_clip = self._strength_clip
        return c
+
class ObjectPatchHook(Hook):
    """Hook intended to carry object patches; not yet supported by ComfyUI."""

    def __init__(self, object_patches: dict[str]=None,
                 hook_scope=EnumHookScope.AllConditioning):
        super().__init__(hook_type=EnumHookType.ObjectPatch)
        self.object_patches = object_patches
        self.hook_scope = hook_scope

    def clone(self):
        cloned: ObjectPatchHook = super().clone()
        cloned.object_patches = self.object_patches
        return cloned

    def add_hook_patches(self, model: ModelPatcher, model_options: dict, target_dict: dict[str], registered: HookGroup):
        raise NotImplementedError("ObjectPatchHook is not supported yet in ComfyUI.")
+
class AdditionalModelsHook(Hook):
    '''
    Hook responsible for telling model management any additional models that should be loaded.

    Note, value of hook_scope is ignored and is treated as AllConditioning.
    '''
    def __init__(self, models: list[ModelPatcher]=None, key: str=None):
        super().__init__(hook_type=EnumHookType.AdditionalModels)
        self.models = models
        self.key = key

    def clone(self):
        cloned: AdditionalModelsHook = super().clone()
        # Shallow-copy the model list so the clone can diverge without affecting the original.
        cloned.models = self.models.copy() if self.models else self.models
        cloned.key = self.key
        return cloned

    def add_hook_patches(self, model: ModelPatcher, model_options: dict, target_dict: dict[str], registered: HookGroup):
        # No patches to apply; registration alone signals the extra models.
        if not self.should_register(model, model_options, target_dict, registered):
            return False
        registered.add(self)
        return True
+
class TransformerOptionsHook(Hook):
    '''
    Hook responsible for adding wrappers, callbacks, patches, or anything else related to transformer_options.
    '''
    def __init__(self, transformers_dict: dict[str, dict[str, dict[str, list[Callable]]]]=None,
                 hook_scope=EnumHookScope.AllConditioning):
        super().__init__(hook_type=EnumHookType.TransformerOptions)
        self.transformers_dict = transformers_dict
        self.hook_scope = hook_scope
        # Internal flag to avoid double-loading transformer_options when scope is AllConditioning.
        self._skip_adding = False

    def clone(self):
        cloned: TransformerOptionsHook = super().clone()
        cloned.transformers_dict = self.transformers_dict
        cloned._skip_adding = self._skip_adding
        return cloned

    def add_hook_patches(self, model: ModelPatcher, model_options: dict, target_dict: dict[str], registered: HookGroup):
        if not self.should_register(model, model_options, target_dict, registered):
            return False
        # NOTE: to_load_options will be used to manually load patches/wrappers/callbacks from hooks
        self._skip_adding = False
        if self.hook_scope == EnumHookScope.AllConditioning:
            add_model_options = {"transformer_options": self.transformers_dict,
                                 "to_load_options": self.transformers_dict}
            # Options already merged for every cond; skip re-adding at apply time.
            self._skip_adding = True
        else:
            add_model_options = {"to_load_options": self.transformers_dict}
        registered.add(self)
        comfy.patcher_extension.merge_nested_dicts(model_options, add_model_options, copy_dict1=False)
        return True

    def on_apply_hooks(self, model: ModelPatcher, transformer_options: dict[str]):
        if not self._skip_adding:
            comfy.patcher_extension.merge_nested_dicts(transformer_options, self.transformers_dict, copy_dict1=False)
+
# Deprecated alias; prefer TransformerOptionsHook in new code.
WrapperHook = TransformerOptionsHook
'''Only here for backwards compatibility, WrapperHook is identical to TransformerOptionsHook.'''
+
class InjectionsHook(Hook):
    """Hook intended to carry PatcherInjections; not yet supported by ComfyUI."""

    def __init__(self, key: str=None, injections: list[PatcherInjection]=None,
                 hook_scope=EnumHookScope.AllConditioning):
        super().__init__(hook_type=EnumHookType.Injections)
        self.key = key
        self.injections = injections
        self.hook_scope = hook_scope

    def clone(self):
        cloned: InjectionsHook = super().clone()
        cloned.key = self.key
        # Shallow-copy so the clone's injection list can diverge safely.
        cloned.injections = self.injections.copy() if self.injections else self.injections
        return cloned

    def add_hook_patches(self, model: ModelPatcher, model_options: dict, target_dict: dict[str], registered: HookGroup):
        raise NotImplementedError("InjectionsHook is not supported yet in ComfyUI.")
+
class HookGroup:
    '''
    Stores groups of hooks, and allows them to be queried by type.

    To prevent breaking their functionality, never modify the underlying self.hooks or self._hook_dict vars directly;
    always use the provided functions on HookGroup.
    '''
    def __init__(self):
        # Flat list of all hooks, in insertion order.
        self.hooks: list[Hook] = []
        # The same hooks bucketed by EnumHookType for get_type() queries.
        self._hook_dict: dict[EnumHookType, list[Hook]] = {}

    def __len__(self):
        return len(self.hooks)

    def add(self, hook: Hook):
        # Membership test uses Hook.__eq__ (shared hook_ref), so clones count as duplicates.
        if hook not in self.hooks:
            self.hooks.append(hook)
            self._hook_dict.setdefault(hook.hook_type, []).append(hook)

    def remove(self, hook: Hook):
        if hook in self.hooks:
            self.hooks.remove(hook)
            self._hook_dict[hook.hook_type].remove(hook)

    def get_type(self, hook_type: EnumHookType):
        """Return the list of hooks of the given type (empty list if none)."""
        return self._hook_dict.get(hook_type, [])

    def contains(self, hook: Hook):
        return hook in self.hooks

    def is_subset_of(self, other: HookGroup):
        """True if every hook in this group is also in `other`."""
        self_hooks = set(self.hooks)
        other_hooks = set(other.hooks)
        return self_hooks.issubset(other_hooks)

    def new_with_common_hooks(self, other: HookGroup):
        """Return a new group containing clones of hooks present in both groups."""
        c = HookGroup()
        for hook in self.hooks:
            if other.contains(hook):
                c.add(hook.clone())
        return c

    def clone(self):
        c = HookGroup()
        for hook in self.hooks:
            c.add(hook.clone())
        return c

    def clone_and_combine(self, other: HookGroup):
        """Return a clone of this group with clones of `other`'s hooks added."""
        c = self.clone()
        if other is not None:
            for hook in other.hooks:
                c.add(hook.clone())
        return c

    def set_keyframes_on_hooks(self, hook_kf: HookKeyframeGroup):
        # All hooks in the group share one (cloned) keyframe group instance.
        if hook_kf is None:
            hook_kf = HookKeyframeGroup()
        else:
            hook_kf = hook_kf.clone()
        for hook in self.hooks:
            hook.hook_keyframe = hook_kf

    def get_hooks_for_clip_schedule(self):
        """Build a schedule of (percent range, [(hook, keyframe), ...]) for clip encoding."""
        scheduled_hooks: dict[WeightHook, list[tuple[tuple[float,float], HookKeyframe]]] = {}
        # only care about WeightHooks, for now
        for hook in self.get_type(EnumHookType.Weight):
            hook: WeightHook
            hook_schedule = []
            # if no hook keyframes, assign default value
            if len(hook.hook_keyframe.keyframes) == 0:
                hook_schedule.append(((0.0, 1.0), None))
                scheduled_hooks[hook] = hook_schedule
                continue
            # find ranges of values
            prev_keyframe = hook.hook_keyframe.keyframes[0]
            for keyframe in hook.hook_keyframe.keyframes:
                # only close a range when the strength actually changes
                if keyframe.start_percent > prev_keyframe.start_percent and not math.isclose(keyframe.strength, prev_keyframe.strength):
                    hook_schedule.append(((prev_keyframe.start_percent, keyframe.start_percent), prev_keyframe))
                    prev_keyframe = keyframe
                elif keyframe.start_percent == prev_keyframe.start_percent:
                    # same start percent: later keyframe wins
                    prev_keyframe = keyframe
            # create final range, assuming last start_percent was not 1.0
            if not math.isclose(prev_keyframe.start_percent, 1.0):
                hook_schedule.append(((prev_keyframe.start_percent, 1.0), prev_keyframe))
            scheduled_hooks[hook] = hook_schedule
        # each hook now has its schedule as a list of ((start, end), keyframe) tuples
        all_ranges: list[tuple[float, float]] = []
        for range_kfs in scheduled_hooks.values():
            for t_range, keyframe in range_kfs:
                all_ranges.append(t_range)
        # turn list of ranges into boundaries
        boundaries_set = set(itertools.chain.from_iterable(all_ranges))
        boundaries_set.add(0.0)
        boundaries = sorted(boundaries_set)
        real_ranges = [(boundaries[i], boundaries[i + 1]) for i in range(len(boundaries) - 1)]
        # with real ranges defined, give appropriate hooks w/ keyframes for each range
        scheduled_keyframes: list[tuple[tuple[float,float], list[tuple[WeightHook, HookKeyframe]]]] = []
        for t_range in real_ranges:
            hooks_schedule = []
            for hook, val in scheduled_hooks.items():
                keyframe = None
                # check if is a keyframe that works for the current t_range
                for stored_range, stored_kf in val:
                    # if stored start is less than current end, then fits - give it assigned keyframe
                    if stored_range[0] < t_range[1] and stored_range[1] > t_range[0]:
                        keyframe = stored_kf
                        break
                hooks_schedule.append((hook, keyframe))
            scheduled_keyframes.append((t_range, hooks_schedule))
        return scheduled_keyframes

    def reset(self):
        for hook in self.hooks:
            hook.reset()

    @staticmethod
    def combine_all_hooks(hooks_list: list[HookGroup], require_count=0) -> HookGroup:
        """Combine a list of (possibly None) HookGroups into one; None if all empty."""
        actual: list[HookGroup] = []
        for group in hooks_list:
            if group is not None:
                actual.append(group)
        if len(actual) < require_count:
            raise Exception(f"Need at least {require_count} hooks to combine, but only had {len(actual)}.")
        # if no hooks, then return None
        if len(actual) == 0:
            return None
        # if only 1 hook, just return itself without cloning
        elif len(actual) == 1:
            return actual[0]
        final_hook: HookGroup = None
        for hook in actual:
            if final_hook is None:
                final_hook = hook.clone()
            else:
                final_hook = final_hook.clone_and_combine(hook)
        return final_hook
+
+
class HookKeyframe:
    """A single (strength, start_percent) point in a hook's strength schedule."""

    def __init__(self, strength: float, start_percent=0.0, guarantee_steps=1):
        self.strength = strength
        # scheduling: percent is converted to a sigma (start_t) via initialize_timesteps
        self.start_percent = float(start_percent)
        # sentinel sigma until HookKeyframeGroup.initialize_timesteps assigns the real value
        self.start_t = 999999999.9
        self.guarantee_steps = guarantee_steps

    def get_effective_guarantee_steps(self, max_sigma: torch.Tensor):
        '''If keyframe starts before current sampling range (max_sigma), treat as 0.'''
        return self.guarantee_steps if self.start_t <= max_sigma else 0

    def clone(self):
        copy = HookKeyframe(strength=self.strength,
                            start_percent=self.start_percent,
                            guarantee_steps=self.guarantee_steps)
        copy.start_t = self.start_t
        return copy
+
class HookKeyframeGroup:
    """Ordered collection of HookKeyframes that tracks the active keyframe while sampling."""

    def __init__(self):
        self.keyframes: list[HookKeyframe] = []
        self._current_keyframe: HookKeyframe = None
        self._current_used_steps = 0
        self._current_index = 0
        self._current_strength = None
        self._curr_t = -1.

    # properties shadow those of HookWeightsKeyframe
    @property
    def strength(self):
        if self._current_keyframe is not None:
            return self._current_keyframe.strength
        return 1.0

    def reset(self):
        """Reset tracking state so the group can be reused for a new sampling run."""
        self._current_keyframe = None
        self._current_used_steps = 0
        self._current_index = 0
        self._current_strength = None
        # BUGFIX: was `self.curr_t = -1.`, which created a stray attribute and left the
        # cached `_curr_t` stale, so prepare_current_keyframe could wrongly early-return
        # when re-sampling at the same timestep after a reset.
        self._curr_t = -1.
        self._set_first_as_current()

    def add(self, keyframe: HookKeyframe):
        # add to end of list, then sort (stable by start_percent)
        self.keyframes.append(keyframe)
        self.keyframes = get_sorted_list_via_attr(self.keyframes, "start_percent")
        self._set_first_as_current()

    def _set_first_as_current(self):
        # Earliest keyframe (lowest start_percent) becomes the active one, if any.
        if len(self.keyframes) > 0:
            self._current_keyframe = self.keyframes[0]
        else:
            self._current_keyframe = None

    def has_guarantee_steps(self):
        for kf in self.keyframes:
            if kf.guarantee_steps > 0:
                return True
        return False

    def has_index(self, index: int):
        return index >= 0 and index < len(self.keyframes)

    def is_empty(self):
        return len(self.keyframes) == 0

    def clone(self):
        c = HookKeyframeGroup()
        for keyframe in self.keyframes:
            c.keyframes.append(keyframe.clone())
        c._set_first_as_current()
        return c

    def initialize_timesteps(self, model: BaseModel):
        # Convert schedule percents into sigma values for the given model.
        for keyframe in self.keyframes:
            keyframe.start_t = model.model_sampling.percent_to_sigma(keyframe.start_percent)

    def prepare_current_keyframe(self, curr_t: float, transformer_options: dict[str, torch.Tensor]) -> bool:
        """Advance to the keyframe matching curr_t; True if the active keyframe changed."""
        if self.is_empty():
            return False
        if curr_t == self._curr_t:
            return False
        max_sigma = torch.max(transformer_options["sample_sigmas"])
        prev_index = self._current_index
        prev_strength = self._current_strength
        # if met guaranteed steps, look for next keyframe in case need to switch
        if self._current_used_steps >= self._current_keyframe.get_effective_guarantee_steps(max_sigma):
            # if has next index, loop through and see if need to switch
            if self.has_index(self._current_index+1):
                for i in range(self._current_index+1, len(self.keyframes)):
                    eval_c = self.keyframes[i]
                    # check if start_t is greater or equal to curr_t
                    # NOTE: t is in terms of sigmas, not percent, so bigger number = earlier step in sampling
                    if eval_c.start_t >= curr_t:
                        self._current_index = i
                        self._current_strength = eval_c.strength
                        self._current_keyframe = eval_c
                        self._current_used_steps = 0
                        # if guarantee_steps greater than zero, stop searching for other keyframes
                        if self._current_keyframe.get_effective_guarantee_steps(max_sigma) > 0:
                            break
                    # if eval_c is outside the percent range, stop looking further
                    else: break
        # update steps current context is used
        self._current_used_steps += 1
        # update current timestep this was performed on
        self._curr_t = curr_t
        # return True if keyframe changed, False if no change
        return prev_index != self._current_index and prev_strength != self._current_strength
+
+
class InterpolationMethod:
    """Weight-curve generators for interpolating between two values over `length` steps."""
    LINEAR = "linear"
    EASE_IN = "ease_in"
    EASE_OUT = "ease_out"
    EASE_IN_OUT = "ease_in_out"

    _LIST = [LINEAR, EASE_IN, EASE_OUT, EASE_IN_OUT]

    @classmethod
    def get_weights(cls, num_from: float, num_to: float, length: int, method: str, reverse=False):
        """Return `length` weights going from num_from to num_to along the chosen curve."""
        diff = num_to - num_from
        if method == cls.LINEAR:
            weights = torch.linspace(num_from, num_to, length)
        else:
            # All easing curves are shaped over a normalized 0..1 index.
            index = torch.linspace(0, 1, length)
            if method == cls.EASE_IN:
                weights = diff * np.power(index, 2) + num_from
            elif method == cls.EASE_OUT:
                weights = diff * (1 - np.power(1 - index, 2)) + num_from
            elif method == cls.EASE_IN_OUT:
                weights = diff * ((1 - np.cos(index * np.pi)) / 2) + num_from
            else:
                raise ValueError(f"Unrecognized interpolation method '{method}'.")
        if reverse:
            weights = weights.flip(dims=(0,))
        return weights
+
def get_sorted_list_via_attr(objects: list, attr: str) -> list:
    """Return `objects` sorted by the value of attribute `attr`.

    Objects with equal attribute values keep their original relative order.
    Returns the input list itself when it is empty; otherwise a new list.
    """
    if not objects:
        return objects
    # Python's sort is guaranteed stable, so equal-keyed objects keep their relative
    # order — exactly what the previous manual group-then-flatten implementation did,
    # without requiring the attribute values to be hashable.
    return sorted(objects, key=lambda o: getattr(o, attr))
+
def create_transformer_options_from_hooks(model: ModelPatcher, hooks: HookGroup, transformer_options: dict[str]=None):
    """Collect transformer_options contributions from all TransformerOptions hooks."""
    # No hooks, or a CLIP patcher (not a sampling ModelPatcher): nothing to collect.
    if hooks is None or model.is_clip:
        return {}
    transformer_options = {} if transformer_options is None else transformer_options
    for hook in hooks.get_type(EnumHookType.TransformerOptions):
        hook: TransformerOptionsHook
        hook.on_apply_hooks(model, transformer_options)
    return transformer_options
+
def create_hook_lora(lora: dict[str, torch.Tensor], strength_model: float, strength_clip: float):
    """Wrap raw lora weights in a WeightHook inside a new HookGroup."""
    hook = WeightHook(strength_model=strength_model, strength_clip=strength_clip)
    hook.weights = lora
    group = HookGroup()
    group.add(hook)
    return group
+
def create_hook_model_as_lora(weights_model, weights_clip, strength_model: float, strength_clip: float):
    """Create a WeightHook that treats full model/clip state dicts as 'model_as_lora' patches."""
    hook = WeightHook(strength_model=strength_model, strength_clip=strength_clip)
    if weights_model is not None:
        hook.weights = {key: ("model_as_lora", (weights_model[key],)) for key in weights_model}
    else:
        hook.weights = None
    if weights_clip is not None:
        hook.weights_clip = {key: ("model_as_lora", (weights_clip[key],)) for key in weights_clip}
    else:
        hook.weights_clip = None
    # Patches are already in final form; no lora key-map initialization needed.
    hook.need_weight_init = False
    group = HookGroup()
    group.add(hook)
    return group
+
def get_patch_weights_from_model(model: ModelPatcher, discard_model_sampling=True):
    """Return the model's state dict as patch weights, optionally without model_sampling entries."""
    if model is None:
        return None
    state: dict[str, torch.Tensor] = model.model.state_dict()
    if discard_model_sampling:
        # model_sampling components should never act as part of a weight patch.
        state = {key: value for key, value in state.items()
                 if not key.startswith("model_sampling")}
    return state
+
+# NOTE: this function shows how to register weight hooks directly on the ModelPatchers
+def load_hook_lora_for_models(model: ModelPatcher, clip: CLIP, lora: dict[str, torch.Tensor],
+ strength_model: float, strength_clip: float):
+ key_map = {}
+ if model is not None:
+ key_map = comfy.lora.model_lora_keys_unet(model.model, key_map)
+ if clip is not None:
+ key_map = comfy.lora.model_lora_keys_clip(clip.cond_stage_model, key_map)
+
+ hook_group = HookGroup()
+ hook = WeightHook()
+ hook_group.add(hook)
+ loaded: dict[str] = comfy.lora.load_lora(lora, key_map)
+ if model is not None:
+ new_modelpatcher = model.clone()
+ k = new_modelpatcher.add_hook_patches(hook=hook, patches=loaded, strength_patch=strength_model)
+ else:
+ k = ()
+ new_modelpatcher = None
+
+ if clip is not None:
+ new_clip = clip.clone()
+ k1 = new_clip.patcher.add_hook_patches(hook=hook, patches=loaded, strength_patch=strength_clip)
+ else:
+ k1 = ()
+ new_clip = None
+ k = set(k)
+ k1 = set(k1)
+ for x in loaded:
+ if (x not in k) and (x not in k1):
+ logging.warning(f"NOT LOADED {x}")
+ return (new_modelpatcher, new_clip, hook_group)
+
def _combine_hooks_from_values(c_dict: dict[str, HookGroup], values: dict[str, HookGroup], cache: dict[tuple[HookGroup, HookGroup], HookGroup]):
    """Merge the 'hooks' entry of `values` into `c_dict` in place.

    If only one side has hooks, the incoming (non-None) group is copied over.
    When both sides carry hooks, the combined group is memoized in `cache`
    keyed by the (existing, incoming) pair so repeated merges of the same pair
    reuse one combined HookGroup.
    """
    hooks_key = 'hooks'
    if hooks_key not in values:
        # nothing to merge in
        return
    if hooks_key not in c_dict:
        incoming = values[hooks_key]
        if incoming is not None:
            c_dict[hooks_key] = incoming
        return
    # both present: combine, deduplicating via the cache
    pair = (c_dict[hooks_key], values[hooks_key])
    combined = cache.get(pair, None)
    if combined is None:
        combined = pair[0].clone_and_combine(pair[1])
        cache[pair] = combined
    c_dict[hooks_key] = combined
+
def conditioning_set_values_with_hooks(conditioning, values=None, append_hooks=True,
                                       cache: dict[tuple[HookGroup, HookGroup], HookGroup]=None):
    """Return a copy of `conditioning` with `values` merged into each cond dict.

    Each entry of `conditioning` is expected to be a (cond, options_dict) pair;
    the options dict is shallow-copied so the input is not mutated. A 'hooks'
    key is combined (rather than overwritten) when append_hooks is True, using
    `cache` to deduplicate combined HookGroups across entries.
    """
    # `values` previously defaulted to a shared mutable {} — use a None
    # sentinel to avoid accidental cross-call shared state.
    if values is None:
        values = {}
    if cache is None:
        cache = {}
    c = []
    for t in conditioning:
        n = [t[0], t[1].copy()]
        for k in values:
            if append_hooks and k == 'hooks':
                # merge hook groups instead of clobbering the existing ones
                _combine_hooks_from_values(n[1], values, cache)
            else:
                n[1][k] = values[k]
        c.append(n)

    return c
+
def set_hooks_for_conditioning(cond, hooks: HookGroup, append_hooks=True, cache: dict[tuple[HookGroup, HookGroup], HookGroup]=None):
    """Attach `hooks` to every cond entry; passthrough unchanged when hooks is None."""
    if hooks is None:
        return cond
    return conditioning_set_values_with_hooks(cond, {'hooks': hooks},
                                              append_hooks=append_hooks, cache=cache)
+
def set_timesteps_for_conditioning(cond, timestep_range: tuple[float,float]):
    """Restrict conds to a (start, end) percent range; passthrough when range is None."""
    if timestep_range is None:
        return cond
    start, end = timestep_range
    return conditioning_set_values(cond, {"start_percent": start,
                                          "end_percent": end})
+
def set_mask_for_conditioning(cond, mask: torch.Tensor, set_cond_area: str, strength: float):
    """Apply `mask` (with strength) to conds; passthrough when mask is None.

    Any set_cond_area value other than 'default' restricts the cond area to the
    mask bounds. A 2D mask gets a leading batch dimension added.
    """
    if mask is None:
        return cond
    set_area_to_bounds = set_cond_area != 'default'
    if len(mask.shape) < 3:
        # add batch dimension
        mask = mask.unsqueeze(0)
    return conditioning_set_values(cond, {'mask': mask,
                                          'set_area_to_bounds': set_area_to_bounds,
                                          'mask_strength': strength})
+
def combine_conditioning(conds: list):
    """Flatten a list of conditioning lists into a single conditioning list."""
    return [entry for group in conds for entry in group]
+
def combine_with_new_conds(conds: list, new_conds: list):
    """Pairwise-merge: each cond list in `conds` is extended with its counterpart in `new_conds`."""
    return [combine_conditioning([old, new]) for old, new in zip(conds, new_conds)]
+
def set_conds_props(conds: list, strength: float, set_cond_area: str,
                    mask: torch.Tensor=None, hooks: HookGroup=None, timesteps_range: tuple[float,float]=None, append_hooks=True):
    """Apply hooks, mask, and timestep range to every conditioning in `conds`.

    Each transform is a passthrough when its argument is None; one cache is
    shared across the whole list so combined HookGroups are deduplicated.
    """
    cache = {}
    final_conds = []
    for cond in conds:
        # hooks first, then mask, then timestep restriction
        cond = set_hooks_for_conditioning(cond, hooks, append_hooks=append_hooks, cache=cache)
        cond = set_mask_for_conditioning(cond=cond, mask=mask, strength=strength, set_cond_area=set_cond_area)
        cond = set_timesteps_for_conditioning(cond=cond, timestep_range=timesteps_range)
        final_conds.append(cond)
    return final_conds
+
def set_conds_props_and_combine(conds: list, new_conds: list, strength: float=1.0, set_cond_area: str="default",
                                mask: torch.Tensor=None, hooks: HookGroup=None, timesteps_range: tuple[float,float]=None, append_hooks=True):
    """Apply hooks/mask/timesteps to each entry of `new_conds`, then merge pairwise into `conds`.

    Each transform is a passthrough when its argument is None; one cache is
    shared across the whole list to deduplicate combined HookGroups.
    """
    cache = {}
    combined_conds = []
    for base_c, extra_c in zip(conds, new_conds):
        # transform only the new conditioning...
        extra_c = set_hooks_for_conditioning(extra_c, hooks, append_hooks=append_hooks, cache=cache)
        extra_c = set_mask_for_conditioning(cond=extra_c, mask=mask, set_cond_area=set_cond_area, strength=strength)
        extra_c = set_timesteps_for_conditioning(cond=extra_c, timestep_range=timesteps_range)
        # ...then merge it with the existing conditioning
        combined_conds.append(combine_conditioning([base_c, extra_c]))
    return combined_conds
+
def set_default_conds_and_combine(conds: list, new_conds: list,
                                  hooks: HookGroup=None, timesteps_range: tuple[float,float]=None, append_hooks=True):
    """Mark each entry of `new_conds` as a default cond, apply hooks/timesteps, and merge pairwise into `conds`.

    The 'default' key lets sampling identify these conds later.
    """
    cache = {}
    combined_conds = []
    for base_c, default_c in zip(conds, new_conds):
        default_c = set_hooks_for_conditioning(default_c, hooks, append_hooks=append_hooks, cache=cache)
        # tag as default so sampling can identify it
        default_c = conditioning_set_values(default_c, {'default': True})
        default_c = set_timesteps_for_conditioning(cond=default_c, timestep_range=timesteps_range)
        combined_conds.append(combine_conditioning([base_c, default_c]))
    return combined_conds
diff --git a/ComfyUI/comfy/latent_formats.py b/ComfyUI/comfy/latent_formats.py
new file mode 100644
index 0000000000000000000000000000000000000000..caf4991fcab1ef2ad1c6f32effae0e23846aaf0a
--- /dev/null
+++ b/ComfyUI/comfy/latent_formats.py
@@ -0,0 +1,548 @@
+import torch
+
class LatentFormat:
    """Base class describing how latents map between VAE space and model space.

    Subclasses override the class attributes (scale factor, channel count,
    preview RGB projection, taesd decoder name) and, when a plain scale factor
    is not enough, the process_in/process_out pair.
    """
    # default: identity scaling, 4-channel 2D latents, no preview projection
    scale_factor = 1.0
    latent_channels = 4
    latent_dimensions = 2
    latent_rgb_factors = None
    latent_rgb_factors_bias = None
    taesd_decoder_name = None

    def process_in(self, latent):
        """Scale a VAE-space latent into model space."""
        return latent * self.scale_factor

    def process_out(self, latent):
        """Scale a model-space latent back into VAE space."""
        return latent / self.scale_factor
+
class SD15(LatentFormat):
    """SD 1.x latent format: plain scale factor on 4-channel latents."""
    def __init__(self, scale_factor=0.18215):
        self.scale_factor = scale_factor
        # Per-channel projection to RGB used for latent previews.
        self.latent_rgb_factors = [
                    #   R        G        B
                    [ 0.3512,  0.2297,  0.3227],
                    [ 0.3250,  0.4974,  0.2350],
                    [-0.2829,  0.1762,  0.2721],
                    [-0.2120, -0.2616, -0.7177]
                ]
        self.taesd_decoder_name = "taesd_decoder"
+
class SDXL(LatentFormat):
    """SDXL latent format: scale factor plus biased RGB preview projection."""
    scale_factor = 0.13025

    def __init__(self):
        # Per-channel projection to RGB used for latent previews.
        self.latent_rgb_factors = [
                    #   R        G        B
                    [ 0.3651,  0.4232,  0.4341],
                    [-0.2533, -0.0042,  0.1068],
                    [ 0.1076,  0.1111, -0.0362],
                    [-0.3165, -0.2492, -0.2188]
                ]
        self.latent_rgb_factors_bias = [ 0.1084, -0.0175, -0.0011]

        self.taesd_decoder_name = "taesdxl_decoder"
+
class SDXL_Playground_2_5(LatentFormat):
    """Playground v2.5 latent format: per-channel mean/std normalization on top of a scale factor."""
    def __init__(self):
        self.scale_factor = 0.5
        # Per-channel statistics, shaped for broadcasting over (B, 4, H, W).
        self.latents_mean = torch.tensor([-1.6574, 1.886, -1.383, 2.5155]).view(1, 4, 1, 1)
        self.latents_std = torch.tensor([8.4927, 5.9022, 6.5498, 5.2299]).view(1, 4, 1, 1)

        # Per-channel projection to RGB used for latent previews.
        self.latent_rgb_factors = [
                    #   R        G        B
                    [ 0.3920,  0.4054,  0.4549],
                    [-0.2634, -0.0196,  0.0653],
                    [ 0.0568,  0.1687, -0.0755],
                    [-0.3112, -0.2359, -0.2076]
                ]
        self.taesd_decoder_name = "taesdxl_decoder"

    def process_in(self, latent):
        # center by mean, then scale into model space
        latents_mean = self.latents_mean.to(latent.device, latent.dtype)
        latents_std = self.latents_std.to(latent.device, latent.dtype)
        return (latent - latents_mean) * self.scale_factor / latents_std

    def process_out(self, latent):
        # exact inverse of process_in
        latents_mean = self.latents_mean.to(latent.device, latent.dtype)
        latents_std = self.latents_std.to(latent.device, latent.dtype)
        return latent * latents_std / self.scale_factor + latents_mean
+
+
class SD_X4(LatentFormat):
    """SD x4 upscaler latent format."""
    def __init__(self):
        self.scale_factor = 0.08333
        # Per-channel projection to RGB used for latent previews.
        self.latent_rgb_factors = [
            [-0.2340, -0.3863, -0.3257],
            [ 0.0994,  0.0885, -0.0908],
            [-0.2833, -0.2349, -0.3741],
            [ 0.2523, -0.0055, -0.1651]
        ]
+
class SC_Prior(LatentFormat):
    """Stable Cascade prior (stage C) latent format: 16 channels, identity scaling."""
    latent_channels = 16
    def __init__(self):
        self.scale_factor = 1.0
        # Per-channel projection to RGB used for latent previews (16 rows, one per channel).
        self.latent_rgb_factors = [
            [-0.0326, -0.0204, -0.0127],
            [-0.1592, -0.0427,  0.0216],
            [ 0.0873,  0.0638, -0.0020],
            [-0.0602,  0.0442,  0.1304],
            [ 0.0800, -0.0313, -0.1796],
            [-0.0810, -0.0638, -0.1581],
            [ 0.1791,  0.1180,  0.0967],
            [ 0.0740,  0.1416,  0.0432],
            [-0.1745, -0.1888, -0.1373],
            [ 0.2412,  0.1577,  0.0928],
            [ 0.1908,  0.0998,  0.0682],
            [ 0.0209,  0.0365, -0.0092],
            [ 0.0448, -0.0650, -0.1728],
            [-0.1658, -0.1045, -0.1308],
            [ 0.0542,  0.1545,  0.1325],
            [-0.0352, -0.1672, -0.2541]
        ]
+
class SC_B(LatentFormat):
    """Stable Cascade stage B latent format."""
    def __init__(self):
        self.scale_factor = 1.0 / 0.43
        # Per-channel projection to RGB used for latent previews.
        self.latent_rgb_factors = [
            [ 0.1121,  0.2006,  0.1023],
            [-0.2093, -0.0222, -0.0195],
            [-0.3087, -0.1535,  0.0366],
            [ 0.0290, -0.1574, -0.4078]
        ]
+
class SD3(LatentFormat):
    """SD3 latent format: 16 channels with a shift applied before scaling."""
    latent_channels = 16
    def __init__(self):
        self.scale_factor = 1.5305
        self.shift_factor = 0.0609
        # Per-channel projection to RGB used for latent previews (16 rows, one per channel).
        self.latent_rgb_factors = [
            [-0.0922, -0.0175,  0.0749],
            [ 0.0311,  0.0633,  0.0954],
            [ 0.1994,  0.0927,  0.0458],
            [ 0.0856,  0.0339,  0.0902],
            [ 0.0587,  0.0272, -0.0496],
            [-0.0006,  0.1104,  0.0309],
            [ 0.0978,  0.0306,  0.0427],
            [-0.0042,  0.1038,  0.1358],
            [-0.0194,  0.0020,  0.0669],
            [-0.0488,  0.0130, -0.0268],
            [ 0.0922,  0.0988,  0.0951],
            [-0.0278,  0.0524, -0.0542],
            [ 0.0332,  0.0456,  0.0895],
            [-0.0069, -0.0030, -0.0810],
            [-0.0596, -0.0465, -0.0293],
            [-0.1448, -0.1463, -0.1189]
        ]
        self.latent_rgb_factors_bias = [0.2394, 0.2135, 0.1925]
        self.taesd_decoder_name = "taesd3_decoder"

    def process_in(self, latent):
        # shift then scale into model space
        return (latent - self.shift_factor) * self.scale_factor

    def process_out(self, latent):
        # exact inverse of process_in
        return (latent / self.scale_factor) + self.shift_factor
+
class StableAudio1(LatentFormat):
    """Stable Audio 1.0 latent format: 64-channel 1D latents with identity scaling."""
    latent_channels = 64
    latent_dimensions = 1
+
class Flux(SD3):
    """Flux latent format: 16 channels with its own shift/scale values."""
    latent_channels = 16
    def __init__(self):
        self.scale_factor = 0.3611
        self.shift_factor = 0.1159
        # Per-channel projection to RGB used for latent previews (16 rows, one per channel).
        self.latent_rgb_factors =[
            [-0.0346,  0.0244,  0.0681],
            [ 0.0034,  0.0210,  0.0687],
            [ 0.0275, -0.0668, -0.0433],
            [-0.0174,  0.0160,  0.0617],
            [ 0.0859,  0.0721,  0.0329],
            [ 0.0004,  0.0383,  0.0115],
            [ 0.0405,  0.0861,  0.0915],
            [-0.0236, -0.0185, -0.0259],
            [-0.0245,  0.0250,  0.1180],
            [ 0.1008,  0.0755, -0.0421],
            [-0.0515,  0.0201,  0.0011],
            [ 0.0428, -0.0012, -0.0036],
            [ 0.0817,  0.0765,  0.0749],
            [-0.1264, -0.0522, -0.1103],
            [-0.0280, -0.0881, -0.0499],
            [-0.1262, -0.0982, -0.0778]
        ]
        self.latent_rgb_factors_bias = [-0.0329, -0.0718, -0.0851]
        self.taesd_decoder_name = "taef1_decoder"

    # NOTE(review): these two overrides are textually identical to the ones
    # inherited from SD3 and look redundant; kept for explicitness.
    def process_in(self, latent):
        return (latent - self.shift_factor) * self.scale_factor

    def process_out(self, latent):
        return (latent / self.scale_factor) + self.shift_factor
+
class Mochi(LatentFormat):
    """Mochi video latent format: 12-channel 3D latents with per-channel mean/std normalization."""
    latent_channels = 12
    latent_dimensions = 3

    def __init__(self):
        self.scale_factor = 1.0
        # Per-channel statistics, shaped for broadcasting over (B, 12, T, H, W).
        self.latents_mean = torch.tensor([-0.06730895953510081, -0.038011381506090416, -0.07477820912866141,
                                          -0.05565264470995561, 0.012767231469026969, -0.04703542746246419,
                                          0.043896967884726704, -0.09346305707025976, -0.09918314763016893,
                                          -0.008729793427399178, -0.011931556316503654, -0.0321993391887285]).view(1, self.latent_channels, 1, 1, 1)
        self.latents_std = torch.tensor([0.9263795028493863, 0.9248894543193766, 0.9393059390890617,
                                         0.959253732819592, 0.8244560132752793, 0.917259975397747,
                                         0.9294154431013696, 1.3720942357788521, 0.881393668867029,
                                         0.9168315692124348, 0.9185249279345552, 0.9274757570805041]).view(1, self.latent_channels, 1, 1, 1)

        # Per-channel projection to RGB used for latent previews (12 rows, one per channel).
        self.latent_rgb_factors =[
            [-0.0069, -0.0045,  0.0018],
            [ 0.0154, -0.0692, -0.0274],
            [ 0.0333,  0.0019,  0.0206],
            [-0.1390,  0.0628,  0.1678],
            [-0.0725,  0.0134, -0.1898],
            [ 0.0074, -0.0270, -0.0209],
            [-0.0176, -0.0277, -0.0221],
            [ 0.5294,  0.5204,  0.3852],
            [-0.0326, -0.0446, -0.0143],
            [-0.0659,  0.0153, -0.0153],
            [ 0.0185, -0.0217,  0.0014],
            [-0.0396, -0.0495, -0.0281]
        ]
        self.latent_rgb_factors_bias = [-0.0940, -0.1418, -0.1453]
        self.taesd_decoder_name = None  # TODO: no taesd decoder available yet

    def process_in(self, latent):
        # center by mean, then scale into model space
        latents_mean = self.latents_mean.to(latent.device, latent.dtype)
        latents_std = self.latents_std.to(latent.device, latent.dtype)
        return (latent - latents_mean) * self.scale_factor / latents_std

    def process_out(self, latent):
        # exact inverse of process_in
        latents_mean = self.latents_mean.to(latent.device, latent.dtype)
        latents_std = self.latents_std.to(latent.device, latent.dtype)
        return latent * latents_std / self.scale_factor + latents_mean
+
class LTXV(LatentFormat):
    """LTX-Video latent format: 128-channel 3D latents with identity scaling."""
    latent_channels = 128
    latent_dimensions = 3

    def __init__(self):
        # Per-channel projection to RGB used for latent previews (128 rows, one per channel).
        self.latent_rgb_factors = [
            [ 1.1202e-02, -6.3815e-04, -1.0021e-02],
            [ 8.6031e-02,  6.5813e-02,  9.5409e-04],
            [-1.2576e-02, -7.5734e-03, -4.0528e-03],
            [ 9.4063e-03, -2.1688e-03,  2.6093e-03],
            [ 3.7636e-03,  1.2765e-02,  9.1548e-03],
            [ 2.1024e-02, -5.2973e-03,  3.4373e-03],
            [-8.8896e-03, -1.9703e-02, -1.8761e-02],
            [-1.3160e-02, -1.0523e-02,  1.9709e-03],
            [-1.5152e-03, -6.9891e-03, -7.5810e-03],
            [-1.7247e-03,  4.6560e-04, -3.3839e-03],
            [ 1.3617e-02,  4.7077e-03, -2.0045e-03],
            [ 1.0256e-02,  7.7318e-03,  1.3948e-02],
            [-1.6108e-02, -6.2151e-03,  1.1561e-03],
            [ 7.3407e-03,  1.5628e-02,  4.4865e-04],
            [ 9.5357e-04, -2.9518e-03, -1.4760e-02],
            [ 1.9143e-02,  1.0868e-02,  1.2264e-02],
            [ 4.4575e-03,  3.6682e-05, -6.8508e-03],
            [-4.5681e-04,  3.2570e-03,  7.7929e-03],
            [ 3.3902e-02,  3.3405e-02,  3.7454e-02],
            [-2.3001e-02, -2.4877e-03, -3.1033e-03],
            [ 5.0265e-02,  3.8841e-02,  3.3539e-02],
            [-4.1018e-03, -1.1095e-03,  1.5859e-03],
            [-1.2689e-01, -1.3107e-01, -2.1005e-01],
            [ 2.6276e-02,  1.4189e-02, -3.5963e-03],
            [-4.8679e-03,  8.8486e-03,  7.8029e-03],
            [-1.6610e-03, -4.8597e-03, -5.2060e-03],
            [-2.1010e-03,  2.3610e-03,  9.3796e-03],
            [-2.2482e-02, -2.1305e-02, -1.5087e-02],
            [-1.5753e-02, -1.0646e-02, -6.5083e-03],
            [-4.6975e-03,  5.0288e-03, -6.7390e-03],
            [ 1.1951e-02,  2.0712e-02,  1.6191e-02],
            [-6.3704e-03, -8.4827e-03, -9.5483e-03],
            [ 7.2610e-03, -9.9326e-03, -2.2978e-02],
            [-9.1904e-04,  6.2882e-03,  9.5720e-03],
            [-3.7178e-02, -3.7123e-02, -5.6713e-02],
            [-1.3373e-01, -1.0720e-01, -5.3801e-02],
            [-5.3702e-03,  8.1256e-03,  8.8397e-03],
            [-1.5247e-01, -2.1437e-01, -2.1843e-01],
            [ 3.1441e-02,  7.0335e-03, -9.7541e-03],
            [ 2.1528e-03, -8.9817e-03, -2.1023e-02],
            [ 3.8461e-03, -5.8957e-03, -1.5014e-02],
            [-4.3470e-03, -1.2940e-02, -1.5972e-02],
            [-5.4781e-03, -1.0842e-02, -3.0204e-03],
            [-6.5347e-03,  3.0806e-03, -1.0163e-02],
            [-5.0414e-03, -7.1503e-03, -8.9686e-04],
            [-8.5851e-03, -2.4351e-03,  1.0674e-03],
            [-9.0016e-03, -9.6493e-03,  1.5692e-03],
            [ 5.0914e-03,  1.2099e-02,  1.9968e-02],
            [ 1.3758e-02,  1.1669e-02,  8.1958e-03],
            [-1.0518e-02, -1.1575e-02, -4.1307e-03],
            [-2.8410e-02, -3.1266e-02, -2.2149e-02],
            [ 2.9336e-03,  3.6511e-02,  1.8717e-02],
            [-1.6703e-02, -1.6696e-02, -4.4529e-03],
            [ 4.8818e-02,  4.0063e-02,  8.7410e-03],
            [-1.5066e-02, -5.7328e-04,  2.9785e-03],
            [-1.7613e-02, -8.1034e-03,  1.3086e-02],
            [-9.2633e-03,  1.0803e-02, -6.3489e-03],
            [ 3.0851e-03,  4.7750e-04,  1.2347e-02],
            [-2.2785e-02, -2.3043e-02, -2.6005e-02],
            [-2.4787e-02, -1.5389e-02, -2.2104e-02],
            [-2.3572e-02,  1.0544e-03,  1.2361e-02],
            [-7.8915e-03, -1.2271e-03, -6.0968e-03],
            [-1.1478e-02, -1.2543e-03,  6.2679e-03],
            [-5.4229e-02,  2.6644e-02,  6.3394e-03],
            [ 4.4216e-03, -7.3338e-03, -1.0464e-02],
            [-4.5013e-03,  1.6082e-03,  1.4420e-02],
            [ 1.3673e-02,  8.8877e-03,  4.1253e-03],
            [-1.0145e-02,  9.0072e-03,  1.5695e-02],
            [-5.6234e-03,  1.1847e-03,  8.1261e-03],
            [-3.7171e-03, -5.3538e-03,  1.2590e-03],
            [ 2.9476e-02,  2.1424e-02,  3.0424e-02],
            [-3.4925e-02, -2.4340e-02, -2.5316e-02],
            [-3.4127e-02, -2.2406e-02, -1.0589e-02],
            [-1.7342e-02, -1.3249e-02, -1.0719e-02],
            [-2.1478e-03, -8.6051e-03, -2.9878e-03],
            [ 1.2089e-03, -4.2391e-03, -6.8569e-03],
            [ 9.0411e-04, -6.6886e-03, -6.7547e-05],
            [ 1.6048e-02, -1.0057e-02, -2.8929e-02],
            [ 1.2290e-03,  1.0163e-02,  1.8861e-02],
            [ 1.7264e-02,  2.7257e-04,  1.3785e-02],
            [-1.3482e-02, -3.6427e-03,  6.7481e-04],
            [ 4.6782e-03, -5.2423e-03,  2.4467e-03],
            [-5.9113e-03, -6.2244e-03, -1.8162e-03],
            [ 1.5496e-02,  1.4582e-02,  1.9514e-03],
            [ 7.4958e-03,  1.5886e-03, -8.2305e-03],
            [ 1.9086e-02,  1.6360e-03, -3.9674e-03],
            [-5.7021e-03, -2.7307e-03, -4.1066e-03],
            [ 1.7450e-03,  1.4602e-02,  2.5794e-02],
            [-8.2788e-04,  2.2902e-03,  4.5161e-03],
            [ 1.1632e-02,  8.9193e-03, -7.2813e-03],
            [ 7.5721e-03,  2.6784e-03,  1.1393e-02],
            [ 5.1939e-03,  3.6903e-03,  1.4049e-02],
            [-1.8383e-02, -2.2529e-02, -2.4477e-02],
            [ 5.8842e-04, -5.7874e-03, -1.4770e-02],
            [-1.6125e-02, -8.6101e-03, -1.4533e-02],
            [ 2.0540e-02,  2.0729e-02,  6.4338e-03],
            [ 3.3587e-03, -1.1226e-02, -1.6444e-02],
            [-1.4742e-03, -1.0489e-02,  1.7097e-03],
            [ 2.8130e-02,  2.3546e-02,  3.2791e-02],
            [-1.8532e-02, -1.2842e-02, -8.7756e-03],
            [-8.0533e-03, -1.0771e-02, -1.7536e-02],
            [-3.9009e-03,  1.6150e-02,  3.3359e-02],
            [-7.4554e-03, -1.4154e-02, -6.1910e-03],
            [ 3.4734e-03, -1.1370e-02, -1.0581e-02],
            [ 1.1476e-02,  3.9281e-03,  2.8231e-03],
            [ 7.1639e-03, -1.4741e-03, -3.8066e-03],
            [ 2.2250e-03, -8.7552e-03, -9.5719e-03],
            [ 2.4146e-02,  2.1696e-02,  2.8056e-02],
            [-5.4365e-03, -2.4291e-02, -1.7802e-02],
            [ 7.4263e-03,  1.0510e-02,  1.2705e-02],
            [ 6.2669e-03,  6.2658e-03,  1.9211e-02],
            [ 1.6378e-02,  9.4933e-03,  6.6971e-03],
            [ 1.7173e-02,  2.3601e-02,  2.3296e-02],
            [-1.4568e-02, -9.8279e-03, -1.1556e-02],
            [ 1.4431e-02,  1.4430e-02,  6.6362e-03],
            [-6.8230e-03,  1.8863e-02,  1.4555e-02],
            [ 6.1156e-03,  3.4700e-03, -2.6662e-03],
            [-2.6983e-03, -5.9402e-03, -9.2276e-03],
            [ 1.0235e-02,  7.4173e-03, -7.6243e-03],
            [-1.3255e-02,  1.9322e-02, -9.2153e-04],
            [ 2.4222e-03, -4.8039e-03, -1.5759e-02],
            [ 2.6244e-02,  2.5951e-02,  2.0249e-02],
            [ 1.5711e-02,  1.8498e-02,  2.7407e-03],
            [-2.1714e-03,  4.7214e-03, -2.2443e-02],
            [-7.4747e-03,  7.4166e-03,  1.4430e-02],
            [-8.3906e-03, -7.9776e-03,  9.7927e-03],
            [ 3.8321e-02,  9.6622e-03, -1.9268e-02],
            [-1.4605e-02, -6.7032e-03,  3.9675e-03]
        ]

        self.latent_rgb_factors_bias = [-0.0571, -0.1657, -0.2512]
+
class HunyuanVideo(LatentFormat):
    """Hunyuan Video latent format: 16-channel 3D latents with a plain scale factor."""
    latent_channels = 16
    latent_dimensions = 3
    scale_factor = 0.476986
    # Per-channel projection to RGB used for latent previews (16 rows, one per channel).
    latent_rgb_factors = [
        [-0.0395, -0.0331,  0.0445],
        [ 0.0696,  0.0795,  0.0518],
        [ 0.0135, -0.0945, -0.0282],
        [ 0.0108, -0.0250, -0.0765],
        [-0.0209,  0.0032,  0.0224],
        [-0.0804, -0.0254, -0.0639],
        [-0.0991,  0.0271, -0.0669],
        [-0.0646, -0.0422, -0.0400],
        [-0.0696, -0.0595, -0.0894],
        [-0.0799, -0.0208, -0.0375],
        [ 0.1166,  0.1627,  0.0962],
        [ 0.1165,  0.0432,  0.0407],
        [-0.2315, -0.1920, -0.1355],
        [-0.0270,  0.0401, -0.0821],
        [-0.0616, -0.0997, -0.0727],
        [ 0.0249, -0.0469, -0.1703]
    ]

    latent_rgb_factors_bias = [ 0.0259, -0.0192, -0.0761]
+
class Cosmos1CV8x8x8(LatentFormat):
    """Cosmos 1.0 CV8x8x8 latent format: 16-channel 3D latents, identity scaling."""
    latent_channels = 16
    latent_dimensions = 3

    # Per-channel projection to RGB used for latent previews (16 rows, one per channel).
    latent_rgb_factors = [
        [ 0.1817,  0.2284,  0.2423],
        [-0.0586, -0.0862, -0.3108],
        [-0.4703, -0.4255, -0.3995],
        [ 0.0803,  0.1963,  0.1001],
        [-0.0820, -0.1050,  0.0400],
        [ 0.2511,  0.3098,  0.2787],
        [-0.1830, -0.2117, -0.0040],
        [-0.0621, -0.2187, -0.0939],
        [ 0.3619,  0.1082,  0.1455],
        [ 0.3164,  0.3922,  0.2575],
        [ 0.1152,  0.0231, -0.0462],
        [-0.1434, -0.3609, -0.3665],
        [ 0.0635,  0.1471,  0.1680],
        [-0.3635, -0.1963, -0.3248],
        [-0.1865,  0.0365,  0.2346],
        [ 0.0447,  0.0994,  0.0881]
    ]

    latent_rgb_factors_bias = [-0.1223, -0.1889, -0.1976]
+
class Wan21(LatentFormat):
    """Wan 2.1 latent format: 16-channel 3D latents with per-channel mean/std normalization."""
    latent_channels = 16
    latent_dimensions = 3

    # Per-channel projection to RGB used for latent previews (16 rows, one per channel).
    latent_rgb_factors = [
        [-0.1299, -0.1692,  0.2932],
        [ 0.0671,  0.0406,  0.0442],
        [ 0.3568,  0.2548,  0.1747],
        [ 0.0372,  0.2344,  0.1420],
        [ 0.0313,  0.0189, -0.0328],
        [ 0.0296, -0.0956, -0.0665],
        [-0.3477, -0.4059, -0.2925],
        [ 0.0166,  0.1902,  0.1975],
        [-0.0412,  0.0267, -0.1364],
        [-0.1293,  0.0740,  0.1636],
        [ 0.0680,  0.3019,  0.1128],
        [ 0.0032,  0.0581,  0.0639],
        [-0.1251,  0.0927,  0.1699],
        [ 0.0060, -0.0633,  0.0005],
        [ 0.3477,  0.2275,  0.2950],
        [ 0.1984,  0.0913,  0.1861]
    ]

    latent_rgb_factors_bias = [-0.1835, -0.0868, -0.3360]

    def __init__(self):
        self.scale_factor = 1.0
        # Per-channel statistics, shaped for broadcasting over (B, 16, T, H, W).
        self.latents_mean = torch.tensor([
            -0.7571, -0.7089, -0.9113, 0.1075, -0.1745, 0.9653, -0.1517, 1.5508,
            0.4134, -0.0715, 0.5517, -0.3632, -0.1922, -0.9497, 0.2503, -0.2921
        ]).view(1, self.latent_channels, 1, 1, 1)
        self.latents_std = torch.tensor([
            2.8184, 1.4541, 2.3275, 2.6558, 1.2196, 1.7708, 2.6052, 2.0743,
            3.2687, 2.1526, 2.8652, 1.5579, 1.6382, 1.1253, 2.8251, 1.9160
        ]).view(1, self.latent_channels, 1, 1, 1)

        self.taesd_decoder_name = None  # TODO: no taesd decoder available yet

    def process_in(self, latent):
        # center by mean, then scale into model space
        latents_mean = self.latents_mean.to(latent.device, latent.dtype)
        latents_std = self.latents_std.to(latent.device, latent.dtype)
        return (latent - latents_mean) * self.scale_factor / latents_std

    def process_out(self, latent):
        # exact inverse of process_in
        latents_mean = self.latents_mean.to(latent.device, latent.dtype)
        latents_std = self.latents_std.to(latent.device, latent.dtype)
        return latent * latents_std / self.scale_factor + latents_mean
+
class Wan22(Wan21):
    """Wan 2.2 latent format: 48 channels; reuses Wan21's process_in/process_out with its own statistics."""
    latent_channels = 48
    latent_dimensions = 3

    # Per-channel projection to RGB used for latent previews (48 rows, one per channel).
    latent_rgb_factors = [
        [ 0.0119,  0.0103,  0.0046],
        [-0.1062, -0.0504,  0.0165],
        [ 0.0140,  0.0409,  0.0491],
        [-0.0813, -0.0677,  0.0607],
        [ 0.0656,  0.0851,  0.0808],
        [ 0.0264,  0.0463,  0.0912],
        [ 0.0295,  0.0326,  0.0590],
        [-0.0244, -0.0270,  0.0025],
        [ 0.0443, -0.0102,  0.0288],
        [-0.0465, -0.0090, -0.0205],
        [ 0.0359,  0.0236,  0.0082],
        [-0.0776,  0.0854,  0.1048],
        [ 0.0564,  0.0264,  0.0561],
        [ 0.0006,  0.0594,  0.0418],
        [-0.0319, -0.0542, -0.0637],
        [-0.0268,  0.0024,  0.0260],
        [ 0.0539,  0.0265,  0.0358],
        [-0.0359, -0.0312, -0.0287],
        [-0.0285, -0.1032, -0.1237],
        [ 0.1041,  0.0537,  0.0622],
        [-0.0086, -0.0374, -0.0051],
        [ 0.0390,  0.0670,  0.2863],
        [ 0.0069,  0.0144,  0.0082],
        [ 0.0006, -0.0167,  0.0079],
        [ 0.0313, -0.0574, -0.0232],
        [-0.1454, -0.0902, -0.0481],
        [ 0.0714,  0.0827,  0.0447],
        [-0.0304, -0.0574, -0.0196],
        [ 0.0401,  0.0384,  0.0204],
        [-0.0758, -0.0297, -0.0014],
        [ 0.0568,  0.1307,  0.1372],
        [-0.0055, -0.0310, -0.0380],
        [ 0.0239, -0.0305,  0.0325],
        [-0.0663, -0.0673, -0.0140],
        [-0.0416, -0.0047, -0.0023],
        [ 0.0166,  0.0112, -0.0093],
        [-0.0211,  0.0011,  0.0331],
        [ 0.1833,  0.1466,  0.2250],
        [-0.0368,  0.0370,  0.0295],
        [-0.3441, -0.3543, -0.2008],
        [-0.0479, -0.0489, -0.0420],
        [-0.0660, -0.0153,  0.0800],
        [-0.0101,  0.0068,  0.0156],
        [-0.0690, -0.0452, -0.0927],
        [-0.0145,  0.0041,  0.0015],
        [ 0.0421,  0.0451,  0.0373],
        [ 0.0504, -0.0483, -0.0356],
        [-0.0837,  0.0168,  0.0055]
    ]

    latent_rgb_factors_bias = [0.0317, -0.0878, -0.1388]

    def __init__(self):
        self.scale_factor = 1.0
        # Per-channel statistics, shaped for broadcasting over (B, 48, T, H, W).
        self.latents_mean = torch.tensor([
            -0.2289, -0.0052, -0.1323, -0.2339, -0.2799, 0.0174, 0.1838, 0.1557,
            -0.1382, 0.0542, 0.2813, 0.0891, 0.1570, -0.0098, 0.0375, -0.1825,
            -0.2246, -0.1207, -0.0698, 0.5109, 0.2665, -0.2108, -0.2158, 0.2502,
            -0.2055, -0.0322, 0.1109, 0.1567, -0.0729, 0.0899, -0.2799, -0.1230,
            -0.0313, -0.1649, 0.0117, 0.0723, -0.2839, -0.2083, -0.0520, 0.3748,
            0.0152, 0.1957, 0.1433, -0.2944, 0.3573, -0.0548, -0.1681, -0.0667,
        ]).view(1, self.latent_channels, 1, 1, 1)
        self.latents_std = torch.tensor([
            0.4765, 1.0364, 0.4514, 1.1677, 0.5313, 0.4990, 0.4818, 0.5013,
            0.8158, 1.0344, 0.5894, 1.0901, 0.6885, 0.6165, 0.8454, 0.4978,
            0.5759, 0.3523, 0.7135, 0.6804, 0.5833, 1.4146, 0.8986, 0.5659,
            0.7069, 0.5338, 0.4889, 0.4917, 0.4069, 0.4999, 0.6866, 0.4093,
            0.5709, 0.6065, 0.6415, 0.4944, 0.5726, 1.2042, 0.5458, 1.6887,
            0.3971, 1.0600, 0.3943, 0.5537, 0.5444, 0.4089, 0.7468, 0.7744
        ]).view(1, self.latent_channels, 1, 1, 1)
+
class Hunyuan3Dv2(LatentFormat):
    """Hunyuan3D v2 latent format: 64-channel 1D latents."""
    latent_channels = 64
    latent_dimensions = 1
    scale_factor = 0.9990943042622529
+
class Hunyuan3Dv2mini(LatentFormat):
    """Hunyuan3D v2 mini latent format: 64-channel 1D latents with its own scale factor."""
    latent_channels = 64
    latent_dimensions = 1
    scale_factor = 1.0188137142395404
+
class ACEAudio(LatentFormat):
    """ACE audio latent format: 8-channel 2D latents with identity scaling."""
    latent_channels = 8
    latent_dimensions = 2
diff --git a/ComfyUI/comfy/lora.py b/ComfyUI/comfy/lora.py
new file mode 100644
index 0000000000000000000000000000000000000000..387d5c52aef161ce4f00c27c39795351319ead21
--- /dev/null
+++ b/ComfyUI/comfy/lora.py
@@ -0,0 +1,395 @@
+"""
+ This file is part of ComfyUI.
+ Copyright (C) 2024 Comfy
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program. If not, see <https://www.gnu.org/licenses/>.
+"""
+
+from __future__ import annotations
+import comfy.utils
+import comfy.model_management
+import comfy.model_base
+import comfy.weight_adapter as weight_adapter
+import logging
+import torch
+
+# Mapping from CLIP text-encoder submodule names (dot form, as they appear in
+# the model state dict) to the underscore form used in kohya-style lora keys.
+LORA_CLIP_MAP = {
+    "mlp.fc1": "mlp_fc1",
+    "mlp.fc2": "mlp_fc2",
+    "self_attn.k_proj": "self_attn_k_proj",
+    "self_attn.q_proj": "self_attn_q_proj",
+    "self_attn.v_proj": "self_attn_v_proj",
+    "self_attn.out_proj": "self_attn_out_proj",
+}
+
+
+def load_lora(lora, to_load, log_missing=True):
+ patch_dict = {}
+ loaded_keys = set()
+ for x in to_load:
+ alpha_name = "{}.alpha".format(x)
+ alpha = None
+ if alpha_name in lora.keys():
+ alpha = lora[alpha_name].item()
+ loaded_keys.add(alpha_name)
+
+ dora_scale_name = "{}.dora_scale".format(x)
+ dora_scale = None
+ if dora_scale_name in lora.keys():
+ dora_scale = lora[dora_scale_name]
+ loaded_keys.add(dora_scale_name)
+
+ for adapter_cls in weight_adapter.adapters:
+ adapter = adapter_cls.load(x, lora, alpha, dora_scale, loaded_keys)
+ if adapter is not None:
+ patch_dict[to_load[x]] = adapter
+ loaded_keys.update(adapter.loaded_keys)
+ continue
+
+ w_norm_name = "{}.w_norm".format(x)
+ b_norm_name = "{}.b_norm".format(x)
+ w_norm = lora.get(w_norm_name, None)
+ b_norm = lora.get(b_norm_name, None)
+
+ if w_norm is not None:
+ loaded_keys.add(w_norm_name)
+ patch_dict[to_load[x]] = ("diff", (w_norm,))
+ if b_norm is not None:
+ loaded_keys.add(b_norm_name)
+ patch_dict["{}.bias".format(to_load[x][:-len(".weight")])] = ("diff", (b_norm,))
+
+ diff_name = "{}.diff".format(x)
+ diff_weight = lora.get(diff_name, None)
+ if diff_weight is not None:
+ patch_dict[to_load[x]] = ("diff", (diff_weight,))
+ loaded_keys.add(diff_name)
+
+ diff_bias_name = "{}.diff_b".format(x)
+ diff_bias = lora.get(diff_bias_name, None)
+ if diff_bias is not None:
+ patch_dict["{}.bias".format(to_load[x][:-len(".weight")])] = ("diff", (diff_bias,))
+ loaded_keys.add(diff_bias_name)
+
+ set_weight_name = "{}.set_weight".format(x)
+ set_weight = lora.get(set_weight_name, None)
+ if set_weight is not None:
+ patch_dict[to_load[x]] = ("set", (set_weight,))
+ loaded_keys.add(set_weight_name)
+
+ if log_missing:
+ for x in lora.keys():
+ if x not in loaded_keys:
+ logging.warning("lora key not loaded: {}".format(x))
+
+ return patch_dict
+
+def model_lora_keys_clip(model, key_map={}):
+ sdk = model.state_dict().keys()
+ for k in sdk:
+ if k.endswith(".weight"):
+ key_map["text_encoders.{}".format(k[:-len(".weight")])] = k #generic lora format without any weird key names
+
+ text_model_lora_key = "lora_te_text_model_encoder_layers_{}_{}"
+ clip_l_present = False
+ clip_g_present = False
+ for b in range(32): #TODO: clean up
+ for c in LORA_CLIP_MAP:
+ k = "clip_h.transformer.text_model.encoder.layers.{}.{}.weight".format(b, c)
+ if k in sdk:
+ lora_key = text_model_lora_key.format(b, LORA_CLIP_MAP[c])
+ key_map[lora_key] = k
+ lora_key = "lora_te1_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c])
+ key_map[lora_key] = k
+ lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
+ key_map[lora_key] = k
+
+ k = "clip_l.transformer.text_model.encoder.layers.{}.{}.weight".format(b, c)
+ if k in sdk:
+ lora_key = text_model_lora_key.format(b, LORA_CLIP_MAP[c])
+ key_map[lora_key] = k
+ lora_key = "lora_te1_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #SDXL base
+ key_map[lora_key] = k
+ clip_l_present = True
+ lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
+ key_map[lora_key] = k
+
+ k = "clip_g.transformer.text_model.encoder.layers.{}.{}.weight".format(b, c)
+ if k in sdk:
+ clip_g_present = True
+ if clip_l_present:
+ lora_key = "lora_te2_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #SDXL base
+ key_map[lora_key] = k
+ lora_key = "text_encoder_2.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
+ key_map[lora_key] = k
+ else:
+ lora_key = "lora_te_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #TODO: test if this is correct for SDXL-Refiner
+ key_map[lora_key] = k
+ lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
+ key_map[lora_key] = k
+ lora_key = "lora_prior_te_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #cascade lora: TODO put lora key prefix in the model config
+ key_map[lora_key] = k
+
+ for k in sdk:
+ if k.endswith(".weight"):
+ if k.startswith("t5xxl.transformer."):#OneTrainer SD3 and Flux lora
+ l_key = k[len("t5xxl.transformer."):-len(".weight")]
+ t5_index = 1
+ if clip_g_present:
+ t5_index += 1
+ if clip_l_present:
+ t5_index += 1
+ if t5_index == 2:
+ key_map["lora_te{}_{}".format(t5_index, l_key.replace(".", "_"))] = k #OneTrainer Flux
+ t5_index += 1
+
+ key_map["lora_te{}_{}".format(t5_index, l_key.replace(".", "_"))] = k
+ elif k.startswith("hydit_clip.transformer.bert."): #HunyuanDiT Lora
+ l_key = k[len("hydit_clip.transformer.bert."):-len(".weight")]
+ lora_key = "lora_te1_{}".format(l_key.replace(".", "_"))
+ key_map[lora_key] = k
+
+
+ k = "clip_g.transformer.text_projection.weight"
+ if k in sdk:
+ key_map["lora_prior_te_text_projection"] = k #cascade lora?
+ # key_map["text_encoder.text_projection"] = k #TODO: check if other lora have the text_projection too
+ key_map["lora_te2_text_projection"] = k #OneTrainer SD3 lora
+
+ k = "clip_l.transformer.text_projection.weight"
+ if k in sdk:
+ key_map["lora_te1_text_projection"] = k #OneTrainer SD3 lora, not necessary but omits warning
+
+ return key_map
+
+def model_lora_keys_unet(model, key_map={}):
+ sd = model.state_dict()
+ sdk = sd.keys()
+
+ for k in sdk:
+ if k.startswith("diffusion_model."):
+ if k.endswith(".weight"):
+ key_lora = k[len("diffusion_model."):-len(".weight")].replace(".", "_")
+ key_map["lora_unet_{}".format(key_lora)] = k
+ key_map["{}".format(k[:-len(".weight")])] = k #generic lora format without any weird key names
+ else:
+ key_map["{}".format(k)] = k #generic lora format for not .weight without any weird key names
+
+ diffusers_keys = comfy.utils.unet_to_diffusers(model.model_config.unet_config)
+ for k in diffusers_keys:
+ if k.endswith(".weight"):
+ unet_key = "diffusion_model.{}".format(diffusers_keys[k])
+ key_lora = k[:-len(".weight")].replace(".", "_")
+ key_map["lora_unet_{}".format(key_lora)] = unet_key
+ key_map["lycoris_{}".format(key_lora)] = unet_key #simpletuner lycoris format
+
+ diffusers_lora_prefix = ["", "unet."]
+ for p in diffusers_lora_prefix:
+ diffusers_lora_key = "{}{}".format(p, k[:-len(".weight")].replace(".to_", ".processor.to_"))
+ if diffusers_lora_key.endswith(".to_out.0"):
+ diffusers_lora_key = diffusers_lora_key[:-2]
+ key_map[diffusers_lora_key] = unet_key
+
+ if isinstance(model, comfy.model_base.StableCascade_C):
+ for k in sdk:
+ if k.startswith("diffusion_model."):
+ if k.endswith(".weight"):
+ key_lora = k[len("diffusion_model."):-len(".weight")].replace(".", "_")
+ key_map["lora_prior_unet_{}".format(key_lora)] = k
+
+ if isinstance(model, comfy.model_base.SD3): #Diffusers lora SD3
+ diffusers_keys = comfy.utils.mmdit_to_diffusers(model.model_config.unet_config, output_prefix="diffusion_model.")
+ for k in diffusers_keys:
+ if k.endswith(".weight"):
+ to = diffusers_keys[k]
+ key_lora = "transformer.{}".format(k[:-len(".weight")]) #regular diffusers sd3 lora format
+ key_map[key_lora] = to
+
+ key_lora = "base_model.model.{}".format(k[:-len(".weight")]) #format for flash-sd3 lora and others?
+ key_map[key_lora] = to
+
+ key_lora = "lora_transformer_{}".format(k[:-len(".weight")].replace(".", "_")) #OneTrainer lora
+ key_map[key_lora] = to
+
+ key_lora = "lycoris_{}".format(k[:-len(".weight")].replace(".", "_")) #simpletuner lycoris format
+ key_map[key_lora] = to
+
+ if isinstance(model, comfy.model_base.AuraFlow): #Diffusers lora AuraFlow
+ diffusers_keys = comfy.utils.auraflow_to_diffusers(model.model_config.unet_config, output_prefix="diffusion_model.")
+ for k in diffusers_keys:
+ if k.endswith(".weight"):
+ to = diffusers_keys[k]
+ key_lora = "transformer.{}".format(k[:-len(".weight")]) #simpletrainer and probably regular diffusers lora format
+ key_map[key_lora] = to
+
+ if isinstance(model, comfy.model_base.PixArt):
+ diffusers_keys = comfy.utils.pixart_to_diffusers(model.model_config.unet_config, output_prefix="diffusion_model.")
+ for k in diffusers_keys:
+ if k.endswith(".weight"):
+ to = diffusers_keys[k]
+ key_lora = "transformer.{}".format(k[:-len(".weight")]) #default format
+ key_map[key_lora] = to
+
+ key_lora = "base_model.model.{}".format(k[:-len(".weight")]) #diffusers training script
+ key_map[key_lora] = to
+
+ key_lora = "unet.base_model.model.{}".format(k[:-len(".weight")]) #old reference peft script
+ key_map[key_lora] = to
+
+ if isinstance(model, comfy.model_base.HunyuanDiT):
+ for k in sdk:
+ if k.startswith("diffusion_model.") and k.endswith(".weight"):
+ key_lora = k[len("diffusion_model."):-len(".weight")]
+ key_map["base_model.model.{}".format(key_lora)] = k #official hunyuan lora format
+
+ if isinstance(model, comfy.model_base.Flux): #Diffusers lora Flux
+ diffusers_keys = comfy.utils.flux_to_diffusers(model.model_config.unet_config, output_prefix="diffusion_model.")
+ for k in diffusers_keys:
+ if k.endswith(".weight"):
+ to = diffusers_keys[k]
+ key_map["transformer.{}".format(k[:-len(".weight")])] = to #simpletrainer and probably regular diffusers flux lora format
+ key_map["lycoris_{}".format(k[:-len(".weight")].replace(".", "_"))] = to #simpletrainer lycoris
+ key_map["lora_transformer_{}".format(k[:-len(".weight")].replace(".", "_"))] = to #onetrainer
+
+ if isinstance(model, comfy.model_base.GenmoMochi):
+ for k in sdk:
+ if k.startswith("diffusion_model.") and k.endswith(".weight"): #Official Mochi lora format
+ key_lora = k[len("diffusion_model."):-len(".weight")]
+ key_map["{}".format(key_lora)] = k
+
+ if isinstance(model, comfy.model_base.HunyuanVideo):
+ for k in sdk:
+ if k.startswith("diffusion_model.") and k.endswith(".weight"):
+ # diffusion-pipe lora format
+ key_lora = k
+ key_lora = key_lora.replace("_mod.lin.", "_mod.linear.").replace("_attn.qkv.", "_attn_qkv.").replace("_attn.proj.", "_attn_proj.")
+ key_lora = key_lora.replace("mlp.0.", "mlp.fc1.").replace("mlp.2.", "mlp.fc2.")
+ key_lora = key_lora.replace(".modulation.lin.", ".modulation.linear.")
+ key_lora = key_lora[len("diffusion_model."):-len(".weight")]
+ key_map["transformer.{}".format(key_lora)] = k
+ key_map["diffusion_model.{}".format(key_lora)] = k # Old loras
+
+ if isinstance(model, comfy.model_base.HiDream):
+ for k in sdk:
+ if k.startswith("diffusion_model."):
+ if k.endswith(".weight"):
+ key_lora = k[len("diffusion_model."):-len(".weight")]
+ key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = k #SimpleTuner lycoris format
+ key_map["transformer.{}".format(key_lora)] = k #SimpleTuner regular format
+
+ if isinstance(model, comfy.model_base.ACEStep):
+ for k in sdk:
+ if k.startswith("diffusion_model.") and k.endswith(".weight"): #Official ACE step lora format
+ key_lora = k[len("diffusion_model."):-len(".weight")]
+ key_map["{}".format(key_lora)] = k
+
+ return key_map
+
+
+def pad_tensor_to_shape(tensor: torch.Tensor, new_shape: list[int]) -> torch.Tensor:
+ """
+ Pad a tensor to a new shape with zeros.
+
+ Args:
+ tensor (torch.Tensor): The original tensor to be padded.
+ new_shape (List[int]): The desired shape of the padded tensor.
+
+ Returns:
+ torch.Tensor: A new tensor padded with zeros to the specified shape.
+
+ Note:
+ If the new shape is smaller than the original tensor in any dimension,
+ the original tensor will be truncated in that dimension.
+ """
+ if any([new_shape[i] < tensor.shape[i] for i in range(len(new_shape))]):
+ raise ValueError("The new shape must be larger than the original tensor in all dimensions")
+
+ if len(new_shape) != len(tensor.shape):
+ raise ValueError("The new shape must have the same number of dimensions as the original tensor")
+
+ # Create a new tensor filled with zeros
+ padded_tensor = torch.zeros(new_shape, dtype=tensor.dtype, device=tensor.device)
+
+ # Create slicing tuples for both tensors
+ orig_slices = tuple(slice(0, dim) for dim in tensor.shape)
+ new_slices = tuple(slice(0, dim) for dim in tensor.shape)
+
+ # Copy the original tensor into the new tensor
+ padded_tensor[new_slices] = tensor[orig_slices]
+
+ return padded_tensor
+
+def calculate_weight(patches, weight, key, intermediate_dtype=torch.float32, original_weights=None):
+    """Apply a list of weight patches (lora adapters, diffs, sets, ...) to
+    `weight` and return the patched tensor.
+
+    Each patch is a tuple (strength, value, strength_model, offset, function).
+    NOTE(review): `weight` is modified with in-place ops (`*=`, `+=`, `copy_`);
+    this assumes the caller passed a scratch copy of the weight — confirm at
+    call sites.
+    """
+    for p in patches:
+        strength = p[0]
+        v = p[1]
+        strength_model = p[2]
+        offset = p[3]
+        function = p[4]
+        if function is None:
+            function = lambda a: a
+
+        # When an offset is given, patch only a narrowed view of the weight
+        # and restore the full tensor afterwards.
+        old_weight = None
+        if offset is not None:
+            old_weight = weight
+            weight = weight.narrow(offset[0], offset[1], offset[2])
+
+        if strength_model != 1.0:
+            weight *= strength_model
+
+        # A list value is a nested patch stack: resolve it recursively on a
+        # device/dtype-cast copy of its base weight first.
+        if isinstance(v, list):
+            v = (calculate_weight(v[1:], v[0][1](comfy.model_management.cast_to_device(v[0][0], weight.device, intermediate_dtype, copy=True), inplace=True), key, intermediate_dtype=intermediate_dtype), )
+
+        # Adapter objects (lora/loha/lokr/...) implement their own math.
+        if isinstance(v, weight_adapter.WeightAdapterBase):
+            output = v.calculate_weight(weight, key, strength, strength_model, offset, function, intermediate_dtype, original_weights)
+            if output is None:
+                logging.warning("Calculate Weight Failed: {} {}".format(v.name, key))
+            else:
+                weight = output
+                if old_weight is not None:
+                    weight = old_weight
+            continue
+
+        # Tuple patches: a bare 1-tuple defaults to "diff".
+        # NOTE(review): a tuple of length > 2 would leave patch_type unbound
+        # (NameError below); the known patch formats only produce 1- or
+        # 2-tuples — confirm if new formats are added.
+        if len(v) == 1:
+            patch_type = "diff"
+        elif len(v) == 2:
+            patch_type = v[0]
+            v = v[1]
+
+        if patch_type == "diff":
+            diff: torch.Tensor = v[0]
+            # An extra flag to pad the weight if the diff's shape is larger than the weight
+            do_pad_weight = len(v) > 1 and v[1]['pad_weight']
+            if do_pad_weight and diff.shape != weight.shape:
+                logging.info("Pad weight {} from {} to shape: {}".format(key, weight.shape, diff.shape))
+                weight = pad_tensor_to_shape(weight, diff.shape)
+
+            if strength != 0.0:
+                if diff.shape != weight.shape:
+                    logging.warning("WARNING SHAPE MISMATCH {} WEIGHT NOT MERGED {} != {}".format(key, diff.shape, weight.shape))
+                else:
+                    weight += function(strength * comfy.model_management.cast_to_device(diff, weight.device, weight.dtype))
+        elif patch_type == "set":
+            weight.copy_(v[0])
+        elif patch_type == "model_as_lora":
+            # Diff against the pristine weight snapshot kept by the caller.
+            target_weight: torch.Tensor = v[0]
+            diff_weight = comfy.model_management.cast_to_device(target_weight, weight.device, intermediate_dtype) - \
+                comfy.model_management.cast_to_device(original_weights[key][0][0], weight.device, intermediate_dtype)
+            weight += function(strength * comfy.model_management.cast_to_device(diff_weight, weight.device, weight.dtype))
+        else:
+            logging.warning("patch type not recognized {} {}".format(patch_type, key))
+
+        if old_weight is not None:
+            weight = old_weight
+
+    return weight
diff --git a/ComfyUI/comfy/lora_convert.py b/ComfyUI/comfy/lora_convert.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e00b63db94969e813b4a9244dce2b1c9d046a8d
--- /dev/null
+++ b/ComfyUI/comfy/lora_convert.py
@@ -0,0 +1,24 @@
+import torch
+import comfy.utils
+
+
+def convert_lora_bfl_control(sd): #BFL loras for Flux
+ sd_out = {}
+ for k in sd:
+ k_to = "diffusion_model.{}".format(k.replace(".lora_B.bias", ".diff_b").replace("_norm.scale", "_norm.scale.set_weight"))
+ sd_out[k_to] = sd[k]
+
+ sd_out["diffusion_model.img_in.reshape_weight"] = torch.tensor([sd["img_in.lora_B.weight"].shape[0], sd["img_in.lora_A.weight"].shape[1]])
+ return sd_out
+
+
+def convert_lora_wan_fun(sd): #Wan Fun loras
+    # Wan Fun loras use a double underscore after "lora_unet"; collapse it so
+    # the keys match the standard lora_unet_ prefix.
+    return comfy.utils.state_dict_prefix_replace(sd, {"lora_unet__": "lora_unet_"})
+
+
+def convert_lora(sd):
+ if "img_in.lora_A.weight" in sd and "single_blocks.0.norm.key_norm.scale" in sd:
+ return convert_lora_bfl_control(sd)
+ if "lora_unet__blocks_0_cross_attn_k.lora_down.weight" in sd:
+ return convert_lora_wan_fun(sd)
+ return sd
diff --git a/ComfyUI/comfy/model_base.py b/ComfyUI/comfy/model_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b7978949f6d4da833ea20a848a7bbc0395d237f
--- /dev/null
+++ b/ComfyUI/comfy/model_base.py
@@ -0,0 +1,1305 @@
+"""
+ This file is part of ComfyUI.
+ Copyright (C) 2024 Comfy
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program. If not, see <https://www.gnu.org/licenses/>.
+"""
+
+import torch
+import logging
+from comfy.ldm.modules.diffusionmodules.openaimodel import UNetModel, Timestep
+from comfy.ldm.cascade.stage_c import StageC
+from comfy.ldm.cascade.stage_b import StageB
+from comfy.ldm.modules.encoders.noise_aug_modules import CLIPEmbeddingNoiseAugmentation
+from comfy.ldm.modules.diffusionmodules.upscaling import ImageConcatWithNoiseAugmentation
+from comfy.ldm.modules.diffusionmodules.mmdit import OpenAISignatureMMDITWrapper
+import comfy.ldm.genmo.joint_model.asymm_models_joint
+import comfy.ldm.aura.mmdit
+import comfy.ldm.pixart.pixartms
+import comfy.ldm.hydit.models
+import comfy.ldm.audio.dit
+import comfy.ldm.audio.embedders
+import comfy.ldm.flux.model
+import comfy.ldm.lightricks.model
+import comfy.ldm.hunyuan_video.model
+import comfy.ldm.cosmos.model
+import comfy.ldm.cosmos.predict2
+import comfy.ldm.lumina.model
+import comfy.ldm.wan.model
+import comfy.ldm.hunyuan3d.model
+import comfy.ldm.hidream.model
+import comfy.ldm.chroma.model
+import comfy.ldm.ace.model
+import comfy.ldm.omnigen.omnigen2
+
+import comfy.model_management
+import comfy.patcher_extension
+import comfy.conds
+import comfy.ops
+from enum import Enum
+from . import utils
+import comfy.latent_formats
+import comfy.model_sampling
+import math
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+ from comfy.model_patcher import ModelPatcher
+
+class ModelType(Enum):
+    # Denoiser parameterization / sampling family of a model; used by
+    # model_sampling() to pick the prediction and schedule classes.
+    EPS = 1
+    V_PREDICTION = 2
+    V_PREDICTION_EDM = 3
+    STABLE_CASCADE = 4
+    EDM = 5
+    FLOW = 6
+    V_PREDICTION_CONTINUOUS = 7
+    FLUX = 8
+    IMG_TO_IMG = 9
+    FLOW_COSMOS = 10
+
+
+def model_sampling(model_config, model_type):
+    """Build the ModelSampling object for a model: combines a prediction-type
+    mixin (c) with a sigma-schedule class (s) selected by `model_type`."""
+    s = comfy.model_sampling.ModelSamplingDiscrete  # default schedule class
+
+    if model_type == ModelType.EPS:
+        c = comfy.model_sampling.EPS
+    elif model_type == ModelType.V_PREDICTION:
+        c = comfy.model_sampling.V_PREDICTION
+    elif model_type == ModelType.V_PREDICTION_EDM:
+        c = comfy.model_sampling.V_PREDICTION
+        s = comfy.model_sampling.ModelSamplingContinuousEDM
+    elif model_type == ModelType.FLOW:
+        c = comfy.model_sampling.CONST
+        s = comfy.model_sampling.ModelSamplingDiscreteFlow
+    elif model_type == ModelType.STABLE_CASCADE:
+        c = comfy.model_sampling.EPS
+        s = comfy.model_sampling.StableCascadeSampling
+    elif model_type == ModelType.EDM:
+        c = comfy.model_sampling.EDM
+        s = comfy.model_sampling.ModelSamplingContinuousEDM
+    elif model_type == ModelType.V_PREDICTION_CONTINUOUS:
+        c = comfy.model_sampling.V_PREDICTION
+        s = comfy.model_sampling.ModelSamplingContinuousV
+    elif model_type == ModelType.FLUX:
+        c = comfy.model_sampling.CONST
+        s = comfy.model_sampling.ModelSamplingFlux
+    elif model_type == ModelType.IMG_TO_IMG:
+        c = comfy.model_sampling.IMG_TO_IMG
+    elif model_type == ModelType.FLOW_COSMOS:
+        c = comfy.model_sampling.COSMOS_RFLOW
+        s = comfy.model_sampling.ModelSamplingCosmosRFlow
+
+    # Combine schedule and prediction type into one class via multiple
+    # inheritance. NOTE(review): an unmatched model_type leaves `c` unbound
+    # and raises NameError here; all current enum members are covered.
+    class ModelSampling(s, c):
+        pass
+
+    return ModelSampling(model_config)
+
+
+def convert_tensor(extra, dtype):
+ if hasattr(extra, "dtype"):
+ if extra.dtype != torch.int and extra.dtype != torch.long:
+ extra = extra.to(dtype)
+ return extra
+
+
+class BaseModel(torch.nn.Module):
+    """Base wrapper around a diffusion model: owns the denoiser network
+    (`diffusion_model`), the sampling/parameterization object and the
+    conditioning plumbing shared by all model families."""
+    def __init__(self, model_config, model_type=ModelType.EPS, device=None, unet_model=UNetModel):
+        super().__init__()
+
+        unet_config = model_config.unet_config
+        self.latent_format = model_config.latent_format
+        self.model_config = model_config
+        self.manual_cast_dtype = model_config.manual_cast_dtype
+        self.device = device
+        # Set externally by the ModelPatcher that currently owns this model.
+        self.current_patcher: 'ModelPatcher' = None
+
+        if not unet_config.get("disable_unet_model_creation", False):
+            # Pick the (possibly casting / fp8) op implementations unless the
+            # config supplies custom ones.
+            if model_config.custom_operations is None:
+                fp8 = model_config.optimizations.get("fp8", False)
+                operations = comfy.ops.pick_operations(unet_config.get("dtype", None), self.manual_cast_dtype, fp8_optimizations=fp8, scaled_fp8=model_config.scaled_fp8)
+            else:
+                operations = model_config.custom_operations
+            self.diffusion_model = unet_model(**unet_config, device=device, operations=operations)
+            if comfy.model_management.force_channels_last():
+                self.diffusion_model.to(memory_format=torch.channels_last)
+                logging.debug("using channels last mode for diffusion model")
+            logging.info("model weight dtype {}, manual cast: {}".format(self.get_dtype(), self.manual_cast_dtype))
+        self.model_type = model_type
+        self.model_sampling = model_sampling(model_config, model_type)
+
+        # Number of extra "adm" conditioning channels (0 when unused).
+        self.adm_channels = unet_config.get("adm_in_channels", None)
+        if self.adm_channels is None:
+            self.adm_channels = 0
+
+        self.concat_keys = ()
+        logging.info("model_type {}".format(model_type.name))
+        logging.debug("adm {}".format(self.adm_channels))
+        self.memory_usage_factor = model_config.memory_usage_factor
+        self.memory_usage_factor_conds = ()
+
+    def apply_model(self, x, t, c_concat=None, c_crossattn=None, control=None, transformer_options={}, **kwargs):
+        # Public entry point: runs _apply_model through any registered
+        # APPLY_MODEL wrappers (patcher extension hooks).
+        return comfy.patcher_extension.WrapperExecutor.new_class_executor(
+            self._apply_model,
+            self,
+            comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.APPLY_MODEL, transformer_options)
+        ).execute(x, t, c_concat, c_crossattn, control, transformer_options, **kwargs)
+
+    def _apply_model(self, x, t, c_concat=None, c_crossattn=None, control=None, transformer_options={}, **kwargs):
+        """Scale input, run the denoiser, and convert its output to a denoised
+        latent according to the model's parameterization."""
+        sigma = t
+        xc = self.model_sampling.calculate_input(sigma, x)
+
+        # Channel-concatenated conditioning (e.g. inpaint masks) joins the input.
+        if c_concat is not None:
+            xc = torch.cat([xc] + [c_concat], dim=1)
+
+        context = c_crossattn
+        dtype = self.get_dtype()
+
+        if self.manual_cast_dtype is not None:
+            dtype = self.manual_cast_dtype
+
+        xc = xc.to(dtype)
+        t = self.model_sampling.timestep(t).float()
+        if context is not None:
+            context = context.to(dtype)
+
+        # Cast every extra conditioning tensor (or list of tensors) to the
+        # compute dtype; integer tensors are left alone by convert_tensor.
+        extra_conds = {}
+        for o in kwargs:
+            extra = kwargs[o]
+
+            if hasattr(extra, "dtype"):
+                extra = convert_tensor(extra, dtype)
+            elif isinstance(extra, list):
+                ex = []
+                for ext in extra:
+                    ex.append(convert_tensor(ext, dtype))
+                extra = ex
+            extra_conds[o] = extra
+
+        t = self.process_timestep(t, x=x, **extra_conds)
+        model_output = self.diffusion_model(xc, t, context=context, control=control, transformer_options=transformer_options, **extra_conds).float()
+        return self.model_sampling.calculate_denoised(sigma, model_output, x)
+
+    def process_timestep(self, timestep, **kwargs):
+        # Hook for subclasses that remap timesteps; identity by default.
+        return timestep
+
+    def get_dtype(self):
+        return self.diffusion_model.dtype
+
+    def encode_adm(self, **kwargs):
+        # Hook for subclasses that use adm conditioning; None means unused.
+        return None
+
+    def concat_cond(self, **kwargs):
+        """Build the channel-concat conditioning tensor (masks / latent images)
+        declared by self.concat_keys, resized and batched to match the noise."""
+        if len(self.concat_keys) > 0:
+            cond_concat = []
+            denoise_mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
+            concat_latent_image = kwargs.get("concat_latent_image", None)
+            if concat_latent_image is None:
+                concat_latent_image = kwargs.get("latent_image", None)
+            else:
+                concat_latent_image = self.process_latent_in(concat_latent_image)
+
+            noise = kwargs.get("noise", None)
+            device = kwargs["device"]
+
+            # Match the latent image to the noise's spatial (and, for video,
+            # temporal) dimensions and batch size.
+            if concat_latent_image.shape[1:] != noise.shape[1:]:
+                concat_latent_image = utils.common_upscale(concat_latent_image, noise.shape[-1], noise.shape[-2], "bilinear", "center")
+                if noise.ndim == 5:
+                    if concat_latent_image.shape[-3] < noise.shape[-3]:
+                        concat_latent_image = torch.nn.functional.pad(concat_latent_image, (0, 0, 0, 0, 0, noise.shape[-3] - concat_latent_image.shape[-3]), "constant", 0)
+                    else:
+                        concat_latent_image = concat_latent_image[:, :, :noise.shape[-3]]
+
+            concat_latent_image = utils.resize_to_batch_size(concat_latent_image, noise.shape[0])
+
+            if denoise_mask is not None:
+                if len(denoise_mask.shape) == len(noise.shape):
+                    denoise_mask = denoise_mask[:, :1]
+
+                num_dim = noise.ndim - 2
+                denoise_mask = denoise_mask.reshape((-1, 1) + tuple(denoise_mask.shape[-num_dim:]))
+                if denoise_mask.shape[-2:] != noise.shape[-2:]:
+                    denoise_mask = utils.common_upscale(denoise_mask, noise.shape[-1], noise.shape[-2], "bilinear", "center")
+                denoise_mask = utils.resize_to_batch_size(denoise_mask.round(), noise.shape[0])
+
+            for ck in self.concat_keys:
+                if denoise_mask is not None:
+                    if ck == "mask":
+                        cond_concat.append(denoise_mask.to(device))
+                    elif ck == "masked_image":
+                        cond_concat.append(concat_latent_image.to(device)) # NOTE: the latent_image should be masked by the mask in pixel space
+                    elif ck == "mask_inverted":
+                        cond_concat.append(1.0 - denoise_mask.to(device))
+                else:
+                    # No mask supplied: use neutral defaults per key.
+                    if ck == "mask":
+                        cond_concat.append(torch.ones_like(noise)[:, :1])
+                    elif ck == "masked_image":
+                        cond_concat.append(self.blank_inpaint_image_like(noise))
+                    elif ck == "mask_inverted":
+                        cond_concat.append(torch.zeros_like(noise)[:, :1])
+                if ck == "concat_image":
+                    if concat_latent_image is not None:
+                        cond_concat.append(concat_latent_image.to(device))
+                    else:
+                        cond_concat.append(torch.zeros_like(noise))
+            data = torch.cat(cond_concat, dim=1)
+            return data
+        return None
+
+    def extra_conds(self, **kwargs):
+        """Collect the conditioning dict (concat, adm, cross-attention) passed
+        to the denoiser; subclasses extend this."""
+        out = {}
+        concat_cond = self.concat_cond(**kwargs)
+        if concat_cond is not None:
+            out['c_concat'] = comfy.conds.CONDNoiseShape(concat_cond)
+
+        adm = self.encode_adm(**kwargs)
+        if adm is not None:
+            out['y'] = comfy.conds.CONDRegular(adm)
+
+        cross_attn = kwargs.get("cross_attn", None)
+        if cross_attn is not None:
+            out['c_crossattn'] = comfy.conds.CONDCrossAttn(cross_attn)
+
+        cross_attn_cnet = kwargs.get("cross_attn_controlnet", None)
+        if cross_attn_cnet is not None:
+            out['crossattn_controlnet'] = comfy.conds.CONDCrossAttn(cross_attn_cnet)
+
+        # "noise_concat" overrides any concat conditioning built above.
+        c_concat = kwargs.get("noise_concat", None)
+        if c_concat is not None:
+            out['c_concat'] = comfy.conds.CONDNoiseShape(c_concat)
+
+        return out
+
+    def load_model_weights(self, sd, unet_prefix=""):
+        """Load denoiser weights from `sd`, consuming (popping) every key that
+        starts with `unet_prefix`. Missing/unexpected keys are logged."""
+        to_load = {}
+        keys = list(sd.keys())
+        for k in keys:
+            if k.startswith(unet_prefix):
+                to_load[k[len(unet_prefix):]] = sd.pop(k)
+
+        to_load = self.model_config.process_unet_state_dict(to_load)
+        m, u = self.diffusion_model.load_state_dict(to_load, strict=False)
+        if len(m) > 0:
+            logging.warning("unet missing: {}".format(m))
+
+        if len(u) > 0:
+            logging.warning("unet unexpected: {}".format(u))
+        del to_load
+        return self
+
+    def process_latent_in(self, latent):
+        return self.latent_format.process_in(latent)
+
+    def process_latent_out(self, latent):
+        return self.latent_format.process_out(latent)
+
+    def state_dict_for_saving(self, clip_state_dict=None, vae_state_dict=None, clip_vision_state_dict=None):
+        """Assemble a full checkpoint state dict: the denoiser weights plus any
+        provided clip/vae/clip-vision dicts, all run through the config's
+        saving-format converters."""
+        extra_sds = []
+        if clip_state_dict is not None:
+            extra_sds.append(self.model_config.process_clip_state_dict_for_saving(clip_state_dict))
+        if vae_state_dict is not None:
+            extra_sds.append(self.model_config.process_vae_state_dict_for_saving(vae_state_dict))
+        if clip_vision_state_dict is not None:
+            extra_sds.append(self.model_config.process_clip_vision_state_dict_for_saving(clip_vision_state_dict))
+
+        unet_state_dict = self.diffusion_model.state_dict()
+
+        if self.model_config.scaled_fp8 is not None:
+            unet_state_dict["scaled_fp8"] = torch.tensor([], dtype=self.model_config.scaled_fp8)
+
+        unet_state_dict = self.model_config.process_unet_state_dict_for_saving(unet_state_dict)
+
+        # Marker key so loaders can detect v-prediction checkpoints.
+        if self.model_type == ModelType.V_PREDICTION:
+            unet_state_dict["v_pred"] = torch.tensor([])
+
+        for sd in extra_sds:
+            unet_state_dict.update(sd)
+
+        return unet_state_dict
+
+    def set_inpaint(self):
+        # Enable inpaint conditioning: concat a mask and a masked latent image.
+        self.concat_keys = ("mask", "masked_image")
+        def blank_inpaint_image_like(latent_image):
+            blank_image = torch.ones_like(latent_image)
+            # these are the values for "zero" in pixel space translated to latent space
+            blank_image[:,0] *= 0.8223
+            blank_image[:,1] *= -0.6876
+            blank_image[:,2] *= 0.6364
+            blank_image[:,3] *= 0.1380
+            return blank_image
+        self.blank_inpaint_image_like = blank_inpaint_image_like
+
+    def scale_latent_inpaint(self, sigma, noise, latent_image, **kwargs):
+        # Reshape sigma so it broadcasts over the noise dimensions.
+        return self.model_sampling.noise_scaling(sigma.reshape([sigma.shape[0]] + [1] * (len(noise.shape) - 1)), noise, latent_image)
+
+    def memory_required(self, input_shape, cond_shapes={}):
+        """Estimate (in bytes) the memory needed to run the model on inputs of
+        the given shape(s); heuristic constants differ per attention backend."""
+        input_shapes = [input_shape]
+        for c in self.memory_usage_factor_conds:
+            shape = cond_shapes.get(c, None)
+            if shape is not None and len(shape) > 0:
+                input_shapes += shape
+
+        if comfy.model_management.xformers_enabled() or comfy.model_management.pytorch_attention_flash_attention():
+            dtype = self.get_dtype()
+            if self.manual_cast_dtype is not None:
+                dtype = self.manual_cast_dtype
+            #TODO: this needs to be tweaked
+            area = sum(map(lambda input_shape: input_shape[0] * math.prod(input_shape[2:]), input_shapes))
+            return (area * comfy.model_management.dtype_size(dtype) * 0.01 * self.memory_usage_factor) * (1024 * 1024)
+        else:
+            #TODO: this formula might be too aggressive since I tweaked the sub-quad and split algorithms to use less memory.
+            area = sum(map(lambda input_shape: input_shape[0] * math.prod(input_shape[2:]), input_shapes))
+            return (area * 0.15 * self.memory_usage_factor) * (1024 * 1024)
+
+    def extra_conds_shapes(self, **kwargs):
+        # Hook: shapes of extra conds that count toward memory estimation.
+        return {}
+
+
+def unclip_adm(unclip_conditioning, device, noise_augmentor, noise_augment_merge=0.0, seed=None):
+    """Noise-augment CLIP-vision image embeds and merge them into one adm vector.
+
+    NOTE(review): if `unclip_conditioning` is an empty list, `adm_out` is never
+    bound and the final return raises NameError — callers appear to guard with
+    a None check; confirm empty lists cannot reach here.
+    """
+    adm_inputs = []
+    weights = []
+    noise_aug = []
+    for unclip_cond in unclip_conditioning:
+        for adm_cond in unclip_cond["clip_vision_output"].image_embeds:
+            weight = unclip_cond["strength"]
+            noise_augment = unclip_cond["noise_augmentation"]
+            # Map the [0, 1] augmentation strength to a discrete noise level.
+            noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment)
+            c_adm, noise_level_emb = noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device), seed=seed)
+            adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight
+            weights.append(weight)
+            noise_aug.append(noise_augment)
+            adm_inputs.append(adm_out)
+
+    # Multiple embeds: sum them, then re-augment the merged embedding with the
+    # merge noise level.
+    if len(noise_aug) > 1:
+        adm_out = torch.stack(adm_inputs).sum(0)
+        noise_augment = noise_augment_merge
+        noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment)
+        c_adm, noise_level_emb = noise_augmentor(adm_out[:, :noise_augmentor.time_embed.dim], noise_level=torch.tensor([noise_level], device=device))
+        adm_out = torch.cat((c_adm, noise_level_emb), 1)
+
+    return adm_out
+
+class SD21UNCLIP(BaseModel):
+    """SD 2.1 unCLIP variant: conditions on noise-augmented CLIP-vision image
+    embeddings passed through the adm channels."""
+    def __init__(self, model_config, noise_aug_config, model_type=ModelType.V_PREDICTION, device=None):
+        super().__init__(model_config, model_type, device=device)
+        self.noise_augmentor = CLIPEmbeddingNoiseAugmentation(**noise_aug_config)
+
+    def encode_adm(self, **kwargs):
+        unclip_conditioning = kwargs.get("unclip_conditioning", None)
+        device = kwargs["device"]
+        if unclip_conditioning is None:
+            # No image conditioning: feed zeros of the expected adm width.
+            return torch.zeros((1, self.adm_channels))
+        else:
+            return unclip_adm(unclip_conditioning, device, self.noise_augmentor, kwargs.get("unclip_noise_augment_merge", 0.05), kwargs.get("seed", 0) - 10)
+
def sdxl_pooled(args, noise_augmentor):
    """Pooled vector for SDXL: unCLIP-augmented image embeds when present, otherwise the text pooled output."""
    if "unclip_conditioning" not in args:
        return args["pooled_output"]
    adm = unclip_adm(args.get("unclip_conditioning", None), args["device"], noise_augmentor, seed=args.get("seed", 0) - 10)
    # Only the first 1280 channels (the embed part) feed the pooled input.
    return adm[:, :1280]
+
class SDXLRefiner(BaseModel):
    """SDXL refiner: ADM = pooled CLIP + embedded size/crop/aesthetic-score scalars."""

    def __init__(self, model_config, model_type=ModelType.EPS, device=None):
        super().__init__(model_config, model_type, device=device)
        self.embedder = Timestep(256)
        self.noise_augmentor = CLIPEmbeddingNoiseAugmentation(**{"noise_schedule_config": {"timesteps": 1000, "beta_schedule": "squaredcos_cap_v2"}, "timestep_dim": 1280})

    def encode_adm(self, **kwargs):
        clip_pooled = sdxl_pooled(kwargs, self.noise_augmentor)
        width = kwargs.get("width", 768)
        height = kwargs.get("height", 768)
        crop_w = kwargs.get("crop_w", 0)
        crop_h = kwargs.get("crop_h", 0)

        # Negative prompts default to a low aesthetic score, positives to a high one.
        if kwargs.get("prompt_type", "") == "negative":
            aesthetic_score = kwargs.get("aesthetic_score", 2.5)
        else:
            aesthetic_score = kwargs.get("aesthetic_score", 6)

        scalars = (height, width, crop_h, crop_w, aesthetic_score)
        embeds = [self.embedder(torch.Tensor([value])) for value in scalars]
        flat = torch.flatten(torch.cat(embeds)).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1)
        return torch.cat((clip_pooled.to(flat.device), flat), dim=1)
+
class SDXL(BaseModel):
    """SDXL base model: ADM = pooled CLIP + embedded size/crop/target-size scalars."""

    def __init__(self, model_config, model_type=ModelType.EPS, device=None):
        super().__init__(model_config, model_type, device=device)
        self.embedder = Timestep(256)
        self.noise_augmentor = CLIPEmbeddingNoiseAugmentation(**{"noise_schedule_config": {"timesteps": 1000, "beta_schedule": "squaredcos_cap_v2"}, "timestep_dim": 1280})

    def encode_adm(self, **kwargs):
        clip_pooled = sdxl_pooled(kwargs, self.noise_augmentor)
        width = kwargs.get("width", 768)
        height = kwargs.get("height", 768)
        crop_w = kwargs.get("crop_w", 0)
        crop_h = kwargs.get("crop_h", 0)
        # Target size defaults to the actual size when not given.
        target_width = kwargs.get("target_width", width)
        target_height = kwargs.get("target_height", height)

        scalars = (height, width, crop_h, crop_w, target_height, target_width)
        embeds = [self.embedder(torch.Tensor([value])) for value in scalars]
        flat = torch.flatten(torch.cat(embeds)).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1)
        return torch.cat((clip_pooled.to(flat.device), flat), dim=1)
+
+
class SVD_img2vid(BaseModel):
    """Stable Video Diffusion img2vid.

    The ADM vector embeds fps, motion bucket and augmentation level; the
    conditioning image latent is passed via c_concat, and the batch dimension
    of the noise is treated as the frame count.
    """

    def __init__(self, model_config, model_type=ModelType.V_PREDICTION_EDM, device=None):
        super().__init__(model_config, model_type, device=device)
        self.embedder = Timestep(256)

    def encode_adm(self, **kwargs):
        # fps is passed 1-based by callers; the embedding uses fps - 1.
        fps_id = kwargs.get("fps", 6) - 1
        motion_bucket_id = kwargs.get("motion_bucket_id", 127)
        augmentation = kwargs.get("augmentation_level", 0)

        out = []
        out.append(self.embedder(torch.Tensor([fps_id])))
        out.append(self.embedder(torch.Tensor([motion_bucket_id])))
        out.append(self.embedder(torch.Tensor([augmentation])))

        flat = torch.flatten(torch.cat(out)).unsqueeze(dim=0)
        return flat

    def extra_conds(self, **kwargs):
        out = {}
        adm = self.encode_adm(**kwargs)
        if adm is not None:
            out['y'] = comfy.conds.CONDRegular(adm)

        latent_image = kwargs.get("concat_latent_image", None)
        noise = kwargs.get("noise", None)

        if latent_image is None:
            latent_image = torch.zeros_like(noise)

        # Match the conditioning latent to the noise's spatial size and batch count.
        if latent_image.shape[1:] != noise.shape[1:]:
            latent_image = utils.common_upscale(latent_image, noise.shape[-1], noise.shape[-2], "bilinear", "center")

        latent_image = utils.resize_to_batch_size(latent_image, noise.shape[0])

        out['c_concat'] = comfy.conds.CONDNoiseShape(latent_image)

        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDCrossAttn(cross_attn)

        if "time_conditioning" in kwargs:
            out["time_context"] = comfy.conds.CONDCrossAttn(kwargs["time_conditioning"])

        # Batch size doubles as the number of video frames here.
        out['num_video_frames'] = comfy.conds.CONDConstant(noise.shape[0])
        return out
+
class SV3D_u(SVD_img2vid):
    """SV3D (static orbit): only the augmentation level feeds the ADM vector."""

    def encode_adm(self, **kwargs):
        augmentation = kwargs.get("augmentation_level", 0)
        emb = self.embedder(torch.flatten(torch.Tensor([augmentation])))
        return torch.flatten(torch.cat([emb])).unsqueeze(dim=0)
+
class SV3D_p(SVD_img2vid):
    """SV3D with per-frame camera pose conditioning (elevation/azimuth)."""

    def __init__(self, model_config, model_type=ModelType.V_PREDICTION_EDM, device=None):
        super().__init__(model_config, model_type, device=device)
        # Wider embedder used for the camera angles.
        self.embedder_512 = Timestep(512)

    def encode_adm(self, **kwargs):
        augmentation = kwargs.get("augmentation_level", 0)
        elevation = kwargs.get("elevation", 0) #elevation and azimuth are in degrees here
        azimuth = kwargs.get("azimuth", 0)
        noise = kwargs.get("noise", None)

        out = []
        out.append(self.embedder(torch.flatten(torch.Tensor([augmentation]))))
        # Polar angle (90 - elevation) and azimuth, reduced mod 360 (fmod is
        # sign-preserving) and embedded in radians.
        out.append(self.embedder_512(torch.deg2rad(torch.fmod(torch.flatten(90 - torch.Tensor([elevation])), 360.0))))
        out.append(self.embedder_512(torch.deg2rad(torch.fmod(torch.flatten(torch.Tensor([azimuth])), 360.0))))

        # Repeat/trim each embedding to one entry per frame (noise batch size).
        out = list(map(lambda a: utils.resize_to_batch_size(a, noise.shape[0]), out))
        return torch.cat(out, dim=1)
+
+
class Stable_Zero123(BaseModel):
    """Zero123 novel-view model; remaps cross-attn conditioning through a
    checkpoint-provided linear projection (cc_projection)."""

    def __init__(self, model_config, model_type=ModelType.EPS, device=None, cc_projection_weight=None, cc_projection_bias=None):
        super().__init__(model_config, model_type, device=device)
        # Layer dimensions come from the provided checkpoint weights.
        self.cc_projection = comfy.ops.manual_cast.Linear(cc_projection_weight.shape[1], cc_projection_weight.shape[0], dtype=self.get_dtype(), device=device)
        self.cc_projection.weight.copy_(cc_projection_weight)
        self.cc_projection.bias.copy_(cc_projection_bias)

    def extra_conds(self, **kwargs):
        out = {}

        latent_image = kwargs.get("concat_latent_image", None)
        noise = kwargs.get("noise", None)

        if latent_image is None:
            latent_image = torch.zeros_like(noise)

        # Match the concat latent to the noise's spatial size and batch count.
        if latent_image.shape[1:] != noise.shape[1:]:
            latent_image = utils.common_upscale(latent_image, noise.shape[-1], noise.shape[-2], "bilinear", "center")

        latent_image = utils.resize_to_batch_size(latent_image, noise.shape[0])

        out['c_concat'] = comfy.conds.CONDNoiseShape(latent_image)

        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            # Project into the 768-dim cross-attn space unless it already matches.
            if cross_attn.shape[-1] != 768:
                cross_attn = self.cc_projection(cross_attn)
            out['c_crossattn'] = comfy.conds.CONDCrossAttn(cross_attn)
        return out
+
class SD_X4Upscaler(BaseModel):
    """SD x4 latent upscaler: conditions on the low-res image (c_concat) and
    the noise level applied to it (y)."""

    def __init__(self, model_config, model_type=ModelType.V_PREDICTION, device=None):
        super().__init__(model_config, model_type, device=device)
        self.noise_augmentor = ImageConcatWithNoiseAugmentation(noise_schedule_config={"linear_start": 0.0001, "linear_end": 0.02}, max_noise_level=350)

    def extra_conds(self, **kwargs):
        out = {}

        image = kwargs.get("concat_image", None)
        noise = kwargs.get("noise", None)
        noise_augment = kwargs.get("noise_augmentation", 0.0)
        device = kwargs["device"]
        # Seed offset mirrors the unCLIP paths in this file (seed - 10).
        seed = kwargs["seed"] - 10

        # Map the 0..1 augmentation strength onto the augmentor's levels.
        noise_level = round((self.noise_augmentor.max_noise_level) * noise_augment)

        if image is None:
            # No conditioning image: zeros, first 3 (RGB) channels only.
            image = torch.zeros_like(noise)[:,:3]

        if image.shape[1:] != noise.shape[1:]:
            image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")

        noise_level = torch.tensor([noise_level], device=device)
        if noise_augment > 0:
            image, noise_level = self.noise_augmentor(image.to(device), noise_level=noise_level, seed=seed)

        image = utils.resize_to_batch_size(image, noise.shape[0])

        out['c_concat'] = comfy.conds.CONDNoiseShape(image)
        # The model is told how noisy its conditioning image is.
        out['y'] = comfy.conds.CONDRegular(noise_level)

        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDCrossAttn(cross_attn)
        return out
+
class IP2P:
    """Mixin supplying the instruct-pix2pix concat conditioning (the source image latent)."""

    def concat_cond(self, **kwargs):
        noise = kwargs.get("noise", None)
        device = kwargs["device"]
        image = kwargs.get("concat_latent_image", None)

        if image is None:
            image = torch.zeros_like(noise)
        if image.shape[1:] != noise.shape[1:]:
            image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")
        image = utils.resize_to_batch_size(image, noise.shape[0])

        # Subclasses set process_ip2p_image_in to the appropriate preprocessing.
        return self.process_ip2p_image_in(image)
+
+
class SD15_instructpix2pix(IP2P, BaseModel):
    """SD 1.5 instruct-pix2pix; the concat image is used as-is."""

    def __init__(self, model_config, model_type=ModelType.EPS, device=None):
        super().__init__(model_config, model_type, device=device)
        # Identity preprocessing: no latent-format conversion needed.
        self.process_ip2p_image_in = lambda image: image
+
+
class SDXL_instructpix2pix(IP2P, SDXL):
    """SDXL instruct-pix2pix; the concat-image preprocessing depends on the model type."""

    def __init__(self, model_config, model_type=ModelType.EPS, device=None):
        super().__init__(model_config, model_type, device=device)
        if model_type == ModelType.V_PREDICTION_EDM:
            # CosXL edit models expect the SDXL latent-format scaling.
            self.process_ip2p_image_in = lambda image: comfy.latent_formats.SDXL().process_in(image) #cosxl ip2p
        else:
            self.process_ip2p_image_in = lambda image: image #diffusers ip2p
+
class Lotus(BaseModel):
    """Lotus img-to-img model; conditions on a fixed sin/cos task embedding."""

    def __init__(self, model_config, model_type=ModelType.IMG_TO_IMG, device=None):
        super().__init__(model_config, model_type, device=device)

    def extra_conds(self, **kwargs):
        out = {'c_crossattn': comfy.conds.CONDCrossAttn(kwargs.get("cross_attn", None))}
        device = kwargs["device"]
        # Fixed [1, 0] task id, encoded with sin and cos.
        base = torch.tensor([1, 0]).float().to(device)
        out['y'] = comfy.conds.CONDRegular(torch.cat([torch.sin(base), torch.cos(base)]).unsqueeze(0))
        return out
+
class StableCascade_C(BaseModel):
    """Stable Cascade stage C (prior): conditions on pooled text, optional
    CLIP-vision image embeds, and cross-attn text."""

    def __init__(self, model_config, model_type=ModelType.STABLE_CASCADE, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=StageC)
        # Inference only: freeze the diffusion model.
        self.diffusion_model.eval().requires_grad_(False)

    def extra_conds(self, **kwargs):
        out = {}
        clip_text_pooled = kwargs["pooled_output"]
        if clip_text_pooled is not None:
            out['clip_text_pooled'] = comfy.conds.CONDRegular(clip_text_pooled)

        # Optional image conditioning: strength-weighted CLIP-vision embeds,
        # concatenated along the token dim; zeros when absent.
        if "unclip_conditioning" in kwargs:
            embeds = []
            for unclip_cond in kwargs["unclip_conditioning"]:
                weight = unclip_cond["strength"]
                embeds.append(unclip_cond["clip_vision_output"].image_embeds.unsqueeze(0) * weight)
            clip_img = torch.cat(embeds, dim=1)
        else:
            clip_img = torch.zeros((1, 1, 768))
        out["clip_img"] = comfy.conds.CONDRegular(clip_img)
        # sca/crp inputs are fed zeros here.  # NOTE(review): confirm intended semantics of sca/crp
        out["sca"] = comfy.conds.CONDRegular(torch.zeros((1,)))
        out["crp"] = comfy.conds.CONDRegular(torch.zeros((1,)))

        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            out['clip_text'] = comfy.conds.CONDCrossAttn(cross_attn)
        return out
+
+
class StableCascade_B(BaseModel):
    """Stable Cascade stage B decoder; conditions on the stage C prior ('effnet')."""

    def __init__(self, model_config, model_type=ModelType.STABLE_CASCADE, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=StageB)
        # Inference only: freeze the diffusion model.
        self.diffusion_model.eval().requires_grad_(False)

    def extra_conds(self, **kwargs):
        out = {}
        noise = kwargs.get("noise", None)

        clip_text_pooled = kwargs["pooled_output"]
        if clip_text_pooled is not None:
            out['clip'] = comfy.conds.CONDRegular(clip_text_pooled)

        #size of prior doesn't really matter if zeros because it gets resized but I still want it to get batched
        prior = kwargs.get("stable_cascade_prior", torch.zeros((1, 16, (noise.shape[2] * 4) // 42, (noise.shape[3] * 4) // 42), dtype=noise.dtype, layout=noise.layout, device=noise.device))

        out["effnet"] = comfy.conds.CONDRegular(prior)
        # sca input is fed zeros here.  # NOTE(review): confirm intended semantics
        out["sca"] = comfy.conds.CONDRegular(torch.zeros((1,)))
        return out
+
+
class SD3(BaseModel):
    """Stable Diffusion 3 (MMDiT, flow matching)."""

    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=OpenAISignatureMMDITWrapper)

    def encode_adm(self, **kwargs):
        # The pooled text embedding passes straight through as 'y'.
        return kwargs["pooled_output"]

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        conditioning = kwargs.get("cross_attn")
        if conditioning is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(conditioning)
        return out
+
+
class AuraFlow(BaseModel):
    """AuraFlow (MMDiT, flow matching)."""

    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.aura.mmdit.MMDiT)

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        conditioning = kwargs.get("cross_attn")
        if conditioning is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(conditioning)
        return out
+
+
class StableAudio1(BaseModel):
    """Stable Audio 1.0: audio DiT conditioned on start/total-seconds embeddings."""

    def __init__(self, model_config, seconds_start_embedder_weights, seconds_total_embedder_weights, model_type=ModelType.V_PREDICTION_CONTINUOUS, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.audio.dit.AudioDiffusionTransformer)
        # Number conditioners embed scalar second counts (range 0..512) to 768-d.
        self.seconds_start_embedder = comfy.ldm.audio.embedders.NumberConditioner(768, min_val=0, max_val=512)
        self.seconds_total_embedder = comfy.ldm.audio.embedders.NumberConditioner(768, min_val=0, max_val=512)
        self.seconds_start_embedder.load_state_dict(seconds_start_embedder_weights)
        self.seconds_total_embedder.load_state_dict(seconds_total_embedder_weights)

    def extra_conds(self, **kwargs):
        out = {}

        noise = kwargs.get("noise", None)
        device = kwargs["device"]

        seconds_start = kwargs.get("seconds_start", 0)
        # Default length derived from latent length; ~21.53 latent steps per second.  # TODO confirm ratio
        seconds_total = kwargs.get("seconds_total", int(noise.shape[-1] / 21.53))

        seconds_start_embed = self.seconds_start_embedder([seconds_start])[0].to(device)
        seconds_total_embed = self.seconds_total_embedder([seconds_total])[0].to(device)

        global_embed = torch.cat([seconds_start_embed, seconds_total_embed], dim=-1).reshape((1, -1))
        out['global_embed'] = comfy.conds.CONDRegular(global_embed)

        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            # The seconds embeddings are also appended as extra cross-attn tokens.
            cross_attn = torch.cat([cross_attn.to(device), seconds_start_embed.repeat((cross_attn.shape[0], 1, 1)), seconds_total_embed.repeat((cross_attn.shape[0], 1, 1))], dim=1)
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
        return out

    def state_dict_for_saving(self, clip_state_dict=None, vae_state_dict=None, clip_vision_state_dict=None):
        """Extend the base state dict with the seconds-embedder weights under
        their original checkpoint key prefixes."""
        sd = super().state_dict_for_saving(clip_state_dict=clip_state_dict, vae_state_dict=vae_state_dict, clip_vision_state_dict=clip_vision_state_dict)
        d = {"conditioner.conditioners.seconds_start.": self.seconds_start_embedder.state_dict(), "conditioner.conditioners.seconds_total.": self.seconds_total_embedder.state_dict()}
        for k in d:
            s = d[k]
            for l in s:
                sd["{}{}".format(k, l)] = s[l]
        return sd
+
+
class HunyuanDiT(BaseModel):
    """Hunyuan-DiT image model; conditions on two text streams (CLIP + mT5-XL)
    plus an image size metadata vector."""

    def __init__(self, model_config, model_type=ModelType.V_PREDICTION, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hydit.models.HunYuanDiT)

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)

        attention_mask = kwargs.get("attention_mask", None)
        if attention_mask is not None:
            out['text_embedding_mask'] = comfy.conds.CONDRegular(attention_mask)

        # Second text encoder (mT5-XL) stream and its mask.
        conditioning_mt5xl = kwargs.get("conditioning_mt5xl", None)
        if conditioning_mt5xl is not None:
            out['encoder_hidden_states_t5'] = comfy.conds.CONDRegular(conditioning_mt5xl)

        attention_mask_mt5xl = kwargs.get("attention_mask_mt5xl", None)
        if attention_mask_mt5xl is not None:
            out['text_embedding_mask_t5'] = comfy.conds.CONDRegular(attention_mask_mt5xl)

        width = kwargs.get("width", 768)
        height = kwargs.get("height", 768)
        target_width = kwargs.get("target_width", width)
        target_height = kwargs.get("target_height", height)

        # Size metadata vector; the last two entries are fixed to 0 here.
        out['image_meta_size'] = comfy.conds.CONDRegular(torch.FloatTensor([[height, width, target_height, target_width, 0, 0]]))
        return out
+
class PixArt(BaseModel):
    """PixArt (multi-scale DiT); conditions on image size and aspect ratio."""

    def __init__(self, model_config, model_type=ModelType.EPS, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.pixart.pixartms.PixArtMS)

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)

        conditioning = kwargs.get("cross_attn")
        if conditioning is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(conditioning)

        width = kwargs.get("width")
        height = kwargs.get("height")
        if width is not None and height is not None:
            out["c_size"] = comfy.conds.CONDRegular(torch.FloatTensor([[height, width]]))
            # Aspect ratio defaults to height/width when not given explicitly.
            ratio = kwargs.get("aspect_ratio", height / width)
            out["c_ar"] = comfy.conds.CONDRegular(torch.FloatTensor([[ratio]]))

        return out
+
class Flux(BaseModel):
    """Flux (flow-matching DiT).

    Supports control/inpaint models via extra concat channels, a guidance
    embedding, upscaled DiT attention masks, and reference latents.
    """

    def __init__(self, model_config, model_type=ModelType.FLUX, device=None, unet_model=comfy.ldm.flux.model.Flux):
        super().__init__(model_config, model_type, device=device, unet_model=unet_model)
        # ref_latents contribute to memory usage estimation.
        self.memory_usage_factor_conds = ("ref_latents",)

    def concat_cond(self, **kwargs):
        """Build the channels concatenated to the latent input.

        Returns None for plain Flux, the processed control image for control
        models, or image+mask for inpaint models.
        """
        try:
            # Handle Flux control loras dynamically changing the img_in weight.
            num_channels = self.diffusion_model.img_in.weight.shape[1] // (self.diffusion_model.patch_size * self.diffusion_model.patch_size)
        except Exception:
            # Some cases like tensorrt might not have the weights accessible.
            num_channels = self.model_config.unet_config["in_channels"]

        out_channels = self.model_config.unet_config["out_channels"]

        # No extra input channels: nothing to concat.
        if num_channels <= out_channels:
            return None

        image = kwargs.get("concat_latent_image", None)
        noise = kwargs.get("noise", None)
        device = kwargs["device"]

        if image is None:
            image = torch.zeros_like(noise)

        image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")
        image = utils.resize_to_batch_size(image, noise.shape[0])
        image = self.process_latent_in(image)
        if num_channels <= out_channels * 2:
            return image

        # Inpaint model: the remaining channels carry the pixel-space mask,
        # folded into 8x8=64 channels per latent pixel.
        mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
        if mask is None:
            mask = torch.ones_like(noise)[:, :1]

        mask = torch.mean(mask, dim=1, keepdim=True)
        mask = utils.common_upscale(mask.to(device), noise.shape[-1] * 8, noise.shape[-2] * 8, "bilinear", "center")
        mask = mask.view(mask.shape[0], mask.shape[2] // 8, 8, mask.shape[3] // 8, 8).permute(0, 2, 4, 1, 3).reshape(mask.shape[0], -1, mask.shape[2] // 8, mask.shape[3] // 8)
        mask = utils.resize_to_batch_size(mask, noise.shape[0])
        return torch.cat((image, mask), dim=1)

    def encode_adm(self, **kwargs):
        # Pooled text embedding is passed straight through as 'y'.
        return kwargs["pooled_output"]

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)

        # Upscale the attention mask from its reference image size to the
        # token grid the model actually attends over.
        attention_mask = kwargs.get("attention_mask", None)
        if attention_mask is not None:
            shape = kwargs["noise"].shape
            mask_ref_size = kwargs["attention_mask_img_shape"]
            # The model pads to the patch size and then divides,
            # essentially dividing and rounding up.
            (h_tok, w_tok) = (math.ceil(shape[2] / self.diffusion_model.patch_size), math.ceil(shape[3] / self.diffusion_model.patch_size))
            attention_mask = utils.upscale_dit_mask(attention_mask, mask_ref_size, (h_tok, w_tok))
            out['attention_mask'] = comfy.conds.CONDRegular(attention_mask)

        guidance = kwargs.get("guidance", 3.5)
        if guidance is not None:
            out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor([guidance]))

        ref_latents = kwargs.get("reference_latents", None)
        if ref_latents is not None:
            out['ref_latents'] = comfy.conds.CONDList([self.process_latent_in(lat) for lat in ref_latents])
        return out

    def extra_conds_shapes(self, **kwargs):
        """Shape hint for memory estimation of the ref_latents conditioning."""
        out = {}
        ref_latents = kwargs.get("reference_latents", None)
        if ref_latents is not None:
            out['ref_latents'] = [1, 16, sum(math.prod(a.size()) for a in ref_latents) // 16]
        return out
+
+
class GenmoMochi(BaseModel):
    """Genmo Mochi video model (AsymmDiT, flow matching)."""

    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.genmo.joint_model.asymm_models_joint.AsymmDiTJoint)

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        mask = kwargs.get("attention_mask")
        if mask is not None:
            out['attention_mask'] = comfy.conds.CONDRegular(mask)
            # Count at least one token even for an all-zero mask.
            out['num_tokens'] = comfy.conds.CONDConstant(max(1, torch.sum(mask).item()))
        conditioning = kwargs.get("cross_attn")
        if conditioning is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(conditioning)
        return out
+
class LTXV(BaseModel):
    """Lightricks LTX-Video model."""

    def __init__(self, model_config, model_type=ModelType.FLUX, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.lightricks.model.LTXVModel) #TODO

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        attention_mask = kwargs.get("attention_mask", None)
        if attention_mask is not None:
            out['attention_mask'] = comfy.conds.CONDRegular(attention_mask)
        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)

        out['frame_rate'] = comfy.conds.CONDConstant(kwargs.get("frame_rate", 25))

        denoise_mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
        if denoise_mask is not None:
            out["denoise_mask"] = comfy.conds.CONDRegular(denoise_mask)

        keyframe_idxs = kwargs.get("keyframe_idxs", None)
        if keyframe_idxs is not None:
            out['keyframe_idxs'] = comfy.conds.CONDRegular(keyframe_idxs)

        return out

    def process_timestep(self, timestep, x, denoise_mask=None, **kwargs):
        """Produce per-token timesteps by weighting the scalar timestep with the
        denoise mask (first channel), patchified like the model input."""
        if denoise_mask is None:
            return timestep
        return self.diffusion_model.patchifier.patchify(((denoise_mask) * timestep.view([timestep.shape[0]] + [1] * (denoise_mask.ndim - 1)))[:, :1])[0]

    def scale_latent_inpaint(self, sigma, noise, latent_image, **kwargs):
        # Latent is used as-is; masking is handled through process_timestep.
        return latent_image
+
class HunyuanVideo(BaseModel):
    """Hunyuan Video (flow matching)."""

    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hunyuan_video.model.HunyuanVideo)

    def encode_adm(self, **kwargs):
        # Pooled text embedding passes straight through as 'y'.
        return kwargs["pooled_output"]

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        attention_mask = kwargs.get("attention_mask", None)
        if attention_mask is not None:
            out['attention_mask'] = comfy.conds.CONDRegular(attention_mask)
        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)

        guidance = kwargs.get("guidance", 6.0)
        if guidance is not None:
            out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor([guidance]))

        guiding_frame_index = kwargs.get("guiding_frame_index", None)
        if guiding_frame_index is not None:
            out['guiding_frame_index'] = comfy.conds.CONDRegular(torch.FloatTensor([guiding_frame_index]))

        # Reference latent is converted to the model's latent format first.
        ref_latent = kwargs.get("ref_latent", None)
        if ref_latent is not None:
            out['ref_latent'] = comfy.conds.CONDRegular(self.process_latent_in(ref_latent))

        return out

    def scale_latent_inpaint(self, latent_image, **kwargs):
        # Inpaint latents are used unscaled.
        return latent_image
+
class HunyuanVideoI2V(HunyuanVideo):
    """Image-to-video HunyuanVideo: concats the reference image and an inverted mask."""

    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device)
        self.concat_keys = ("concat_image", "mask_inverted")

    def scale_latent_inpaint(self, latent_image, **kwargs):
        # Same as the parent: the latent is used unscaled.
        return super().scale_latent_inpaint(latent_image=latent_image, **kwargs)
+
class HunyuanVideoSkyreelsI2V(HunyuanVideo):
    """Skyreels image-to-video HunyuanVideo: concats only the reference image."""

    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device)
        self.concat_keys = ("concat_image",)

    def scale_latent_inpaint(self, latent_image, **kwargs):
        # Same as the parent: the latent is used unscaled.
        return super().scale_latent_inpaint(latent_image=latent_image, **kwargs)
+
class CosmosVideo(BaseModel):
    """NVIDIA Cosmos video model (EDM)."""

    def __init__(self, model_config, model_type=ModelType.EDM, image_to_video=False, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.cosmos.model.GeneralDIT)
        self.image_to_video = image_to_video
        if self.image_to_video:
            # i2v variants additionally concat an inverted mask channel.
            self.concat_keys = ("mask_inverted",)

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        attention_mask = kwargs.get("attention_mask", None)
        if attention_mask is not None:
            out['attention_mask'] = comfy.conds.CONDRegular(attention_mask)
        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)

        out['fps'] = comfy.conds.CONDConstant(kwargs.get("frame_rate", None))
        return out

    def scale_latent_inpaint(self, sigma, noise, latent_image, **kwargs):
        """Rescale the fixed (inpaint) latent so it matches the EDM input scaling at this sigma."""
        sigma = sigma.reshape([sigma.shape[0]] + [1] * (len(noise.shape) - 1))
        sigma_noise_augmentation = 0 #TODO
        if sigma_noise_augmentation != 0:
            latent_image = latent_image + noise
        latent_image = self.model_sampling.calculate_input(torch.tensor([sigma_noise_augmentation], device=latent_image.device, dtype=latent_image.dtype), latent_image)
        # Undo the EDM input normalization applied at this sigma.
        return latent_image * ((sigma ** 2 + self.model_sampling.sigma_data ** 2) ** 0.5)
+
class CosmosPredict2(BaseModel):
    """NVIDIA Cosmos Predict2 (flow-matching Cosmos variant)."""

    def __init__(self, model_config, model_type=ModelType.FLOW_COSMOS, image_to_video=False, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.cosmos.predict2.MiniTrainDIT)
        self.image_to_video = image_to_video
        if self.image_to_video:
            # i2v variants additionally concat an inverted mask channel.
            self.concat_keys = ("mask_inverted",)

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)

        denoise_mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
        if denoise_mask is not None:
            out["denoise_mask"] = comfy.conds.CONDRegular(denoise_mask)

        out['fps'] = comfy.conds.CONDConstant(kwargs.get("frame_rate", None))
        return out

    def process_timestep(self, timestep, x, denoise_mask=None, **kwargs):
        """Produce per-frame timesteps from a 5D denoise mask; frames whose
        mask mean is 0 get timestep 0."""
        if denoise_mask is None:
            return timestep
        if denoise_mask.ndim <= 4:
            # Not a video-shaped mask; leave the timestep untouched.
            return timestep
        condition_video_mask_B_1_T_1_1 = denoise_mask.mean(dim=[1, 3, 4], keepdim=True)
        c_noise_B_1_T_1_1 = 0.0 * (1.0 - condition_video_mask_B_1_T_1_1) + timestep.reshape(timestep.shape[0], 1, 1, 1, 1) * condition_video_mask_B_1_T_1_1
        out = c_noise_B_1_T_1_1.squeeze(dim=[1, 3, 4])
        return out

    def scale_latent_inpaint(self, sigma, noise, latent_image, **kwargs):
        """Rescale the fixed (inpaint) latent to match the model's input scaling at this sigma."""
        sigma = sigma.reshape([sigma.shape[0]] + [1] * (len(noise.shape) - 1))
        sigma_noise_augmentation = 0 #TODO
        if sigma_noise_augmentation != 0:
            latent_image = latent_image + noise
        latent_image = self.model_sampling.calculate_input(torch.tensor([sigma_noise_augmentation], device=latent_image.device, dtype=latent_image.dtype), latent_image)
        # Map sigma to the flow-style 0..1 range, then undo the input scaling.
        sigma = (sigma / (sigma + 1))
        return latent_image / (1.0 - sigma)
+
class Lumina2(BaseModel):
    """Lumina Image 2.0 (NextDiT, flow matching)."""

    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.lumina.model.NextDiT)

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        attention_mask = kwargs.get("attention_mask", None)
        if attention_mask is not None:
            # Only pass the mask when it actually masks something (not all ones).
            if torch.numel(attention_mask) != attention_mask.sum():
                out['attention_mask'] = comfy.conds.CONDRegular(attention_mask)
            # Count at least one token even for an all-zero mask.
            out['num_tokens'] = comfy.conds.CONDConstant(max(1, torch.sum(attention_mask).item()))
        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
        return out
+
class WAN21(BaseModel):
    """Wan 2.1 video model; i2v variants get reference-image + mask concat channels."""

    def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.WanModel)
        self.image_to_video = image_to_video

    def concat_cond(self, **kwargs):
        noise = kwargs.get("noise", None)
        # Extra concat channels = patch-embedding input channels minus latent channels.
        extra_channels = self.diffusion_model.patch_embedding.weight.shape[1] - noise.shape[1]
        if extra_channels == 0:
            return None

        image = kwargs.get("concat_latent_image", None)
        device = kwargs["device"]

        if image is None:
            image = torch.zeros(shape_image, dtype=noise.dtype, layout=noise.layout, device=noise.device) if False else None  # placeholder removed below
        if image is None:
            shape_image = list(noise.shape)
            shape_image[1] = extra_channels
            image = torch.zeros(shape_image, dtype=noise.dtype, layout=noise.layout, device=noise.device)
        else:
            image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")
            # Latent-format process each 16-channel group independently.
            for i in range(0, image.shape[1], 16):
                image[:, i: i + 16] = self.process_latent_in(image[:, i: i + 16])
            image = utils.resize_to_batch_size(image, noise.shape[0])

        # If there is no room for the 4 mask channels, return the image alone
        # (t2v models, or images that already fill the channel budget).
        if extra_channels != image.shape[1] + 4:
            if not self.image_to_video or extra_channels == image.shape[1]:
                return image

        if image.shape[1] > (extra_channels - 4):
            image = image[:, :(extra_channels - 4)]

        mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
        if mask is None:
            mask = torch.zeros_like(noise)[:, :4]
        else:
            if mask.shape[1] != 4:
                mask = torch.mean(mask, dim=1, keepdim=True)
            # Mask is inverted before being handed to the model.  # NOTE(review): verify polarity convention
            mask = 1.0 - mask
            mask = utils.common_upscale(mask.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")
            # Pad missing frames with zeros at the end of the time axis.
            if mask.shape[-3] < noise.shape[-3]:
                mask = torch.nn.functional.pad(mask, (0, 0, 0, 0, 0, noise.shape[-3] - mask.shape[-3]), mode='constant', value=0)
            if mask.shape[1] == 1:
                mask = mask.repeat(1, 4, 1, 1, 1)
            mask = utils.resize_to_batch_size(mask, noise.shape[0])

        return torch.cat((mask, image), dim=1)

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)

        clip_vision_output = kwargs.get("clip_vision_output", None)
        if clip_vision_output is not None:
            out['clip_fea'] = comfy.conds.CONDRegular(clip_vision_output.penultimate_hidden_states)

        time_dim_concat = kwargs.get("time_dim_concat", None)
        if time_dim_concat is not None:
            out['time_dim_concat'] = comfy.conds.CONDRegular(self.process_latent_in(time_dim_concat))

        return out
+
+
class WAN21_Vace(WAN21):
    """Wan 2.1 VACE (video editing); packs reference frames + masks into vace_context."""

    def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
        # Deliberately skip WAN21.__init__ so the VACE unet class is used instead.
        super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.VaceWanModel)
        self.image_to_video = image_to_video

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        noise = kwargs.get("noise", None)
        noise_shape = list(noise.shape)
        vace_frames = kwargs.get("vace_frames", None)
        if vace_frames is None:
            # No reference frames supplied: single zero context with 32 channels.
            noise_shape[1] = 32
            vace_frames = [torch.zeros(noise_shape, device=noise.device, dtype=noise.dtype)]

        mask = kwargs.get("vace_mask", None)
        if mask is None:
            # Default: all-ones 64-channel mask, one per frame entry.
            noise_shape[1] = 64
            mask = [torch.ones(noise_shape, device=noise.device, dtype=noise.dtype)] * len(vace_frames)

        vace_frames_out = []
        for j in range(len(vace_frames)):
            # Clone before in-place latent-format processing of each 16-channel group.
            vf = vace_frames[j].clone()
            for i in range(0, vf.shape[1], 16):
                vf[:, i:i + 16] = self.process_latent_in(vf[:, i:i + 16])
            vf = torch.cat([vf, mask[j]], dim=1)
            vace_frames_out.append(vf)

        vace_frames = torch.stack(vace_frames_out, dim=1)
        out['vace_context'] = comfy.conds.CONDRegular(vace_frames)

        vace_strength = kwargs.get("vace_strength", [1.0] * len(vace_frames_out))
        out['vace_strength'] = comfy.conds.CONDConstant(vace_strength)
        return out
+
class WAN21_Camera(WAN21):
    """Wan 2.1 camera-control variant; adds camera conditions to the extra conds."""

    def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
        # Skip WAN21.__init__ so the camera unet class is used instead.
        super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.CameraWanModel)
        self.image_to_video = image_to_video

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        camera_conditions = kwargs.get("camera_conditions")
        if camera_conditions is not None:
            out['camera_conditions'] = comfy.conds.CONDRegular(camera_conditions)
        return out
+
class WAN22(BaseModel):
    """Wan 2.2; per-frame timesteps are derived from the denoise mask."""

    def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.WanModel)
        self.image_to_video = image_to_video

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)

        denoise_mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
        if denoise_mask is not None:
            out["denoise_mask"] = comfy.conds.CONDRegular(denoise_mask)
        return out

    def process_timestep(self, timestep, x, denoise_mask=None, **kwargs):
        """Expand the scalar timestep to per-frame values weighted by the
        per-frame mean of the denoise mask."""
        if denoise_mask is None:
            return timestep
        temp_ts = (torch.mean(denoise_mask[:, :, :, :, :], dim=(1, 3, 4), keepdim=True) * timestep.view([timestep.shape[0]] + [1] * (denoise_mask.ndim - 1))).reshape(timestep.shape[0], -1)
        return temp_ts

    def scale_latent_inpaint(self, sigma, noise, latent_image, **kwargs):
        # Latent is used unscaled; masking is handled through process_timestep.
        return latent_image
+
class Hunyuan3Dv2(BaseModel):
    """Hunyuan 3D v2 shape-generation model wrapper."""

    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hunyuan3d.model.Hunyuan3Dv2)

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        cross_attn = kwargs.get("cross_attn")
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
        # Distilled-guidance scale, default 5.0 as in the reference setup.
        guidance = kwargs.get("guidance", 5.0)
        if guidance is not None:
            out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor([guidance]))
        return out
+
class HiDream(BaseModel):
    """HiDream image transformer wrapper."""

    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hidream.model.HiDreamImageTransformer2DModel)

    def encode_adm(self, **kwargs):
        # Pooled text embedding passes straight through as the adm conditioning.
        return kwargs["pooled_output"]

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        # Optional tensor conds that map 1:1 from kwargs to output keys.
        for src_key, dst_key in (("cross_attn", 'c_crossattn'),
                                 ("conditioning_llama3", 'encoder_hidden_states_llama3')):
            value = kwargs.get(src_key)
            if value is not None:
                out[dst_key] = comfy.conds.CONDRegular(value)
        image_cond = kwargs.get("concat_latent_image")
        if image_cond is not None:
            out['image_cond'] = comfy.conds.CONDNoiseShape(self.process_latent_in(image_cond))
        return out
+
class Chroma(Flux):
    """Chroma model wrapper; reuses the Flux conditioning pipeline."""

    def __init__(self, model_config, model_type=ModelType.FLUX, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.chroma.model.Chroma)

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        # Guidance defaults to 0 here (unlike other models that default higher).
        guidance = kwargs.get("guidance", 0)
        if guidance is not None:
            out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor([guidance]))
        return out
+
class ACEStep(BaseModel):
    """ACE-Step audio generation model wrapper."""

    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.ace.model.ACEStepTransformer2DModel)

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        noise = kwargs.get("noise", None)

        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)

        conditioning_lyrics = kwargs.get("conditioning_lyrics", None)
        # Fix: gate on the lyrics value itself. Previously this checked
        # `cross_attn is not None` (copy/paste slip), which could wrap a
        # None lyrics tensor in CONDRegular.
        if conditioning_lyrics is not None:
            out['lyric_token_idx'] = comfy.conds.CONDRegular(conditioning_lyrics)
        # Speaker embedding is always zeros (no speaker cloning support here).
        out['speaker_embeds'] = comfy.conds.CONDRegular(torch.zeros(noise.shape[0], 512, device=noise.device, dtype=noise.dtype))
        out['lyrics_strength'] = comfy.conds.CONDConstant(kwargs.get("lyrics_strength", 1.0))
        return out
+
class Omnigen2(BaseModel):
    # OmniGen2 wrapper; reference latents contribute to its memory estimate.
    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.omnigen.omnigen2.OmniGen2Transformer2DModel)
        # Cond keys whose size should be counted by memory estimation.
        self.memory_usage_factor_conds = ("ref_latents",)

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        attention_mask = kwargs.get("attention_mask", None)
        if attention_mask is not None:
            # Only forward the mask when it actually masks something
            # (i.e. it is not all ones).
            if torch.numel(attention_mask) != attention_mask.sum():
                out['attention_mask'] = comfy.conds.CONDRegular(attention_mask)
            out['num_tokens'] = comfy.conds.CONDConstant(max(1, torch.sum(attention_mask).item()))
        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
        ref_latents = kwargs.get("reference_latents", None)
        if ref_latents is not None:
            latents = []
            for lat in ref_latents:
                latents.append(self.process_latent_in(lat))
            out['ref_latents'] = comfy.conds.CONDList(latents)
        return out

    def extra_conds_shapes(self, **kwargs):
        # Report a synthetic [1, 16, N] shape so memory estimation accounts
        # for the total size of all reference latents.
        out = {}
        ref_latents = kwargs.get("reference_latents", None)
        if ref_latents is not None:
            out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16])
        return out
diff --git a/ComfyUI/comfy/model_management.py b/ComfyUI/comfy/model_management.py
new file mode 100644
index 0000000000000000000000000000000000000000..9e6149d60454195254518294bf690a3a174daf09
--- /dev/null
+++ b/ComfyUI/comfy/model_management.py
@@ -0,0 +1,1397 @@
+"""
+ This file is part of ComfyUI.
+ Copyright (C) 2024 Comfy
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+"""
+
+import psutil
+import logging
+from enum import Enum
+from comfy.cli_args import args, PerformanceFeature
+import torch
+import sys
+import platform
+import weakref
+import gc
+
class VRAMState(Enum):
    """How aggressively model weights must be swapped between RAM and VRAM."""
    DISABLED = 0 #No vram present: no need to move models to vram
    NO_VRAM = 1 #Very low vram: enable all the options to save vram
    LOW_VRAM = 2
    NORMAL_VRAM = 3
    HIGH_VRAM = 4
    SHARED = 5 #No dedicated vram: memory shared between CPU and GPU but models still need to be moved between both.
+
class CPUState(Enum):
    """Which processor family the main compute device belongs to."""
    GPU = 0
    CPU = 1
    MPS = 2
+
# Determine VRAM State
vram_state = VRAMState.NORMAL_VRAM   # effective state after all detection below
set_vram_to = VRAMState.NORMAL_VRAM  # state requested via CLI flags (applied later)
cpu_state = CPUState.GPU

total_vram = 0
+
+def get_supported_float8_types():
+ float8_types = []
+ try:
+ float8_types.append(torch.float8_e4m3fn)
+ except:
+ pass
+ try:
+ float8_types.append(torch.float8_e4m3fnuz)
+ except:
+ pass
+ try:
+ float8_types.append(torch.float8_e5m2)
+ except:
+ pass
+ try:
+ float8_types.append(torch.float8_e5m2fnuz)
+ except:
+ pass
+ try:
+ float8_types.append(torch.float8_e8m0fnu)
+ except:
+ pass
+ return float8_types
+
FLOAT8_TYPES = get_supported_float8_types()

xpu_available = False
torch_version = ""
try:
    torch_version = torch.version.__version__
    temp = torch_version.split(".")
    torch_version_numeric = (int(temp[0]), int(temp[1]))
    # A separate XPU probe is only needed up to torch 2.4; newer torch
    # exposes xpu support directly.
    xpu_available = (torch_version_numeric[0] < 2 or (torch_version_numeric[0] == 2 and torch_version_numeric[1] <= 4)) and torch.xpu.is_available()
except:
    pass
+
lowvram_available = True
if args.deterministic:
    # warn_only so ops without deterministic implementations still run.
    logging.info("Using deterministic algorithms for pytorch")
    torch.use_deterministic_algorithms(True, warn_only=True)
+
# Optional DirectML backend, selected via --directml [device_index].
directml_enabled = False
if args.directml is not None:
    import torch_directml
    directml_enabled = True
    device_index = args.directml
    if device_index < 0:
        # Negative index means "let torch_directml pick the default device".
        directml_device = torch_directml.device()
    else:
        directml_device = torch_directml.device(device_index)
    logging.info("Using directml with device: {}".format(torch_directml.device_name(device_index)))
    # torch_directml.disable_tiled_resources(True)
    lowvram_available = False #TODO: need to find a way to get free memory in directml before this can be enabled by default.
+
# Probe the optional accelerator backends; a failed import simply leaves
# that backend disabled.
try:
    import intel_extension_for_pytorch as ipex # noqa: F401
    _ = torch.xpu.device_count()
    xpu_available = xpu_available or torch.xpu.is_available()
except:
    # No IPEX: fall back to torch's built-in xpu support if present.
    xpu_available = xpu_available or (hasattr(torch, "xpu") and torch.xpu.is_available())

try:
    if torch.backends.mps.is_available():
        cpu_state = CPUState.MPS
        import torch.mps
except:
    pass

try:
    import torch_npu # noqa: F401
    _ = torch.npu.device_count()
    npu_available = torch.npu.is_available()
except:
    npu_available = False

try:
    import torch_mlu # noqa: F401
    _ = torch.mlu.device_count()
    mlu_available = torch.mlu.is_available()
except:
    mlu_available = False

try:
    # Iluvatar CoreX builds expose torch.corex.
    ixuca_available = hasattr(torch, "corex")
except:
    ixuca_available = False

# --cpu overrides every backend detection above.
if args.cpu:
    cpu_state = CPUState.CPU
+
def is_intel_xpu():
    """True when the GPU path is active and an Intel XPU was detected."""
    return cpu_state == CPUState.GPU and xpu_available
+
def is_ascend_npu():
    """True when an Ascend NPU backend was detected at import time."""
    return bool(npu_available)
+
def is_mlu():
    """True when a Cambricon MLU backend was detected at import time."""
    return bool(mlu_available)
+
def is_ixuca():
    """True when an Iluvatar CoreX (ixuca) build of torch was detected."""
    return bool(ixuca_available)
+
def get_torch_device():
    """Return the main compute device based on the detected backend state."""
    global directml_enabled
    global cpu_state
    if directml_enabled:
        global directml_device
        return directml_device
    if cpu_state == CPUState.MPS:
        return torch.device("mps")
    if cpu_state == CPUState.CPU:
        return torch.device("cpu")
    else:
        # GPU path: pick whichever accelerator family was detected.
        if is_intel_xpu():
            return torch.device("xpu", torch.xpu.current_device())
        elif is_ascend_npu():
            return torch.device("npu", torch.npu.current_device())
        elif is_mlu():
            return torch.device("mlu", torch.mlu.current_device())
        else:
            return torch.device(torch.cuda.current_device())
+
def get_total_memory(dev=None, torch_total_too=False):
    """Total memory in bytes for `dev` (defaults to the main compute device).

    When torch_total_too is True, returns (total, torch_reserved) where the
    second value is what the torch allocator currently has reserved.
    """
    global directml_enabled
    if dev is None:
        dev = get_torch_device()

    if hasattr(dev, 'type') and (dev.type == 'cpu' or dev.type == 'mps'):
        # CPU and MPS share system RAM.
        mem_total = psutil.virtual_memory().total
        mem_total_torch = mem_total
    else:
        if directml_enabled:
            # DirectML exposes no memory query; use a 1GB placeholder.
            mem_total = 1024 * 1024 * 1024 #TODO
            mem_total_torch = mem_total
        elif is_intel_xpu():
            stats = torch.xpu.memory_stats(dev)
            mem_reserved = stats['reserved_bytes.all.current']
            mem_total_xpu = torch.xpu.get_device_properties(dev).total_memory
            mem_total_torch = mem_reserved
            mem_total = mem_total_xpu
        elif is_ascend_npu():
            stats = torch.npu.memory_stats(dev)
            mem_reserved = stats['reserved_bytes.all.current']
            _, mem_total_npu = torch.npu.mem_get_info(dev)
            mem_total_torch = mem_reserved
            mem_total = mem_total_npu
        elif is_mlu():
            stats = torch.mlu.memory_stats(dev)
            mem_reserved = stats['reserved_bytes.all.current']
            _, mem_total_mlu = torch.mlu.mem_get_info(dev)
            mem_total_torch = mem_reserved
            mem_total = mem_total_mlu
        else:
            stats = torch.cuda.memory_stats(dev)
            mem_reserved = stats['reserved_bytes.all.current']
            _, mem_total_cuda = torch.cuda.mem_get_info(dev)
            mem_total_torch = mem_reserved
            mem_total = mem_total_cuda

    if torch_total_too:
        return (mem_total, mem_total_torch)
    else:
        return mem_total
+
def mac_version():
    """Return the macOS version as a tuple of ints, or None when unavailable.

    On non-Mac platforms platform.mac_ver()[0] is "", so int("") raises
    ValueError and None is returned. Narrowed from a bare `except:` —
    ValueError is the only expected failure here.
    """
    try:
        return tuple(int(n) for n in platform.mac_ver()[0].split("."))
    except ValueError:
        return None
+
# Log totals in MB at startup.
total_vram = get_total_memory(get_torch_device()) / (1024 * 1024)
total_ram = psutil.virtual_memory().total / (1024 * 1024)
logging.info("Total VRAM {:0.0f} MB, total RAM {:0.0f} MB".format(total_vram, total_ram))

try:
    logging.info("pytorch version: {}".format(torch_version))
    mac_ver = mac_version()
    if mac_ver is not None:
        logging.info("Mac Version {}".format(mac_ver))
except:
    pass

# Older torch builds may lack torch.cuda.OutOfMemoryError.
try:
    OOM_EXCEPTION = torch.cuda.OutOfMemoryError
except:
    OOM_EXCEPTION = Exception
+
# xformers attention backend detection (opt-out via --disable-xformers).
XFORMERS_VERSION = ""
XFORMERS_ENABLED_VAE = True
if args.disable_xformers:
    XFORMERS_IS_AVAILABLE = False
else:
    try:
        import xformers
        import xformers.ops
        XFORMERS_IS_AVAILABLE = True
        try:
            # Builds without the compiled C++ library cannot run the kernels.
            XFORMERS_IS_AVAILABLE = xformers._has_cpp_library
        except:
            pass
        try:
            XFORMERS_VERSION = xformers.version.__version__
            logging.info("xformers version: {}".format(XFORMERS_VERSION))
            if XFORMERS_VERSION.startswith("0.0.18"):
                # Known-bad release: produces black images at high resolution.
                logging.warning("\nWARNING: This version of xformers has a major bug where you will get black images when generating high resolution images.")
                logging.warning("Please downgrade or upgrade xformers to a different version.\n")
                XFORMERS_ENABLED_VAE = False
        except:
            pass
    except:
        XFORMERS_IS_AVAILABLE = False
+
def is_nvidia():
    """True when running the GPU path on a CUDA (NVIDIA) torch build."""
    return cpu_state == CPUState.GPU and bool(torch.version.cuda)
+
def is_amd():
    """True when running the GPU path on a ROCm (AMD) torch build."""
    return cpu_state == CPUState.GPU and bool(torch.version.hip)
+
# Fraction of free VRAM that must be kept for weights in lowvram mode;
# NVIDIA can go to 0 because its allocator handles this well.
MIN_WEIGHT_MEMORY_RATIO = 0.4
if is_nvidia():
    MIN_WEIGHT_MEMORY_RATIO = 0.0

ENABLE_PYTORCH_ATTENTION = False
if args.use_pytorch_cross_attention:
    ENABLE_PYTORCH_ATTENTION = True
    XFORMERS_IS_AVAILABLE = False

try:
    # Default to pytorch SDP attention on backends where it is known good,
    # unless the user explicitly chose a different attention implementation.
    if is_nvidia():
        if torch_version_numeric[0] >= 2:
            if ENABLE_PYTORCH_ATTENTION == False and args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
                ENABLE_PYTORCH_ATTENTION = True
    if is_intel_xpu() or is_ascend_npu() or is_mlu() or is_ixuca():
        if args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
            ENABLE_PYTORCH_ATTENTION = True
except:
    pass
+
+
SUPPORT_FP8_OPS = args.supports_fp8_compute
try:
    # AMD-specific feature gating by GPU architecture and ROCm version.
    if is_amd():
        try:
            rocm_version = tuple(map(int, str(torch.version.hip).split(".")[:2]))
        except:
            rocm_version = (6, -1)
        arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName
        logging.info("AMD arch: {}".format(arch))
        logging.info("ROCm version: {}".format(rocm_version))
        if args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
            if torch_version_numeric >= (2, 7): # works on 2.6 but doesn't actually seem to improve much
                if any((a in arch) for a in ["gfx90a", "gfx942", "gfx1100", "gfx1101", "gfx1151"]): # TODO: more arches, TODO: gfx950
                    ENABLE_PYTORCH_ATTENTION = True
            if torch_version_numeric >= (2, 8):
                if any((a in arch) for a in ["gfx1201"]):
                    ENABLE_PYTORCH_ATTENTION = True
        if torch_version_numeric >= (2, 7) and rocm_version >= (6, 4):
            if any((a in arch) for a in ["gfx1201", "gfx942", "gfx950"]): # TODO: more arches
                SUPPORT_FP8_OPS = True

except:
    pass


if ENABLE_PYTORCH_ATTENTION:
    # Enable every SDP backend; torch selects the best one at runtime.
    torch.backends.cuda.enable_math_sdp(True)
    torch.backends.cuda.enable_flash_sdp(True)
    torch.backends.cuda.enable_mem_efficient_sdp(True)
+
+
PRIORITIZE_FP16 = False # TODO: remove and replace with something that shows exactly which dtype is faster than the other
try:
    if is_nvidia() and PerformanceFeature.Fp16Accumulation in args.fast:
        torch.backends.cuda.matmul.allow_fp16_accumulation = True
        PRIORITIZE_FP16 = True # TODO: limit to cards where it actually boosts performance
        logging.info("Enabled fp16 accumulation.")
except:
    pass

try:
    if torch_version_numeric >= (2, 5):
        torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(True)
except:
    logging.warning("Warning, could not set allow_fp16_bf16_reduction_math_sdp")

# Map the CLI memory flags onto the VRAM state machine.
if args.lowvram:
    set_vram_to = VRAMState.LOW_VRAM
    lowvram_available = True
elif args.novram:
    set_vram_to = VRAMState.NO_VRAM
elif args.highvram or args.gpu_only:
    vram_state = VRAMState.HIGH_VRAM

FORCE_FP32 = False
if args.force_fp32:
    logging.info("Forcing FP32, if this improves things please report it.")
    FORCE_FP32 = True

if lowvram_available:
    if set_vram_to in (VRAMState.LOW_VRAM, VRAMState.NO_VRAM):
        vram_state = set_vram_to


# CPU-only and MPS override whatever the flags selected.
if cpu_state != CPUState.GPU:
    vram_state = VRAMState.DISABLED

if cpu_state == CPUState.MPS:
    vram_state = VRAMState.SHARED

logging.info(f"Set vram state to: {vram_state.name}")

DISABLE_SMART_MEMORY = args.disable_smart_memory

if DISABLE_SMART_MEMORY:
    logging.info("Disabling smart memory management")
+
def get_torch_device_name(device):
    """Human-readable description of `device` for the startup log."""
    if hasattr(device, 'type'):
        if device.type == "cuda":
            try:
                allocator_backend = torch.cuda.get_allocator_backend()
            except:
                allocator_backend = ""
            return "{} {} : {}".format(device, torch.cuda.get_device_name(device), allocator_backend)
        elif device.type == "xpu":
            return "{} {}".format(device, torch.xpu.get_device_name(device))
        else:
            return "{}".format(device.type)
    # Non torch.device objects (e.g. directml): fall back on backend checks.
    elif is_intel_xpu():
        return "{} {}".format(device, torch.xpu.get_device_name(device))
    elif is_ascend_npu():
        return "{} {}".format(device, torch.npu.get_device_name(device))
    elif is_mlu():
        return "{} {}".format(device, torch.mlu.get_device_name(device))
    else:
        return "CUDA {}: {}".format(device, torch.cuda.get_device_name(device))
+
# Log the chosen device at startup; failing to do so is non-fatal.
try:
    logging.info("Device: {}".format(get_torch_device_name(get_torch_device())))
except:
    logging.warning("Could not pick default device.")


# Registry of models currently resident on some device (most recent first).
current_loaded_models = []
+
def module_size(module):
    """Total bytes occupied by every tensor in the module's state dict."""
    return sum(t.nelement() * t.element_size() for t in module.state_dict().values())
+
class LoadedModel:
    """Bookkeeping entry for a model resident (fully or partially) on a device.

    Holds only weak references to the patcher and the real torch module so
    this registry never keeps a model alive by itself.
    """
    def __init__(self, model):
        self._set_model(model)
        self.device = model.load_device
        self.real_model = None          # weakref to the underlying torch module once loaded
        self.currently_used = True      # cleared by free_memory when eligible for eviction
        self.model_finalizer = None
        self._patcher_finalizer = None

    def _set_model(self, model):
        # Track the patcher weakly; when it dies, switch to its parent patcher.
        self._model = weakref.ref(model)
        if model.parent is not None:
            self._parent_model = weakref.ref(model.parent)
            self._patcher_finalizer = weakref.finalize(model, self._switch_parent)

    def _switch_parent(self):
        model = self._parent_model()
        if model is not None:
            self._set_model(model)

    @property
    def model(self):
        # May return None once the patcher has been garbage collected.
        return self._model()

    def model_memory(self):
        # Full weight size in bytes.
        return self.model.model_size()

    def model_loaded_memory(self):
        # Bytes currently resident on the load device.
        return self.model.loaded_size()

    def model_offloaded_memory(self):
        # Bytes still sitting on the offload device.
        return self.model.model_size() - self.model.loaded_size()

    def model_memory_required(self, device):
        if device == self.model.current_loaded_device():
            # Already (partially) there: only the offloaded remainder is needed.
            return self.model_offloaded_memory()
        else:
            return self.model_memory()

    def model_load(self, lowvram_model_memory=0, force_patch_weights=False):
        """Load/patch the model, keeping at most lowvram_model_memory bytes of weights on device (0 = unlimited)."""
        self.model.model_patches_to(self.device)
        self.model.model_patches_to(self.model.model_dtype())

        # if self.model.loaded_size() > 0:
        use_more_vram = lowvram_model_memory
        if use_more_vram == 0:
            # 0 means "no budget limit": allow loading everything.
            use_more_vram = 1e32
        self.model_use_more_vram(use_more_vram, force_patch_weights=force_patch_weights)
        real_model = self.model.model

        if is_intel_xpu() and not args.disable_ipex_optimize and 'ipex' in globals() and real_model is not None:
            with torch.no_grad():
                real_model = ipex.optimize(real_model.eval(), inplace=True, graph_mode=True, concat_linear=True)

        self.real_model = weakref.ref(real_model)
        # When the real module is collected, prune dead registry entries.
        self.model_finalizer = weakref.finalize(real_model, cleanup_models)
        return real_model

    def should_reload_model(self, force_patch_weights=False):
        # Reload is only needed when full patching is requested but some
        # weights were patched in lowvram (on-the-fly) mode.
        if force_patch_weights and self.model.lowvram_patch_counter() > 0:
            return True
        return False

    def model_unload(self, memory_to_free=None, unpatch_weights=True):
        """Unload; tries a partial unload first and returns False if that freed enough."""
        if memory_to_free is not None:
            if memory_to_free < self.model.loaded_size():
                freed = self.model.partially_unload(self.model.offload_device, memory_to_free)
                if freed >= memory_to_free:
                    return False
        self.model.detach(unpatch_weights)
        self.model_finalizer.detach()
        self.model_finalizer = None
        self.real_model = None
        return True

    def model_use_more_vram(self, extra_memory, force_patch_weights=False):
        # Returns how many bytes were actually moved onto the device.
        return self.model.partially_load(self.device, extra_memory, force_patch_weights=force_patch_weights)

    def __eq__(self, other):
        # Identity of the underlying patcher defines equality (used by list.index).
        return self.model is other.model

    def __del__(self):
        if self._patcher_finalizer is not None:
            self._patcher_finalizer.detach()

    def is_dead(self):
        # Real torch module still alive while the patcher is gone -> leaked entry.
        return self.real_model() is not None and self.model is None
+
+
def use_more_memory(extra_memory, loaded_models, device):
    """Offer an extra VRAM budget to models already resident on `device`.

    Each matching model is offered the remaining budget in turn until the
    budget is exhausted.
    """
    remaining = extra_memory
    for entry in loaded_models:
        if entry.device != device:
            continue
        remaining -= entry.model_use_more_vram(remaining)
        if remaining <= 0:
            break
+
def offloaded_memory(loaded_models, device):
    """Total bytes currently offloaded (not device-resident) for models on `device`."""
    return sum(m.model_offloaded_memory() for m in loaded_models if m.device == device)
+
WINDOWS = any(platform.win32_ver())

# VRAM never handed to models, left for the OS and other applications.
EXTRA_RESERVED_VRAM = 400 * 1024 * 1024
if WINDOWS:
    EXTRA_RESERVED_VRAM = 600 * 1024 * 1024 #Windows is higher because of the shared vram issue
    if total_vram > (15 * 1024): # more extra reserved vram on 16GB+ cards
        EXTRA_RESERVED_VRAM += 100 * 1024 * 1024

if args.reserve_vram is not None:
    # CLI value is in GB.
    EXTRA_RESERVED_VRAM = args.reserve_vram * 1024 * 1024 * 1024
    logging.debug("Reserving {}MB vram for other applications.".format(EXTRA_RESERVED_VRAM / (1024 * 1024)))

def extra_reserved_memory():
    """VRAM (bytes) kept free for other applications."""
    return EXTRA_RESERVED_VRAM
+
def minimum_inference_memory():
    # ~0.8GB baseline for activations plus the user-reserved VRAM.
    return (1024 * 1024 * 1024) * 0.8 + extra_reserved_memory()
+
def free_memory(memory_required, device, keep_loaded=[]):
    """Unload models on `device` until `memory_required` bytes are free.

    Models listed in keep_loaded are never touched (note: the mutable default
    is read-only here). Returns the LoadedModel entries that were fully
    unloaded.
    """
    cleanup_models_gc()
    unloaded_model = []
    can_unload = []
    unloaded_models = []

    # Walk newest-to-oldest collecting eviction candidates.
    for i in range(len(current_loaded_models) -1, -1, -1):
        shift_model = current_loaded_models[i]
        if shift_model.device == device:
            if shift_model not in keep_loaded and not shift_model.is_dead():
                # Sort key: most-offloaded first, then fewest external refs.
                can_unload.append((-shift_model.model_offloaded_memory(), sys.getrefcount(shift_model.model), shift_model.model_memory(), i))
                shift_model.currently_used = False

    for x in sorted(can_unload):
        i = x[-1]
        memory_to_free = None
        if not DISABLE_SMART_MEMORY:
            # Stop as soon as the requirement is already satisfied.
            free_mem = get_free_memory(device)
            if free_mem > memory_required:
                break
            memory_to_free = memory_required - free_mem
        logging.debug(f"Unloading {current_loaded_models[i].model.model.__class__.__name__}")
        if current_loaded_models[i].model_unload(memory_to_free):
            unloaded_model.append(i)

    for i in sorted(unloaded_model, reverse=True):
        unloaded_models.append(current_loaded_models.pop(i))

    if len(unloaded_model) > 0:
        soft_empty_cache()
    else:
        if vram_state != VRAMState.HIGH_VRAM:
            # Nothing unloaded: still drop torch's cache when the allocator
            # hoards more than 25% of the free memory.
            mem_free_total, mem_free_torch = get_free_memory(device, torch_free_too=True)
            if mem_free_torch > mem_free_total * 0.25:
                soft_empty_cache()
    return unloaded_models
+
def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
    """Ensure every patcher in `models` is loaded on its load device.

    memory_required: extra inference memory (bytes) budgeted on top of weights.
    force_full_load: bypass the lowvram partial-loading path entirely.
    """
    cleanup_models_gc()
    global vram_state

    inference_memory = minimum_inference_memory()
    extra_mem = max(inference_memory, memory_required + extra_reserved_memory())
    if minimum_memory_required is None:
        minimum_memory_required = extra_mem
    else:
        minimum_memory_required = max(inference_memory, minimum_memory_required + extra_reserved_memory())

    models = set(models)

    models_to_load = []

    for x in models:
        loaded_model = LoadedModel(x)
        try:
            # LoadedModel.__eq__ compares underlying patchers.
            loaded_model_index = current_loaded_models.index(loaded_model)
        except:
            loaded_model_index = None

        if loaded_model_index is not None:
            # Already tracked: just mark it as in use again.
            loaded = current_loaded_models[loaded_model_index]
            loaded.currently_used = True
            models_to_load.append(loaded)
        else:
            if hasattr(x, "model"):
                logging.info(f"Requested to load {x.model.__class__.__name__}")
            models_to_load.append(loaded_model)

    for loaded_model in models_to_load:
        # Evict stale clones of this patcher without unpatching their weights.
        to_unload = []
        for i in range(len(current_loaded_models)):
            if loaded_model.model.is_clone(current_loaded_models[i].model):
                to_unload = [i] + to_unload
        for i in to_unload:
            current_loaded_models.pop(i).model.detach(unpatch_all=False)

    total_memory_required = {}
    for loaded_model in models_to_load:
        total_memory_required[loaded_model.device] = total_memory_required.get(loaded_model.device, 0) + loaded_model.model_memory_required(loaded_model.device)

    for device in total_memory_required:
        if device != torch.device("cpu"):
            # 10% headroom on top of the raw weight requirement.
            free_memory(total_memory_required[device] * 1.1 + extra_mem, device)

    for device in total_memory_required:
        if device != torch.device("cpu"):
            free_mem = get_free_memory(device)
            if free_mem < minimum_memory_required:
                models_l = free_memory(minimum_memory_required, device)
                logging.info("{} models unloaded.".format(len(models_l)))

    for loaded_model in models_to_load:
        model = loaded_model.model
        torch_dev = model.load_device
        if is_device_cpu(torch_dev):
            vram_set_state = VRAMState.DISABLED
        else:
            vram_set_state = vram_state
        lowvram_model_memory = 0
        if lowvram_available and (vram_set_state == VRAMState.LOW_VRAM or vram_set_state == VRAMState.NORMAL_VRAM) and not force_full_load:
            # Weight budget: free memory minus inference needs, clamped to at
            # least 128MB and at least MIN_WEIGHT_MEMORY_RATIO of free memory.
            loaded_memory = loaded_model.model_loaded_memory()
            current_free_mem = get_free_memory(torch_dev) + loaded_memory

            lowvram_model_memory = max(128 * 1024 * 1024, (current_free_mem - minimum_memory_required), min(current_free_mem * MIN_WEIGHT_MEMORY_RATIO, current_free_mem - minimum_inference_memory()))
            lowvram_model_memory = max(0.1, lowvram_model_memory - loaded_memory)

        if vram_set_state == VRAMState.NO_VRAM:
            # 0.1 (not 0) so model_load does not treat it as "unlimited".
            lowvram_model_memory = 0.1

        loaded_model.model_load(lowvram_model_memory, force_patch_weights=force_patch_weights)
        current_loaded_models.insert(0, loaded_model)
    return
+
def load_model_gpu(model):
    """Convenience wrapper for loading a single model."""
    return load_models_gpu([model])
+
def loaded_models(only_currently_used=False):
    """Return the patcher objects for tracked models, optionally only in-use ones."""
    return [entry.model for entry in current_loaded_models
            if not only_currently_used or entry.currently_used]
+
+
def cleanup_models_gc():
    """Detect leaked models (torch module alive, patcher gone) and force a GC pass."""
    do_gc = False
    for i in range(len(current_loaded_models)):
        cur = current_loaded_models[i]
        if cur.is_dead():
            logging.info("Potential memory leak detected with model {}, doing a full garbage collect, for maximum performance avoid circular references in the model code.".format(cur.real_model().__class__.__name__))
            do_gc = True
            break

    if do_gc:
        gc.collect()
        soft_empty_cache()

        # If a full collect did not resolve it, something external still
        # references the model.
        for i in range(len(current_loaded_models)):
            cur = current_loaded_models[i]
            if cur.is_dead():
                logging.warning("WARNING, memory leak with model {}. Please make sure it is not being referenced from somewhere.".format(cur.real_model().__class__.__name__))
+
+
def cleanup_models():
    """Drop registry entries whose underlying torch module has been collected."""
    # Iterate from the end so pops don't shift pending indices.
    for idx in reversed(range(len(current_loaded_models))):
        if current_loaded_models[idx].real_model() is None:
            current_loaded_models.pop(idx)
+
+def dtype_size(dtype):
+ dtype_size = 4
+ if dtype == torch.float16 or dtype == torch.bfloat16:
+ dtype_size = 2
+ elif dtype == torch.float32:
+ dtype_size = 4
+ else:
+ try:
+ dtype_size = dtype.itemsize
+ except: #Old pytorch doesn't have .itemsize
+ pass
+ return dtype_size
+
def unet_offload_device():
    """Resting device for diffusion model weights: GPU only in high-vram mode."""
    return get_torch_device() if vram_state == VRAMState.HIGH_VRAM else torch.device("cpu")
+
def unet_inital_load_device(parameters, dtype):
    """Pick the device to first materialize UNet weights on.

    parameters: parameter count; combined with dtype size to estimate bytes.
    (The "inital" typo is part of the public API and cannot be renamed here.)
    """
    torch_dev = get_torch_device()
    if vram_state == VRAMState.HIGH_VRAM or vram_state == VRAMState.SHARED:
        return torch_dev

    cpu_dev = torch.device("cpu")
    if DISABLE_SMART_MEMORY or vram_state == VRAMState.NO_VRAM:
        return cpu_dev

    model_size = dtype_size(dtype) * parameters

    # Load straight to GPU when it both fits there and the GPU has more free
    # memory than system RAM.
    mem_dev = get_free_memory(torch_dev)
    mem_cpu = get_free_memory(cpu_dev)
    if mem_dev > mem_cpu and model_size < mem_dev:
        return torch_dev
    else:
        return cpu_dev
+
def maximum_vram_for_weights(device=None):
    # Cap weights at 88% of total memory minus what inference itself needs.
    return (get_total_memory(device) * 0.88 - minimum_inference_memory())
+
def unet_dtype(device=None, model_params=0, supported_dtypes=[torch.float16, torch.bfloat16, torch.float32], weight_dtype=None):
    """Pick the storage dtype for diffusion model weights.

    Priority: explicit CLI flags, then keeping fp8 weights when practical,
    then fp16/bf16 when the device handles them, else fp32.
    model_params: parameter count used to estimate whether weights fit in VRAM.
    """
    if model_params < 0:
        # Sentinel: treat as "infinitely large" to force memory-saving paths.
        model_params = 1000000000000000000000
    if args.fp32_unet:
        return torch.float32
    if args.fp64_unet:
        return torch.float64
    if args.bf16_unet:
        return torch.bfloat16
    if args.fp16_unet:
        return torch.float16
    if args.fp8_e4m3fn_unet:
        return torch.float8_e4m3fn
    if args.fp8_e5m2_unet:
        return torch.float8_e5m2
    if args.fp8_e8m0fnu_unet:
        return torch.float8_e8m0fnu

    fp8_dtype = None
    if weight_dtype in FLOAT8_TYPES:
        fp8_dtype = weight_dtype

    if fp8_dtype is not None:
        if supports_fp8_compute(device): #if fp8 compute is supported the casting is most likely not expensive
            return fp8_dtype

        # No fp8 compute: keep fp8 only when fp16-sized weights would not fit.
        free_model_memory = maximum_vram_for_weights(device)
        if model_params * 2 > free_model_memory:
            return fp8_dtype

    if PRIORITIZE_FP16 or weight_dtype == torch.float16:
        if torch.float16 in supported_dtypes and should_use_fp16(device=device, model_params=model_params):
            return torch.float16

    # First pass: dtypes usable natively; second pass allows manual casting.
    for dt in supported_dtypes:
        if dt == torch.float16 and should_use_fp16(device=device, model_params=model_params):
            if torch.float16 in supported_dtypes:
                return torch.float16
        if dt == torch.bfloat16 and should_use_bf16(device, model_params=model_params):
            if torch.bfloat16 in supported_dtypes:
                return torch.bfloat16

    for dt in supported_dtypes:
        if dt == torch.float16 and should_use_fp16(device=device, model_params=model_params, manual_cast=True):
            if torch.float16 in supported_dtypes:
                return torch.float16
        if dt == torch.bfloat16 and should_use_bf16(device, model_params=model_params, manual_cast=True):
            if torch.bfloat16 in supported_dtypes:
                return torch.bfloat16

    return torch.float32
+
# None means no manual cast
def unet_manual_cast(weight_dtype, inference_device, supported_dtypes=[torch.float16, torch.bfloat16, torch.float32]):
    """Dtype to cast to at runtime when weights are stored in `weight_dtype`.

    Returns None when the stored dtype can be used directly on
    inference_device; otherwise returns the compute dtype to cast to.
    """
    if weight_dtype == torch.float32 or weight_dtype == torch.float64:
        return None

    fp16_supported = should_use_fp16(inference_device, prioritize_performance=False)
    if fp16_supported and weight_dtype == torch.float16:
        return None

    bf16_supported = should_use_bf16(inference_device)
    if bf16_supported and weight_dtype == torch.bfloat16:
        return None

    # Re-evaluate fp16 with performance prioritized for choosing a cast target.
    fp16_supported = should_use_fp16(inference_device, prioritize_performance=True)
    if PRIORITIZE_FP16 and fp16_supported and torch.float16 in supported_dtypes:
        return torch.float16

    for dt in supported_dtypes:
        if dt == torch.float16 and fp16_supported:
            return torch.float16
        if dt == torch.bfloat16 and bf16_supported:
            return torch.bfloat16

    return torch.float32
+
def text_encoder_offload_device():
    """Resting device for text encoder weights when not in use."""
    return get_torch_device() if args.gpu_only else torch.device("cpu")
+
def text_encoder_device():
    """Pick the device used to run text encoders."""
    if args.gpu_only:
        return get_torch_device()
    if vram_state in (VRAMState.HIGH_VRAM, VRAMState.NORMAL_VRAM):
        # Only worth running on GPU when fp16 is usable there.
        if should_use_fp16(prioritize_performance=False):
            return get_torch_device()
    return torch.device("cpu")
+
def text_encoder_initial_device(load_device, offload_device, model_size=0):
    """Choose where a text encoder of `model_size` bytes should first live."""
    # Trivial cases: same device, or small (<=1GB) models stay offloaded.
    if load_device == offload_device or model_size <= 1024 * 1024 * 1024:
        return offload_device

    if is_device_mps(load_device):
        return load_device

    free_load = get_free_memory(load_device)
    free_off = get_free_memory(offload_device)
    # The load device wins when it has at least half the offload device's
    # free memory and fits the model with a 20% margin.
    if free_load > (free_off * 0.5) and model_size * 1.2 < free_load:
        return load_device
    return offload_device
+
def text_encoder_dtype(device=None):
    """Dtype for text encoder weights; CLI flags take priority, otherwise fp16."""
    if args.fp8_e4m3fn_text_enc:
        return torch.float8_e4m3fn
    elif args.fp8_e5m2_text_enc:
        return torch.float8_e5m2
    elif args.fp16_text_enc:
        return torch.float16
    elif args.bf16_text_enc:
        return torch.bfloat16
    elif args.fp32_text_enc:
        return torch.float32

    # Dead-code cleanup: the old `if is_device_cpu(device): return fp16`
    # branch returned the same value as the fallthrough, so both paths
    # collapse to a single return (behavior unchanged).
    return torch.float16
+
+
def intermediate_device():
    """Device used to hold intermediate results between pipeline stages."""
    return get_torch_device() if args.gpu_only else torch.device("cpu")
+
def vae_device():
    """Device used to run the VAE (CPU when --cpu-vae is set)."""
    return torch.device("cpu") if args.cpu_vae else get_torch_device()
+
def vae_offload_device():
    """Resting device for VAE weights when not in use."""
    return get_torch_device() if args.gpu_only else torch.device("cpu")
+
def vae_dtype(device=None, allowed_dtypes=[]):
    """Pick the VAE execution dtype: CLI flags first, else the first workable
    entry of allowed_dtypes, else fp32.

    NOTE: the mutable default list is never mutated here, so it is safe.
    """
    if args.fp16_vae:
        return torch.float16
    elif args.bf16_vae:
        return torch.bfloat16
    elif args.fp32_vae:
        return torch.float32

    for d in allowed_dtypes:
        if d == torch.float16 and should_use_fp16(device):
            return d

        # NOTE: bfloat16 seems to work on AMD for the VAE but is extremely slow in some cases compared to fp32
        # slowness still a problem on pytorch nightly 2.9.0.dev20250720+rocm6.4 tested on RDNA3
        if d == torch.bfloat16 and (not is_amd()) and should_use_bf16(device):
            return d

    return torch.float32
+
def get_autocast_device(dev):
    """Device-type string for torch.autocast; 'cuda' for untyped objects."""
    return getattr(dev, 'type', "cuda")
+
def supports_dtype(device, dtype): #TODO
    """Whether `device` can run compute in `dtype`."""
    if dtype == torch.float32:
        return True
    if is_device_cpu(device):
        return False
    # Non-CPU devices are assumed to handle both half-precision formats.
    return dtype in (torch.float16, torch.bfloat16)
+
def supports_cast(device, dtype): #TODO
    """Whether weights can be stored/cast to `dtype` on `device`."""
    if dtype in (torch.float32, torch.float16):
        return True
    if directml_enabled: #TODO: test this
        return False
    if dtype == torch.bfloat16:
        return True
    if is_device_mps(device):
        return False
    # fp8 storage formats are accepted everywhere else.
    return dtype in (torch.float8_e4m3fn, torch.float8_e5m2)
+
def pick_weight_dtype(dtype, fallback_dtype, device=None):
    """Pick `dtype` unless it is missing, larger than the fallback, or not castable on `device`."""
    chosen = dtype
    if chosen is None or dtype_size(chosen) > dtype_size(fallback_dtype):
        chosen = fallback_dtype
    if not supports_cast(device, chosen):
        chosen = fallback_dtype
    return chosen
+
def device_supports_non_blocking(device):
    """Whether non_blocking tensor copies are safe for this device."""
    if is_device_mps(device):
        return False  # pytorch bug? mps doesn't support non blocking
    if is_intel_xpu():
        return True
    if args.deterministic:
        # TODO: figure out why deterministic breaks non blocking from gpu to cpu (previews)
        return False
    return not directml_enabled
+
def device_should_use_non_blocking(device):
    """Whether copies should actually pass non_blocking=True.

    Deliberately always returns False for now, even when the device
    supports non-blocking copies (see TODO below).
    """
    if not device_supports_non_blocking(device):
        return False
    return False
    # return True #TODO: figure out why this causes memory issues on Nvidia and possibly others
+
def force_channels_last():
    """Whether to force channels_last memory format (currently CLI opt-in only)."""
    #TODO
    return bool(args.force_channels_last)
+
+
# Per-device pools of side streams used to overlap weight offloading with compute.
STREAMS = {}
NUM_STREAMS = 1
if args.async_offload:
    # 2 streams: one can copy while the other is being waited on.
    NUM_STREAMS = 2
    logging.info("Using async weight offloading with {} streams".format(NUM_STREAMS))

# Round-robin cursor into STREAMS[device], per device.
stream_counters = {}
def get_offload_stream(device):
    """Return the next offload side-stream for `device`, or None.

    Streams are created lazily per device (CUDA/XPU only) and handed out
    round-robin. Returns None when async offload is disabled
    (NUM_STREAMS <= 1) or the device has no stream support.
    """
    stream_counter = stream_counters.get(device, 0)
    if NUM_STREAMS <= 1:
        return None

    if device in STREAMS:
        ss = STREAMS[device]
        s = ss[stream_counter]
        stream_counter = (stream_counter + 1) % len(ss)
        # Make the *next* stream in the rotation wait on current compute so it
        # is safe to reuse by the time it is handed out.
        if is_device_cuda(device):
            ss[stream_counter].wait_stream(torch.cuda.current_stream())
        elif is_device_xpu(device):
            ss[stream_counter].wait_stream(torch.xpu.current_stream())
        stream_counters[device] = stream_counter
        return s
    elif is_device_cuda(device):
        # First use on this CUDA device: build the stream pool.
        ss = []
        for k in range(NUM_STREAMS):
            ss.append(torch.cuda.Stream(device=device, priority=0))
        STREAMS[device] = ss
        s = ss[stream_counter]
        stream_counter = (stream_counter + 1) % len(ss)
        stream_counters[device] = stream_counter
        return s
    elif is_device_xpu(device):
        # First use on this XPU device: build the stream pool.
        ss = []
        for k in range(NUM_STREAMS):
            ss.append(torch.xpu.Stream(device=device, priority=0))
        STREAMS[device] = ss
        s = ss[stream_counter]
        stream_counter = (stream_counter + 1) % len(ss)
        stream_counters[device] = stream_counter
        return s
    return None
+
def sync_stream(device, stream):
    """Block the device's current stream until `stream` finishes (None is a no-op)."""
    if stream is None:
        return
    backend = None
    if is_device_cuda(device):
        backend = torch.cuda
    elif is_device_xpu(device):
        backend = torch.xpu
    if backend is not None:
        backend.current_stream().wait_stream(stream)
+
def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False, stream=None):
    """Cast/move `weight` to (dtype, device), optionally inside `stream`.

    Returns `weight` unchanged when no conversion or copy is needed.
    """
    same_device = device is None or weight.device == device
    if same_device:
        if not copy and (dtype is None or weight.dtype == dtype):
            return weight
        if stream is None:
            return weight.to(dtype=dtype, copy=copy)
        with stream:
            return weight.to(dtype=dtype, copy=copy)

    # Cross-device move: allocate on the target and copy in.
    if stream is None:
        out = torch.empty_like(weight, dtype=dtype, device=device)
        out.copy_(weight, non_blocking=non_blocking)
    else:
        with stream:
            out = torch.empty_like(weight, dtype=dtype, device=device)
            out.copy_(weight, non_blocking=non_blocking)
    return out
+
def cast_to_device(tensor, device, dtype, copy=False):
    """Move/cast `tensor`, using non-blocking copies when the device allows it."""
    return cast_to(tensor, dtype=dtype, device=device,
                   non_blocking=device_supports_non_blocking(device), copy=copy)
+
def sage_attention_enabled():
    # CLI flag: use SageAttention kernels.
    return args.use_sage_attention

def flash_attention_enabled():
    # CLI flag: use FlashAttention kernels.
    return args.use_flash_attention
+
def xformers_enabled():
    """True when xformers attention can be used on the current backend."""
    global directml_enabled
    global cpu_state
    if cpu_state != CPUState.GPU:
        return False
    # xformers only works on CUDA-like GPUs; every other backend opts out.
    for unsupported in (is_intel_xpu, is_ascend_npu, is_mlu, is_ixuca):
        if unsupported():
            return False
    if directml_enabled:
        return False
    return XFORMERS_IS_AVAILABLE
+
+
def xformers_enabled_vae():
    """xformers for the VAE: requires xformers plus the VAE-specific flag."""
    return xformers_enabled() and XFORMERS_ENABLED_VAE
+
def pytorch_attention_enabled():
    # True when pytorch scaled_dot_product_attention is the selected backend.
    global ENABLE_PYTORCH_ATTENTION
    return ENABLE_PYTORCH_ATTENTION
+
def pytorch_attention_enabled_vae():
    """Pytorch attention for the VAE; disabled on AMD (crashes at high res)."""
    # enabling pytorch attention on AMD currently causes crash when doing high res
    return (not is_amd()) and pytorch_attention_enabled()
+
def pytorch_attention_flash_attention():
    """Whether pytorch SDPA likely has a flash/mem-efficient kernel here."""
    global ENABLE_PYTORCH_ATTENTION
    if not ENABLE_PYTORCH_ATTENTION:
        return False
    #TODO: more reliable way of checking for flash attention?
    # AMD note: if pytorch attention is enabled there it probably supports
    # at least mem efficient attention.
    backends = (is_nvidia, is_intel_xpu, is_ascend_npu, is_mlu, is_amd, is_ixuca)
    return any(check() for check in backends)
+
def force_upcast_attention_dtype():
    """Return a dtype upcast map for attention, or None when not needed."""
    upcast = args.force_upcast_attention

    # black image bug on recent versions of macOS, I don't think it's ever getting fixed
    macos_version = mac_version()
    if macos_version is not None and macos_version >= (14, 5):
        upcast = True

    return {torch.float16: torch.float32} if upcast else None
+
def get_free_memory(dev=None, torch_free_too=False):
    """Free memory in bytes on `dev` (defaults to the main torch device).

    "Total free" counts device-free memory plus torch's reusable cache
    (reserved minus active). With torch_free_too=True returns the tuple
    (total_free, torch_cache_free).
    """
    global directml_enabled
    if dev is None:
        dev = get_torch_device()

    if hasattr(dev, 'type') and (dev.type == 'cpu' or dev.type == 'mps'):
        # CPU/MPS share system RAM.
        mem_free_total = psutil.virtual_memory().available
        mem_free_torch = mem_free_total
    else:
        if directml_enabled:
            # DirectML has no query API here; assume a fixed 1GB. #TODO
            mem_free_total = 1024 * 1024 * 1024 #TODO
            mem_free_torch = mem_free_total
        elif is_intel_xpu():
            stats = torch.xpu.memory_stats(dev)
            mem_active = stats['active_bytes.all.current']
            mem_reserved = stats['reserved_bytes.all.current']
            # XPU has no mem_get_info equivalent; derive free from total - reserved.
            mem_free_xpu = torch.xpu.get_device_properties(dev).total_memory - mem_reserved
            mem_free_torch = mem_reserved - mem_active
            mem_free_total = mem_free_xpu + mem_free_torch
        elif is_ascend_npu():
            stats = torch.npu.memory_stats(dev)
            mem_active = stats['active_bytes.all.current']
            mem_reserved = stats['reserved_bytes.all.current']
            mem_free_npu, _ = torch.npu.mem_get_info(dev)
            mem_free_torch = mem_reserved - mem_active
            mem_free_total = mem_free_npu + mem_free_torch
        elif is_mlu():
            stats = torch.mlu.memory_stats(dev)
            mem_active = stats['active_bytes.all.current']
            mem_reserved = stats['reserved_bytes.all.current']
            mem_free_mlu, _ = torch.mlu.mem_get_info(dev)
            mem_free_torch = mem_reserved - mem_active
            mem_free_total = mem_free_mlu + mem_free_torch
        else:
            stats = torch.cuda.memory_stats(dev)
            mem_active = stats['active_bytes.all.current']
            mem_reserved = stats['reserved_bytes.all.current']
            mem_free_cuda, _ = torch.cuda.mem_get_info(dev)
            mem_free_torch = mem_reserved - mem_active
            mem_free_total = mem_free_cuda + mem_free_torch

    if torch_free_too:
        return (mem_free_total, mem_free_torch)
    else:
        return mem_free_total
+
def cpu_mode():
    # True when running in CPU-only mode.
    global cpu_state
    return cpu_state == CPUState.CPU

def mps_mode():
    # True when running on Apple silicon via the MPS backend.
    global cpu_state
    return cpu_state == CPUState.MPS
+
def is_device_type(device, type):
    """True when `device` has a `.type` attribute equal to the given string."""
    return hasattr(device, 'type') and device.type == type
+
# Thin device-type predicates over is_device_type().
def is_device_cpu(device):
    return is_device_type(device, 'cpu')

def is_device_mps(device):
    return is_device_type(device, 'mps')

def is_device_xpu(device):
    return is_device_type(device, 'xpu')

def is_device_cuda(device):
    return is_device_type(device, 'cuda')
+
def is_directml_enabled():
    """True when the DirectML backend is active."""
    global directml_enabled
    return bool(directml_enabled)
+
def should_use_fp16(device=None, model_params=0, prioritize_performance=True, manual_cast=False):
    """Decide whether fp16 should be used on `device`.

    model_params: parameter count; with manual_cast it can force fp16 when
    the fp32 weights would not fit in usable VRAM.
    """
    if device is not None:
        if is_device_cpu(device):
            return False

    if args.force_fp16:
        return True

    if FORCE_FP32:
        return False

    if is_directml_enabled():
        return True

    if (device is not None and is_device_mps(device)) or mps_mode():
        return True

    if cpu_mode():
        return False

    if is_intel_xpu():
        if torch_version_numeric < (2, 3):
            return True
        else:
            return torch.xpu.get_device_properties(device).has_fp16

    if is_ascend_npu():
        return True

    if is_mlu():
        return True

    if is_ixuca():
        return True

    if torch.version.hip:
        return True

    props = torch.cuda.get_device_properties(device)
    # Ampere (sm80) and newer: fp16 is always fine.
    if props.major >= 8:
        return True

    # Pre-Pascal: no usable fp16.
    if props.major < 6:
        return False

    #FP16 is confirmed working on a 1080 (GP104) and on latest pytorch actually seems faster than fp32
    nvidia_10_series = ["1080", "1070", "titan x", "p3000", "p3200", "p4000", "p4200", "p5000", "p5200", "p6000", "1060", "1050", "p40", "p100", "p6", "p4"]
    for x in nvidia_10_series:
        if x in props.name.lower():
            if WINDOWS or manual_cast:
                return True
            else:
                return False #weird linux behavior where fp32 is faster

    if manual_cast:
        # Prefer fp16 when fp32 weights would overflow the VRAM budget.
        free_model_memory = maximum_vram_for_weights(device)
        if (not prioritize_performance) or model_params * 4 > free_model_memory:
            return True

    if props.major < 7:
        return False

    #FP16 is just broken on these cards
    nvidia_16_series = ["1660", "1650", "1630", "T500", "T550", "T600", "MX550", "MX450", "CMP 30HX", "T2000", "T1000", "T1200"]
    for x in nvidia_16_series:
        if x in props.name:
            return False

    return True
+
def should_use_bf16(device=None, model_params=0, prioritize_performance=True, manual_cast=False):
    """Decide whether bf16 should be used on `device` (mirrors should_use_fp16)."""
    if device is not None:
        if is_device_cpu(device): #TODO ? bf16 works on CPU but is extremely slow
            return False

    if FORCE_FP32:
        return False

    if directml_enabled:
        return False

    if (device is not None and is_device_mps(device)) or mps_mode():
        # NOTE(review): assumes mac_version() is non-None whenever MPS is in
        # use; a None here would make the comparison raise — verify.
        if mac_version() < (14,):
            return False
        return True

    if cpu_mode():
        return False

    if is_intel_xpu():
        if torch_version_numeric < (2, 6):
            return True
        else:
            return torch.xpu.get_device_capability(device)['has_bfloat16_conversions']

    if is_ascend_npu():
        return True

    if is_ixuca():
        return True

    if is_amd():
        arch = torch.cuda.get_device_properties(device).gcnArchName
        if any((a in arch) for a in ["gfx1030", "gfx1031", "gfx1010", "gfx1011", "gfx1012", "gfx906", "gfx900", "gfx803"]): # RDNA2 and older don't support bf16
            if manual_cast:
                return True
            return False

    props = torch.cuda.get_device_properties(device)

    if is_mlu():
        if props.major > 3:
            return True

    # Ampere (sm80) and newer: native bf16.
    if props.major >= 8:
        return True

    bf16_works = torch.cuda.is_bf16_supported()

    if bf16_works and manual_cast:
        # Emulated bf16 is worth it only when fp32 would not fit in VRAM.
        free_model_memory = maximum_vram_for_weights(device)
        if (not prioritize_performance) or model_params * 4 > free_model_memory:
            return True

    return False
+
def supports_fp8_compute(device=None):
    """True when the device + torch version can run fp8 compute kernels."""
    if SUPPORT_FP8_OPS:
        return True
    if not is_nvidia():
        return False

    props = torch.cuda.get_device_properties(device)
    # Hopper (sm90+) always qualifies; Ada (sm89) is the minimum otherwise.
    if props.major >= 9:
        return True
    if props.major < 8 or props.minor < 9:
        return False

    if torch_version_numeric < (2, 3):
        return False
    # Windows wheels gained working fp8 kernels later than linux.
    if WINDOWS and torch_version_numeric < (2, 4):
        return False
    return True
+
def extended_fp16_support():
    """True on torch >= 2.7, where fp16 works for more models."""
    # TODO: check why some models work with fp16 on newer torch versions but not on older
    return torch_version_numeric >= (2, 7)
+
def soft_empty_cache(force=False):
    """Release cached allocator memory back to the device.

    NOTE(review): the `force` parameter is currently unused in this body;
    kept for compatibility with callers that pass it.
    """
    global cpu_state
    if cpu_state == CPUState.MPS:
        torch.mps.empty_cache()
    elif is_intel_xpu():
        torch.xpu.empty_cache()
    elif is_ascend_npu():
        torch.npu.empty_cache()
    elif is_mlu():
        torch.mlu.empty_cache()
    elif torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
+
def unload_all_models():
    # Request an effectively-infinite amount of free memory, which forces
    # every loaded model off the main torch device.
    free_memory(1e30, get_torch_device())
+
+
+#TODO: might be cleaner to put this somewhere else
+import threading
+
class InterruptProcessingException(Exception):
    """Raised to abort the currently running prompt when a user interrupt is requested."""
    pass

# Guards the interrupt flag below; RLock so a holder may re-enter.
interrupt_processing_mutex = threading.RLock()

interrupt_processing = False
def interrupt_current_processing(value=True):
    """Set (or clear) the global interrupt flag under its lock."""
    global interrupt_processing
    with interrupt_processing_mutex:
        interrupt_processing = value
+
def processing_interrupted():
    """Read the global interrupt flag under its lock."""
    with interrupt_processing_mutex:
        return interrupt_processing
+
def throw_exception_if_processing_interrupted():
    """Raise InterruptProcessingException once if an interrupt was requested."""
    global interrupt_processing
    with interrupt_processing_mutex:
        if interrupt_processing:
            # Consume the flag so the interrupt fires only once.
            interrupt_processing = False
            raise InterruptProcessingException()
diff --git a/ComfyUI/comfy/model_sampling.py b/ComfyUI/comfy/model_sampling.py
new file mode 100644
index 0000000000000000000000000000000000000000..b240b7f291905c90d40718565a3fdecbf85cabb3
--- /dev/null
+++ b/ComfyUI/comfy/model_sampling.py
@@ -0,0 +1,383 @@
+import torch
+from comfy.ldm.modules.diffusionmodules.util import make_beta_schedule
+import math
+
def rescale_zero_terminal_snr_sigmas(sigmas):
    """Rescale a discrete sigma schedule so the final step has (near) zero SNR.

    The first sigma is preserved; the last alpha-bar is clamped to a tiny
    positive value so the terminal sigma stays finite.
    """
    alphas_cumprod = 1 / ((sigmas * sigmas) + 1)
    sqrt_acp = alphas_cumprod.sqrt()

    # Remember the endpoints before shifting.
    first = sqrt_acp[0].clone()
    last = sqrt_acp[-1].clone()

    # Shift so the last timestep is zero, then rescale so the first is unchanged.
    sqrt_acp -= last
    sqrt_acp *= first / (first - last)

    alphas_bar = sqrt_acp ** 2
    # Avoid an exactly-zero terminal alpha (it would make the last sigma infinite).
    alphas_bar[-1] = 4.8973451890853435e-08
    return ((1 - alphas_bar) / alphas_bar) ** 0.5
+
class EPS:
    """Epsilon (noise) prediction parameterization.

    Requires `self.sigma_data` to be set by the sampling module mixing this in.
    """

    def calculate_input(self, sigma, noise):
        """Scale the noised latent before it is fed to the model."""
        bshape = sigma.shape[:1] + (1,) * (noise.ndim - 1)
        return noise / (sigma.view(bshape) ** 2 + self.sigma_data ** 2) ** 0.5

    def calculate_denoised(self, sigma, model_output, model_input):
        """Recover the denoised latent from an eps prediction."""
        bshape = sigma.shape[:1] + (1,) * (model_output.ndim - 1)
        return model_input - model_output * sigma.view(bshape)

    def noise_scaling(self, sigma, noise, latent_image, max_denoise=False):
        """Mix noise into the clean latent for the given sigma."""
        bshape = sigma.shape[:1] + (1,) * (noise.ndim - 1)
        sigma = sigma.view(bshape)
        if max_denoise:
            scaled = noise * torch.sqrt(1.0 + sigma ** 2.0)
        else:
            scaled = noise * sigma
        return scaled + latent_image

    def inverse_noise_scaling(self, sigma, latent):
        """Identity for eps models; latents need no un-scaling."""
        return latent
+
class V_PREDICTION(EPS):
    """v-prediction parameterization; only denoising differs from EPS."""

    def calculate_denoised(self, sigma, model_output, model_input):
        sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1))
        variance = sigma ** 2 + self.sigma_data ** 2
        return model_input * self.sigma_data ** 2 / variance - model_output * sigma * self.sigma_data / variance ** 0.5
+
class EDM(V_PREDICTION):
    """EDM parameterization: like v-prediction but the output term is added."""

    def calculate_denoised(self, sigma, model_output, model_input):
        sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1))
        variance = sigma ** 2 + self.sigma_data ** 2
        return model_input * self.sigma_data ** 2 / variance + model_output * sigma * self.sigma_data / variance ** 0.5
+
class CONST:
    """Rectified-flow style parameterization (linear mix of noise and data)."""

    def calculate_input(self, sigma, noise):
        # The model consumes the latent unchanged.
        return noise

    def calculate_denoised(self, sigma, model_output, model_input):
        view_shape = sigma.shape[:1] + (1,) * (model_output.ndim - 1)
        return model_input - sigma.view(view_shape) * model_output

    def noise_scaling(self, sigma, noise, latent_image, max_denoise=False):
        view_shape = sigma.shape[:1] + (1,) * (noise.ndim - 1)
        s = sigma.view(view_shape)
        return s * noise + (1.0 - s) * latent_image

    def inverse_noise_scaling(self, sigma, latent):
        view_shape = sigma.shape[:1] + (1,) * (latent.ndim - 1)
        return latent / (1.0 - sigma.view(view_shape))
+
class X0(EPS):
    """Model directly predicts the denoised latent (x0)."""
    def calculate_denoised(self, sigma, model_output, model_input):
        return model_output
+
class IMG_TO_IMG(X0):
    """x0 prediction with the raw latent fed to the model unscaled."""
    def calculate_input(self, sigma, noise):
        return noise
+
class COSMOS_RFLOW:
    """Cosmos rectified-flow parameterization; sigma is squashed to sigma/(sigma+1)."""

    @staticmethod
    def _squashed(sigma, ndim):
        # Map unbounded sigma into [0, 1) and shape it for broadcasting.
        s = sigma / (sigma + 1)
        return s.view(s.shape[:1] + (1,) * (ndim - 1))

    def calculate_input(self, sigma, noise):
        return noise * (1.0 - self._squashed(sigma, noise.ndim))

    def calculate_denoised(self, sigma, model_output, model_input):
        s = self._squashed(sigma, model_output.ndim)
        return model_input * (1.0 - s) - model_output * s

    def noise_scaling(self, sigma, noise, latent_image, max_denoise=False):
        # NOTE: the raw (unsquashed) sigma scales the noise here.
        s = sigma.view(sigma.shape[:1] + (1,) * (noise.ndim - 1))
        return noise * s + latent_image

    def inverse_noise_scaling(self, sigma, latent):
        return latent
+
class ModelSamplingDiscrete(torch.nn.Module):
    """Discrete DDPM-style sigma schedule built from a beta schedule."""

    def __init__(self, model_config=None, zsnr=None):
        super().__init__()

        if model_config is not None:
            sampling_settings = model_config.sampling_settings
        else:
            sampling_settings = {}

        beta_schedule = sampling_settings.get("beta_schedule", "linear")
        linear_start = sampling_settings.get("linear_start", 0.00085)
        linear_end = sampling_settings.get("linear_end", 0.012)
        timesteps = sampling_settings.get("timesteps", 1000)

        if zsnr is None:
            zsnr = sampling_settings.get("zsnr", False)

        self._register_schedule(given_betas=None, beta_schedule=beta_schedule, timesteps=timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=8e-3, zsnr=zsnr)
        self.sigma_data = 1.0

    def _register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000,
                            linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3, zsnr=False):
        # Build sigmas from the cumulative alpha product of the beta schedule.
        if given_betas is not None:
            betas = given_betas
        else:
            betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s)
        alphas = 1. - betas
        alphas_cumprod = torch.cumprod(alphas, dim=0)

        timesteps, = betas.shape
        self.num_timesteps = int(timesteps)
        self.linear_start = linear_start
        self.linear_end = linear_end
        self.zsnr = zsnr

        # self.register_buffer('betas', torch.tensor(betas, dtype=torch.float32))
        # self.register_buffer('alphas_cumprod', torch.tensor(alphas_cumprod, dtype=torch.float32))
        # self.register_buffer('alphas_cumprod_prev', torch.tensor(alphas_cumprod_prev, dtype=torch.float32))

        sigmas = ((1 - alphas_cumprod) / alphas_cumprod) ** 0.5
        if self.zsnr:
            # Zero terminal SNR rescaling (see rescale_zero_terminal_snr_sigmas).
            sigmas = rescale_zero_terminal_snr_sigmas(sigmas)

        self.set_sigmas(sigmas)

    def set_sigmas(self, sigmas):
        # Buffers so the schedule follows the module across devices.
        self.register_buffer('sigmas', sigmas.float())
        self.register_buffer('log_sigmas', sigmas.log().float())

    @property
    def sigma_min(self):
        return self.sigmas[0]

    @property
    def sigma_max(self):
        return self.sigmas[-1]

    def timestep(self, sigma):
        # Nearest discrete timestep for a (possibly off-grid) sigma,
        # found in log-sigma space.
        log_sigma = sigma.log()
        dists = log_sigma.to(self.log_sigmas.device) - self.log_sigmas[:, None]
        return dists.abs().argmin(dim=0).view(sigma.shape).to(sigma.device)

    def sigma(self, timestep):
        # Linear interpolation in log-sigma space for fractional timesteps.
        t = torch.clamp(timestep.float().to(self.log_sigmas.device), min=0, max=(len(self.sigmas) - 1))
        low_idx = t.floor().long()
        high_idx = t.ceil().long()
        w = t.frac()
        log_sigma = (1 - w) * self.log_sigmas[low_idx] + w * self.log_sigmas[high_idx]
        return log_sigma.exp().to(timestep.device)

    def percent_to_sigma(self, percent):
        # Map a 0..1 "start percent" to a sigma; 0% means effectively
        # infinite sigma, 100% means fully denoised.
        if percent <= 0.0:
            return 999999999.9
        if percent >= 1.0:
            return 0.0
        percent = 1.0 - percent
        return self.sigma(torch.tensor(percent * 999.0)).item()
+
class ModelSamplingDiscreteEDM(ModelSamplingDiscrete):
    """Discrete schedule using EDM's log-sigma timestep mapping."""

    def timestep(self, sigma):
        return sigma.log() * 0.25

    def sigma(self, timestep):
        # timestep / 0.25 == timestep * 4 exactly (power-of-two factor).
        return (timestep * 4.0).exp()
+
class ModelSamplingContinuousEDM(torch.nn.Module):
    """Continuous EDM sigma schedule over [sigma_min, sigma_max]."""

    def __init__(self, model_config=None):
        super().__init__()
        settings = model_config.sampling_settings if model_config is not None else {}
        self.set_parameters(
            settings.get("sigma_min", 0.002),
            settings.get("sigma_max", 120.0),
            settings.get("sigma_data", 1.0),
        )

    def set_parameters(self, sigma_min, sigma_max, sigma_data):
        self.sigma_data = sigma_data
        # 1000 log-spaced sigmas, kept as a discrete table for compatibility
        # with some schedulers.
        sigmas = torch.linspace(math.log(sigma_min), math.log(sigma_max), 1000).exp()
        self.register_buffer('sigmas', sigmas)
        self.register_buffer('log_sigmas', sigmas.log())

    @property
    def sigma_min(self):
        return self.sigmas[0]

    @property
    def sigma_max(self):
        return self.sigmas[-1]

    def timestep(self, sigma):
        return 0.25 * sigma.log()

    def sigma(self, timestep):
        return (timestep / 0.25).exp()

    def percent_to_sigma(self, percent):
        """Map a 0..1 "start percent" to a sigma via log-linear interpolation."""
        if percent <= 0.0:
            return 999999999.9
        if percent >= 1.0:
            return 0.0
        frac = 1.0 - percent
        log_lo = math.log(self.sigma_min)
        return math.exp((math.log(self.sigma_max) - log_lo) * frac + log_lo)
+
+
class ModelSamplingContinuousV(ModelSamplingContinuousEDM):
    """Continuous schedule for v-prediction: sigma <-> timestep via tan/atan."""
    def timestep(self, sigma):
        return sigma.atan() / math.pi * 2

    def sigma(self, timestep):
        return (timestep * math.pi / 2).tan()
+
+
def time_snr_shift(alpha, t):
    """SNR-shift a flow-matching time `t` by factor `alpha` (identity when alpha == 1)."""
    if alpha == 1.0:
        return t
    return (alpha * t) / (1 + (alpha - 1) * t)
+
class ModelSamplingDiscreteFlow(torch.nn.Module):
    """Discrete flow-matching schedule: sigma(t) = shift*t / (1 + (shift-1)*t)."""

    def __init__(self, model_config=None):
        super().__init__()
        settings = model_config.sampling_settings if model_config is not None else {}
        self.set_parameters(shift=settings.get("shift", 1.0),
                            multiplier=settings.get("multiplier", 1000))

    def set_parameters(self, shift=1.0, timesteps=1000, multiplier=1000):
        self.shift = shift
        self.multiplier = multiplier
        # Discrete sigma table for schedulers that index into it.
        grid = torch.arange(1, timesteps + 1, 1) / timesteps
        self.register_buffer('sigmas', self.sigma(grid * multiplier))

    @property
    def sigma_min(self):
        return self.sigmas[0]

    @property
    def sigma_max(self):
        return self.sigmas[-1]

    def timestep(self, sigma):
        return sigma * self.multiplier

    def sigma(self, timestep):
        return time_snr_shift(self.shift, timestep / self.multiplier)

    def percent_to_sigma(self, percent):
        if percent <= 0.0:
            return 1.0
        if percent >= 1.0:
            return 0.0
        return time_snr_shift(self.shift, 1.0 - percent)
+
class StableCascadeSampling(ModelSamplingDiscrete):
    """Cosine alpha-cumprod schedule used by Stable Cascade, with optional SNR shift."""

    def __init__(self, model_config=None):
        super().__init__()

        if model_config is not None:
            sampling_settings = model_config.sampling_settings
        else:
            sampling_settings = {}

        self.set_parameters(sampling_settings.get("shift", 1.0))

    def set_parameters(self, shift=1.0, cosine_s=8e-3):
        self.shift = shift
        self.cosine_s = torch.tensor(cosine_s)
        # alpha_cumprod at t=0, used to normalize the cosine curve to start at 1.
        self._init_alpha_cumprod = torch.cos(self.cosine_s / (1 + self.cosine_s) * torch.pi * 0.5) ** 2

        #This part is just for compatibility with some schedulers in the codebase
        self.num_timesteps = 10000
        sigmas = torch.empty((self.num_timesteps), dtype=torch.float32)
        for x in range(self.num_timesteps):
            t = (x + 1) / self.num_timesteps
            sigmas[x] = self.sigma(t)

        self.set_sigmas(sigmas)

    def sigma(self, timestep):
        # Normalized cosine schedule; timestep is in [0, 1].
        alpha_cumprod = (torch.cos((timestep + self.cosine_s) / (1 + self.cosine_s) * torch.pi * 0.5) ** 2 / self._init_alpha_cumprod)

        if self.shift != 1.0:
            # Apply the shift in logSNR space.
            var = alpha_cumprod
            logSNR = (var/(1-var)).log()
            logSNR += 2 * torch.log(1.0 / torch.tensor(self.shift))
            alpha_cumprod = logSNR.sigmoid()

        alpha_cumprod = alpha_cumprod.clamp(0.0001, 0.9999)
        return ((1 - alpha_cumprod) / alpha_cumprod) ** 0.5

    def timestep(self, sigma):
        # Invert sigma() back to a 0..1 timestep.
        # NOTE(review): this inversion does not account for self.shift — verify intended.
        var = 1 / ((sigma * sigma) + 1)
        var = var.clamp(0, 1.0)
        s, min_var = self.cosine_s.to(var.device), self._init_alpha_cumprod.to(var.device)
        t = (((var * min_var) ** 0.5).acos() / (torch.pi * 0.5)) * (1 + s) - s
        return t

    def percent_to_sigma(self, percent):
        if percent <= 0.0:
            return 999999999.9
        if percent >= 1.0:
            return 0.0

        percent = 1.0 - percent
        return self.sigma(torch.tensor(percent))
+
+
def flux_time_shift(mu: float, sigma: float, t):
    """Flux time-shift: exp(mu) / (exp(mu) + (1/t - 1)**sigma)."""
    e_mu = math.exp(mu)
    return e_mu / (e_mu + (1 / t - 1) ** sigma)
+
class ModelSamplingFlux(torch.nn.Module):
    """Flux flow-matching schedule where sigma doubles as the timestep."""

    def __init__(self, model_config=None):
        super().__init__()
        settings = model_config.sampling_settings if model_config is not None else {}
        self.set_parameters(shift=settings.get("shift", 1.15))

    def set_parameters(self, shift=1.15, timesteps=10000):
        self.shift = shift
        # Discrete sigma table for schedulers that index into it.
        grid = torch.arange(1, timesteps + 1, 1) / timesteps
        self.register_buffer('sigmas', self.sigma(grid))

    @property
    def sigma_min(self):
        return self.sigmas[0]

    @property
    def sigma_max(self):
        return self.sigmas[-1]

    def timestep(self, sigma):
        # Flux models consume sigma directly as the timestep.
        return sigma

    def sigma(self, timestep):
        return flux_time_shift(self.shift, 1.0, timestep)

    def percent_to_sigma(self, percent):
        if percent <= 0.0:
            return 1.0
        if percent >= 1.0:
            return 0.0
        return flux_time_shift(self.shift, 1.0, 1.0 - percent)
+
+
class ModelSamplingCosmosRFlow(ModelSamplingContinuousEDM):
    """Cosmos rectified flow: t = sigma / (sigma + 1), clamped at sigma_max."""

    def timestep(self, sigma):
        return sigma / (sigma + 1)

    def sigma(self, timestep):
        limit = self.sigma_max
        # Timesteps at/above the image of sigma_max map back to sigma_max.
        if timestep >= (limit / (limit + 1)):
            return limit
        return timestep / (1 - timestep)
diff --git a/ComfyUI/comfy/options.py b/ComfyUI/comfy/options.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7f8af41ebd8b9669ef0ef21827ea6195bcb4752
--- /dev/null
+++ b/ComfyUI/comfy/options.py
@@ -0,0 +1,6 @@
+
# Global flag read elsewhere to decide whether command-line arguments are
# parsed (presumably by the CLI args module — verify against its reader).
args_parsing = False

def enable_args_parsing(enable=True):
    """Toggle command-line argument parsing (useful when embedding as a library)."""
    global args_parsing
    args_parsing = enable
diff --git a/ComfyUI/comfy/rmsnorm.py b/ComfyUI/comfy/rmsnorm.py
new file mode 100644
index 0000000000000000000000000000000000000000..66ae8321d9fe21274c27f7233b41db81d334505c
--- /dev/null
+++ b/ComfyUI/comfy/rmsnorm.py
@@ -0,0 +1,55 @@
+import torch
+import comfy.model_management
+import numbers
+
RMSNorm = None

try:
    # Newer torch provides both the fused functional kernel and the module.
    rms_norm_torch = torch.nn.functional.rms_norm
    RMSNorm = torch.nn.RMSNorm
except:
    # Older torch: fall back to the manual implementation below.
    rms_norm_torch = None
+
+
def rms_norm(x, weight=None, eps=1e-6):
    """RMS-normalize `x` over its last dim, using the fused torch kernel when available.

    `weight` (optional) is cast to x's dtype/device before being applied.
    """
    fused = rms_norm_torch is not None and not (torch.jit.is_tracing() or torch.jit.is_scripting())
    if fused:
        if weight is None:
            return rms_norm_torch(x, (x.shape[-1],), eps=eps)
        w = comfy.model_management.cast_to(weight, dtype=x.dtype, device=x.device)
        return rms_norm_torch(x, weight.shape, weight=w, eps=eps)

    normed = x * torch.rsqrt(torch.mean(x ** 2, dim=-1, keepdim=True) + eps)
    if weight is None:
        return normed
    return normed * comfy.model_management.cast_to(weight, dtype=x.dtype, device=x.device)
+
+
# Fallback RMSNorm module for torch versions without torch.nn.RMSNorm.
if RMSNorm is None:
    class RMSNorm(torch.nn.Module):
        """Drop-in replacement for torch.nn.RMSNorm backed by rms_norm() above."""

        def __init__(
            self,
            normalized_shape,
            eps=1e-6,
            elementwise_affine=True,
            device=None,
            dtype=None,
        ):
            factory_kwargs = {"device": device, "dtype": dtype}
            super().__init__()
            if isinstance(normalized_shape, numbers.Integral):
                # mypy error: incompatible types in assignment
                normalized_shape = (normalized_shape,)  # type: ignore[assignment]
            self.normalized_shape = tuple(normalized_shape)  # type: ignore[arg-type]
            self.eps = eps
            self.elementwise_affine = elementwise_affine
            if self.elementwise_affine:
                # Learnable per-element scale; initialized by the caller/loader.
                self.weight = torch.nn.Parameter(
                    torch.empty(self.normalized_shape, **factory_kwargs)
                )
            else:
                self.register_parameter("weight", None)
            # No bias, matching torch.nn.RMSNorm.
            self.bias = None

        def forward(self, x):
            return rms_norm(x, self.weight, self.eps)
diff --git a/ComfyUI/comfy/sampler_helpers.py b/ComfyUI/comfy/sampler_helpers.py
new file mode 100644
index 0000000000000000000000000000000000000000..8dbc41455cfdbedaee86ce5f4f90f1e34d6fa89c
--- /dev/null
+++ b/ComfyUI/comfy/sampler_helpers.py
@@ -0,0 +1,184 @@
+from __future__ import annotations
+import uuid
+import math
+import collections
+import comfy.model_management
+import comfy.conds
+import comfy.utils
+import comfy.hooks
+import comfy.patcher_extension
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+ from comfy.model_patcher import ModelPatcher
+ from comfy.model_base import BaseModel
+ from comfy.controlnet import ControlBase
+
def prepare_mask(noise_mask, shape, device):
    # Reshape the noise mask to `shape` and move it to `device`.
    return comfy.utils.reshape_mask(noise_mask, shape).to(device)
+
def get_models_from_cond(cond, model_type):
    """Collect every model stored under `model_type` across conditioning dicts.

    List values are flattened; scalar values are appended as-is.
    """
    found = []
    for entry in cond:
        if model_type not in entry:
            continue
        value = entry[model_type]
        if isinstance(value, list):
            found.extend(value)
        else:
            found.append(value)
    return found
+
def get_hooks_from_cond(cond, full_hooks: comfy.hooks.HookGroup):
    # get hooks from conds, and collect cnets so they can be checked for extra_hooks
    cnets: list[ControlBase] = []
    for c in cond:
        if 'hooks' in c:
            for hook in c['hooks'].hooks:
                full_hooks.add(hook)
        if 'control' in c:
            cnets.append(c['control'])

    def get_extra_hooks_from_cnet(cnet: ControlBase, _list: list):
        # Walk the controlnet chain, collecting each link's extra_hooks.
        if cnet.extra_hooks is not None:
            _list.append(cnet.extra_hooks)
        if cnet.previous_controlnet is None:
            return _list
        return get_extra_hooks_from_cnet(cnet.previous_controlnet, _list)

    hooks_list = []
    # Dedupe controlnets shared between conds before walking their chains.
    cnets = set(cnets)
    for base_cnet in cnets:
        get_extra_hooks_from_cnet(base_cnet, hooks_list)
    extra_hooks = comfy.hooks.HookGroup.combine_all_hooks(hooks_list)
    if extra_hooks is not None:
        for hook in extra_hooks.hooks:
            full_hooks.add(hook)

    # Returns the same (mutated) HookGroup for convenience.
    return full_hooks
+
def convert_cond(cond):
    """Convert legacy [cross_attn, options] pairs into flat dicts with uuids."""
    converted = []
    for item in cond:
        entry = item[1].copy()
        model_conds = entry.get("model_conds", {})
        if item[0] is not None:
            entry["cross_attn"] = item[0]
        entry["model_conds"] = model_conds
        # Unique id so hooks/caching can track this cond.
        entry["uuid"] = uuid.uuid4()
        converted.append(entry)
    return converted
+
def get_additional_models(conds, dtype):
    """loads additional models in conditioning

    Returns (models, inference_memory) where models is controlnet models +
    gligen models + any extra "additional_models" entries.
    """
    cnets: list[ControlBase] = []
    gligen = []
    add_models = []

    for key in conds:
        cnets += get_models_from_cond(conds[key], "control")
        gligen += get_models_from_cond(conds[key], "gligen")
        add_models += get_models_from_cond(conds[key], "additional_models")

    inference_memory = 0
    control_models = []
    # Dedupe controlnets shared between conds before summing their memory.
    for cnet in set(cnets):
        control_models += cnet.get_models()
        inference_memory += cnet.inference_memory_requirements(dtype)

    gligen_models = [entry[1] for entry in gligen]
    return control_models + gligen_models + add_models, inference_memory
+
def get_additional_models_from_model_options(model_options: dict[str]=None):
    """loads additional models from registered AddModels hooks"""
    if model_options is None or "registered_hooks" not in model_options:
        return []
    models = []
    registered: comfy.hooks.HookGroup = model_options["registered_hooks"]
    for hook in registered.get_type(comfy.hooks.EnumHookType.AdditionalModels):
        hook: comfy.hooks.AdditionalModelsHook
        models.extend(hook.models)
    return models
+
def cleanup_additional_models(models):
    """cleanup additional models that were loaded"""
    for model in models:
        cleanup = getattr(model, 'cleanup', None)
        if cleanup is not None:
            cleanup()
+
def estimate_memory(model, noise_shape, conds):
    """Estimate (memory_required, minimum_memory_required) for sampling.

    The normal estimate assumes cond+uncond batched together (batch * 2);
    the minimum estimate keeps only the largest single cond shape per key.
    """
    cond_shapes = collections.defaultdict(list)
    cond_shapes_min = {}
    for _, cs in conds.items():
        for cond in cs:
            for k, v in model.model.extra_conds_shapes(**cond).items():
                cond_shapes[k].append(v)
                # Track the largest shape (by element count) seen for key k.
                if cond_shapes_min.get(k, None) is None:
                    cond_shapes_min[k] = [v]
                elif math.prod(v) > math.prod(cond_shapes_min[k][0]):
                    cond_shapes_min[k] = [v]

    memory_required = model.model.memory_required([noise_shape[0] * 2] + list(noise_shape[1:]), cond_shapes=cond_shapes)
    minimum_memory_required = model.model.memory_required([noise_shape[0]] + list(noise_shape[1:]), cond_shapes=cond_shapes_min)
    return memory_required, minimum_memory_required
+
def prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None):
    """Load `model` plus conditioning extras onto the GPU.

    Runs through any registered PREPARE_SAMPLING wrappers before the core
    _prepare_sampling implementation.
    """
    executor = comfy.patcher_extension.WrapperExecutor.new_executor(
        _prepare_sampling,
        comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.PREPARE_SAMPLING, model_options, is_model_options=True)
    )
    return executor.execute(model, noise_shape, conds, model_options=model_options)

def _prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None):
    """Core of prepare_sampling: gather extra models, estimate memory, load all.

    Returns (real_model, conds, models) where `models` must later be passed
    to cleanup_models().
    """
    real_model: BaseModel = None
    models, inference_memory = get_additional_models(conds, model.model_dtype())
    models += get_additional_models_from_model_options(model_options)
    models += model.get_nested_additional_models()  # TODO: does this require inference_memory update?
    memory_required, minimum_memory_required = estimate_memory(model, noise_shape, conds)
    comfy.model_management.load_models_gpu([model] + models, memory_required=memory_required + inference_memory, minimum_memory_required=minimum_memory_required + inference_memory)
    real_model = model.model

    return real_model, conds, models
+
def cleanup_models(conds, models):
    """Release loaded extra models plus any controlnets referenced by conds."""
    cleanup_additional_models(models)

    cnets = []
    for key in conds:
        cnets += get_models_from_cond(conds[key], "control")
    # Dedupe shared controlnets so each is cleaned up once.
    cleanup_additional_models(set(cnets))
+
def prepare_model_patcher(model: 'ModelPatcher', conds, model_options: dict):
    '''
    Registers hooks from conds.

    Mutates model_options in place (transformer_options wrappers/callbacks,
    registered_hooks, to_load_options) and returns to_load_options.
    '''
    # check for hooks in conds - if not registered, see if can be applied
    hooks = comfy.hooks.HookGroup()
    for k in conds:
        get_hooks_from_cond(conds[k], hooks)
    # add wrappers and callbacks from ModelPatcher to transformer_options
    model_options["transformer_options"]["wrappers"] = comfy.patcher_extension.copy_nested_dicts(model.wrappers)
    model_options["transformer_options"]["callbacks"] = comfy.patcher_extension.copy_nested_dicts(model.callbacks)
    # begin registering hooks
    registered = comfy.hooks.HookGroup()
    target_dict = comfy.hooks.create_target_dict(comfy.hooks.EnumWeightTarget.Model)
    # handle all TransformerOptionsHooks
    for hook in hooks.get_type(comfy.hooks.EnumHookType.TransformerOptions):
        hook: comfy.hooks.TransformerOptionsHook
        hook.add_hook_patches(model, model_options, target_dict, registered)
    # handle all AddModelsHooks
    for hook in hooks.get_type(comfy.hooks.EnumHookType.AdditionalModels):
        hook: comfy.hooks.AdditionalModelsHook
        hook.add_hook_patches(model, model_options, target_dict, registered)
    # handle all WeightHooks by registering on ModelPatcher
    model.register_all_hook_patches(hooks, target_dict, model_options, registered)
    # add registered_hooks onto model_options for further reference
    if len(registered) > 0:
        model_options["registered_hooks"] = registered
    # merge original wrappers and callbacks with hooked wrappers and callbacks
    to_load_options: dict[str] = model_options.setdefault("to_load_options", {})
    for wc_name in ["wrappers", "callbacks"]:
        comfy.patcher_extension.merge_nested_dicts(to_load_options.setdefault(wc_name, {}), model_options["transformer_options"][wc_name],
                                                   copy_dict1=False)
    return to_load_options
diff --git a/ComfyUI/comfy/sd.py b/ComfyUI/comfy/sd.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0498e58541e81c8c595a7d88b8e3b930149f7a1
--- /dev/null
+++ b/ComfyUI/comfy/sd.py
@@ -0,0 +1,1233 @@
+from __future__ import annotations
+import json
+import torch
+from enum import Enum
+import logging
+
+from comfy import model_management
+from comfy.utils import ProgressBar
+from .ldm.models.autoencoder import AutoencoderKL, AutoencodingEngine
+from .ldm.cascade.stage_a import StageA
+from .ldm.cascade.stage_c_coder import StageC_coder
+from .ldm.audio.autoencoder import AudioOobleckVAE
+import comfy.ldm.genmo.vae.model
+import comfy.ldm.lightricks.vae.causal_video_autoencoder
+import comfy.ldm.cosmos.vae
+import comfy.ldm.wan.vae
+import comfy.ldm.wan.vae2_2
+import comfy.ldm.hunyuan3d.vae
+import comfy.ldm.ace.vae.music_dcae_pipeline
+import yaml
+import math
+import os
+
+import comfy.utils
+
+from . import clip_vision
+from . import gligen
+from . import diffusers_convert
+from . import model_detection
+
+from . import sd1_clip
+from . import sdxl_clip
+import comfy.text_encoders.sd2_clip
+import comfy.text_encoders.sd3_clip
+import comfy.text_encoders.sa_t5
+import comfy.text_encoders.aura_t5
+import comfy.text_encoders.pixart_t5
+import comfy.text_encoders.hydit
+import comfy.text_encoders.flux
+import comfy.text_encoders.long_clipl
+import comfy.text_encoders.genmo
+import comfy.text_encoders.lt
+import comfy.text_encoders.hunyuan_video
+import comfy.text_encoders.cosmos
+import comfy.text_encoders.lumina2
+import comfy.text_encoders.wan
+import comfy.text_encoders.hidream
+import comfy.text_encoders.ace
+import comfy.text_encoders.omnigen2
+
+import comfy.model_patcher
+import comfy.lora
+import comfy.lora_convert
+import comfy.hooks
+import comfy.t2i_adapter.adapter
+import comfy.taesd.taesd
+
+import comfy.ldm.flux.redux
+
def load_lora_for_models(model, clip, lora, strength_model, strength_clip):
    """Apply a LoRA state dict to a diffusion model and/or a CLIP, returning patched clones.

    Either `model` or `clip` may be None; a warning is logged for every LoRA key
    that was loaded but applied to neither.
    """
    key_map = {}
    if model is not None:
        key_map = comfy.lora.model_lora_keys_unet(model.model, key_map)
    if clip is not None:
        key_map = comfy.lora.model_lora_keys_clip(clip.cond_stage_model, key_map)

    loaded = comfy.lora.load_lora(comfy.lora_convert.convert_lora(lora), key_map)

    new_modelpatcher = None
    patched_model_keys = ()
    if model is not None:
        new_modelpatcher = model.clone()
        patched_model_keys = new_modelpatcher.add_patches(loaded, strength_model)

    new_clip = None
    patched_clip_keys = ()
    if clip is not None:
        new_clip = clip.clone()
        patched_clip_keys = new_clip.add_patches(loaded, strength_clip)

    # anything loaded but not patched into either target gets flagged
    applied = set(patched_model_keys) | set(patched_clip_keys)
    for key in loaded:
        if key not in applied:
            logging.warning("NOT LOADED {}".format(key))

    return (new_modelpatcher, new_clip)
+
+
class CLIP:
    # Wraps a text-encoder model together with its tokenizer and a ModelPatcher,
    # handling device/dtype placement, LoRA patches and hook-scheduled encoding.
    def __init__(self, target=None, embedding_directory=None, no_init=False, tokenizer_data={}, parameters=0, model_options={}):
        """Build the text encoder described by `target`.

        target: object exposing .params (ctor kwargs), .clip (model class) and .tokenizer (tokenizer class).
        embedding_directory: directory the tokenizer searches for embeddings.
        no_init: skip all construction (used by clone()).
        parameters: parameter count, used to estimate memory for initial device placement.
        model_options: optional overrides: load_device, offload_device, dtype, initial_device.
        NOTE(review): tokenizer_data/model_options use mutable default args; they are
        not mutated here, but callers should not rely on the shared default objects.
        """
        if no_init:
            return
        params = target.params.copy()
        clip = target.clip
        tokenizer = target.tokenizer

        load_device = model_options.get("load_device", model_management.text_encoder_device())
        offload_device = model_options.get("offload_device", model_management.text_encoder_offload_device())
        dtype = model_options.get("dtype", None)
        if dtype is None:
            dtype = model_management.text_encoder_dtype(load_device)

        params['dtype'] = dtype
        params['device'] = model_options.get("initial_device", model_management.text_encoder_initial_device(load_device, offload_device, parameters * model_management.dtype_size(dtype)))
        params['model_options'] = model_options

        self.cond_stage_model = clip(**(params))

        # if the load device cannot cast any dtype the model uses, fall back to the offload device
        for dt in self.cond_stage_model.dtypes:
            if not model_management.supports_cast(load_device, dt):
                load_device = offload_device
                if params['device'] != offload_device:
                    self.cond_stage_model.to(offload_device)
                    logging.warning("Had to shift TE back.")

        self.tokenizer = tokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data)
        self.patcher = comfy.model_patcher.ModelPatcher(self.cond_stage_model, load_device=load_device, offload_device=offload_device)
        self.patcher.hook_mode = comfy.hooks.EnumHookMode.MinVram
        self.patcher.is_clip = True
        self.apply_hooks_to_conds = None
        # if the model was built directly on the load device, force-load it fully now
        if params['device'] == load_device:
            model_management.load_models_gpu([self.patcher], force_full_load=True)
        self.layer_idx = None
        self.use_clip_schedule = False
        logging.info("CLIP/text encoder model load device: {}, offload device: {}, current: {}, dtype: {}".format(load_device, offload_device, params['device'], dtype))
        self.tokenizer_options = {}

    def clone(self):
        """Return a shallow clone sharing the underlying model/tokenizer but with a cloned patcher."""
        n = CLIP(no_init=True)
        n.patcher = self.patcher.clone()
        n.cond_stage_model = self.cond_stage_model
        n.tokenizer = self.tokenizer
        n.layer_idx = self.layer_idx
        n.tokenizer_options = self.tokenizer_options.copy()
        n.use_clip_schedule = self.use_clip_schedule
        n.apply_hooks_to_conds = self.apply_hooks_to_conds
        return n

    def add_patches(self, patches, strength_patch=1.0, strength_model=1.0):
        """Forward weight patches (e.g. LoRA) to this CLIP's ModelPatcher."""
        return self.patcher.add_patches(patches, strength_patch, strength_model)

    def set_tokenizer_option(self, option_name, value):
        # stored options are merged into tokenize() kwargs later
        self.tokenizer_options[option_name] = value

    def clip_layer(self, layer_idx):
        # select which hidden layer the encoder outputs (applied in encode_from_tokens)
        self.layer_idx = layer_idx

    def tokenize(self, text, return_word_ids=False, **kwargs):
        """Tokenize `text`, merging stored tokenizer options under any caller-provided ones."""
        tokenizer_options = kwargs.get("tokenizer_options", {})
        if len(self.tokenizer_options) > 0:
            # caller-provided options win over stored ones
            tokenizer_options = {**self.tokenizer_options, **tokenizer_options}
        if len(tokenizer_options) > 0:
            kwargs["tokenizer_options"] = tokenizer_options
        return self.tokenizer.tokenize_with_weights(text, return_word_ids, **kwargs)

    def add_hooks_to_dict(self, pooled_dict: dict[str]):
        # attach this CLIP's hook group (if any) to an output cond dict
        if self.apply_hooks_to_conds:
            pooled_dict["hooks"] = self.apply_hooks_to_conds
        return pooled_dict

    def encode_from_tokens_scheduled(self, tokens, unprojected=False, add_dict: dict[str]={}, show_pbar=True):
        """Encode tokens, optionally once per scheduled hook keyframe.

        Returns a list of [cond, pooled_dict] pairs. Without forced hooks or
        scheduling enabled, this is a single unscheduled encode.
        """
        all_cond_pooled: list[tuple[torch.Tensor, dict[str]]] = []
        all_hooks = self.patcher.forced_hooks
        if all_hooks is None or not self.use_clip_schedule:
            # if no hooks or shouldn't use clip schedule, do unscheduled encode_from_tokens and perform add_dict
            return_pooled = "unprojected" if unprojected else True
            pooled_dict = self.encode_from_tokens(tokens, return_pooled=return_pooled, return_dict=True)
            cond = pooled_dict.pop("cond")
            # add/update any keys with the provided add_dict
            pooled_dict.update(add_dict)
            all_cond_pooled.append([cond, pooled_dict])
        else:
            scheduled_keyframes = all_hooks.get_hooks_for_clip_schedule()

            self.cond_stage_model.reset_clip_options()
            if self.layer_idx is not None:
                self.cond_stage_model.set_clip_options({"layer": self.layer_idx})
            if unprojected:
                self.cond_stage_model.set_clip_options({"projected_pooled": False})

            self.load_model()
            all_hooks.reset()
            self.patcher.patch_hooks(None)
            if show_pbar:
                pbar = ProgressBar(len(scheduled_keyframes))

            for scheduled_opts in scheduled_keyframes:
                t_range = scheduled_opts[0]
                # don't bother encoding any conds outside of start_percent and end_percent bounds
                if "start_percent" in add_dict:
                    if t_range[1] < add_dict["start_percent"]:
                        continue
                if "end_percent" in add_dict:
                    if t_range[0] > add_dict["end_percent"]:
                        continue
                hooks_keyframes = scheduled_opts[1]
                for hook, keyframe in hooks_keyframes:
                    hook.hook_keyframe._current_keyframe = keyframe
                # apply appropriate hooks with values that match new hook_keyframe
                self.patcher.patch_hooks(all_hooks)
                # perform encoding as normal
                o = self.cond_stage_model.encode_token_weights(tokens)
                cond, pooled = o[:2]
                pooled_dict = {"pooled_output": pooled}
                # add clip_start_percent and clip_end_percent in pooled
                pooled_dict["clip_start_percent"] = t_range[0]
                pooled_dict["clip_end_percent"] = t_range[1]
                # add/update any keys with the provided add_dict
                pooled_dict.update(add_dict)
                # add hooks stored on clip
                self.add_hooks_to_dict(pooled_dict)
                all_cond_pooled.append([cond, pooled_dict])
                if show_pbar:
                    pbar.update(1)
                model_management.throw_exception_if_processing_interrupted()
            all_hooks.reset()
        return all_cond_pooled

    def encode_from_tokens(self, tokens, return_pooled=False, return_dict=False):
        """Encode tokens once.

        return_pooled: True for projected pooled output, "unprojected" to skip projection.
        return_dict: return {"cond", "pooled_output", ...extras..., maybe "hooks"} instead of tensors.
        """
        self.cond_stage_model.reset_clip_options()

        if self.layer_idx is not None:
            self.cond_stage_model.set_clip_options({"layer": self.layer_idx})

        if return_pooled == "unprojected":
            self.cond_stage_model.set_clip_options({"projected_pooled": False})

        self.load_model()
        o = self.cond_stage_model.encode_token_weights(tokens)
        cond, pooled = o[:2]
        if return_dict:
            out = {"cond": cond, "pooled_output": pooled}
            # any extra outputs beyond (cond, pooled) come back as a dict in o[2]
            if len(o) > 2:
                for k in o[2]:
                    out[k] = o[2][k]
            self.add_hooks_to_dict(out)
            return out

        if return_pooled:
            return cond, pooled
        return cond

    def encode(self, text):
        """Convenience: tokenize then encode."""
        tokens = self.tokenize(text)
        return self.encode_from_tokens(tokens)

    def load_sd(self, sd, full_model=False):
        """Load a state dict into the text encoder; full_model loads strict=False directly."""
        if full_model:
            return self.cond_stage_model.load_state_dict(sd, strict=False)
        else:
            return self.cond_stage_model.load_sd(sd)

    def get_sd(self):
        """Return the combined state dict of the encoder and the tokenizer."""
        sd_clip = self.cond_stage_model.state_dict()
        sd_tokenizer = self.tokenizer.state_dict()
        for k in sd_tokenizer:
            sd_clip[k] = sd_tokenizer[k]
        return sd_clip

    def load_model(self):
        """Ensure the text encoder is loaded on its compute device; returns the patcher."""
        model_management.load_model_gpu(self.patcher)
        return self.patcher

    def get_key_patches(self):
        return self.patcher.get_key_patches()
+
+class VAE:
    def __init__(self, sd=None, device=None, config=None, dtype=None, metadata=None):
        """Detect the VAE architecture from the keys of state dict `sd` and build it.

        sd: state dict of the VAE weights (diffusers-format dicts are converted first).
        device/dtype: compute device and dtype; chosen by model_management when None.
        config: explicit AutoencoderKL config; when None the architecture is inferred
                from marker keys in `sd` (the long if/elif chain below).
        metadata: optional safetensors metadata; only the LTXV branch reads a "config" entry.

        Defaults below (8x scale, 4 latent channels, 2 latent dims, [-1, 1] input
        scaling) describe a standard SD1/SD2 image AutoencoderKL and are overridden
        per-architecture in each branch.
        """
        if 'decoder.up_blocks.0.resnets.0.norm1.weight' in sd.keys(): #diffusers format
            sd = diffusers_convert.convert_vae_state_dict(sd)

        self.memory_used_encode = lambda shape, dtype: (1767 * shape[2] * shape[3]) * model_management.dtype_size(dtype) #These are for AutoencoderKL and need tweaking (should be lower)
        self.memory_used_decode = lambda shape, dtype: (2178 * shape[2] * shape[3] * 64) * model_management.dtype_size(dtype)
        self.downscale_ratio = 8
        self.upscale_ratio = 8
        self.latent_channels = 4
        self.latent_dim = 2
        self.output_channels = 3
        self.process_input = lambda image: image * 2.0 - 1.0
        self.process_output = lambda image: torch.clamp((image + 1.0) / 2.0, min=0.0, max=1.0)
        self.working_dtypes = [torch.bfloat16, torch.float32]
        self.disable_offload = False

        self.downscale_index_formula = None
        self.upscale_index_formula = None
        self.extra_1d_channel = None

        # architecture detection: each branch keys off a weight name unique to one VAE family
        if config is None:
            if "decoder.mid.block_1.mix_factor" in sd:
                # video VAE with temporal decoder (mix_factor marks the temporal blocks)
                encoder_config = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0}
                decoder_config = encoder_config.copy()
                decoder_config["video_kernel_size"] = [3, 1, 1]
                decoder_config["alpha"] = 0.0
                self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
                                                            encoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Encoder", 'params': encoder_config},
                                                            decoder_config={'target': "comfy.ldm.modules.temporal_ae.VideoDecoder", 'params': decoder_config})
            elif "taesd_decoder.1.weight" in sd:
                self.latent_channels = sd["taesd_decoder.1.weight"].shape[1]
                self.first_stage_model = comfy.taesd.taesd.TAESD(latent_channels=self.latent_channels)
            elif "vquantizer.codebook.weight" in sd: #VQGan: stage a of stable cascade
                self.first_stage_model = StageA()
                self.downscale_ratio = 4
                self.upscale_ratio = 4
                #TODO
                #self.memory_used_encode
                #self.memory_used_decode
                self.process_input = lambda image: image
                self.process_output = lambda image: image
            elif "backbone.1.0.block.0.1.num_batches_tracked" in sd: #effnet: encoder for stage c latent of stable cascade
                self.first_stage_model = StageC_coder()
                self.downscale_ratio = 32
                self.latent_channels = 16
                new_sd = {}
                for k in sd:
                    new_sd["encoder.{}".format(k)] = sd[k]
                sd = new_sd
            elif "blocks.11.num_batches_tracked" in sd: #previewer: decoder for stage c latent of stable cascade
                self.first_stage_model = StageC_coder()
                self.latent_channels = 16
                new_sd = {}
                for k in sd:
                    new_sd["previewer.{}".format(k)] = sd[k]
                sd = new_sd
            elif "encoder.backbone.1.0.block.0.1.num_batches_tracked" in sd: #combined effnet and previewer for stable cascade
                self.first_stage_model = StageC_coder()
                self.downscale_ratio = 32
                self.latent_channels = 16
            elif "decoder.conv_in.weight" in sd:
                #default SD1.x/SD2.x VAE parameters
                ddconfig = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0}

                if 'encoder.down.2.downsample.conv.weight' not in sd and 'decoder.up.3.upsample.conv.weight' not in sd: #Stable diffusion x4 upscaler VAE
                    ddconfig['ch_mult'] = [1, 2, 4]
                    self.downscale_ratio = 4
                    self.upscale_ratio = 4

                # infer latent channel count directly from the decoder input conv
                self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.weight"].shape[1]
                if 'post_quant_conv.weight' in sd:
                    self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=sd['post_quant_conv.weight'].shape[1])
                else:
                    self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
                                                                encoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Encoder", 'params': ddconfig},
                                                                decoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Decoder", 'params': ddconfig})
            elif "decoder.layers.1.layers.0.beta" in sd:
                # Oobleck audio VAE: 1D latents, no [-1, 1] rescaling
                self.first_stage_model = AudioOobleckVAE()
                self.memory_used_encode = lambda shape, dtype: (1000 * shape[2]) * model_management.dtype_size(dtype)
                self.memory_used_decode = lambda shape, dtype: (1000 * shape[2] * 2048) * model_management.dtype_size(dtype)
                self.latent_channels = 64
                self.output_channels = 2
                self.upscale_ratio = 2048
                self.downscale_ratio = 2048
                self.latent_dim = 1
                self.process_output = lambda audio: audio
                self.process_input = lambda audio: audio
                self.working_dtypes = [torch.float16, torch.bfloat16, torch.float32]
                self.disable_offload = True
            elif "blocks.2.blocks.3.stack.5.weight" in sd or "decoder.blocks.2.blocks.3.stack.5.weight" in sd or "layers.4.layers.1.attn_block.attn.qkv.weight" in sd or "encoder.layers.4.layers.1.attn_block.attn.qkv.weight" in sd: #genmo mochi vae
                # decoder-only / encoder-only checkpoints get their prefixes added here
                if "blocks.2.blocks.3.stack.5.weight" in sd:
                    sd = comfy.utils.state_dict_prefix_replace(sd, {"": "decoder."})
                if "layers.4.layers.1.attn_block.attn.qkv.weight" in sd:
                    sd = comfy.utils.state_dict_prefix_replace(sd, {"": "encoder."})
                self.first_stage_model = comfy.ldm.genmo.vae.model.VideoVAE()
                self.latent_channels = 12
                self.latent_dim = 3
                self.memory_used_decode = lambda shape, dtype: (1000 * shape[2] * shape[3] * shape[4] * (6 * 8 * 8)) * model_management.dtype_size(dtype)
                self.memory_used_encode = lambda shape, dtype: (1.5 * max(shape[2], 7) * shape[3] * shape[4] * (6 * 8 * 8)) * model_management.dtype_size(dtype)
                # temporal axis scales non-uniformly: first frame maps 1:1, rest 6:1
                self.upscale_ratio = (lambda a: max(0, a * 6 - 5), 8, 8)
                self.upscale_index_formula = (6, 8, 8)
                self.downscale_ratio = (lambda a: max(0, math.floor((a + 5) / 6)), 8, 8)
                self.downscale_index_formula = (6, 8, 8)
                self.working_dtypes = [torch.float16, torch.float32]
            elif "decoder.up_blocks.0.res_blocks.0.conv1.conv.weight" in sd: #lightricks ltxv
                tensor_conv1 = sd["decoder.up_blocks.0.res_blocks.0.conv1.conv.weight"]
                version = 0
                if tensor_conv1.shape[0] == 512:
                    version = 0
                elif tensor_conv1.shape[0] == 1024:
                    version = 1
                    if "encoder.down_blocks.1.conv.conv.bias" in sd:
                        version = 2
                vae_config = None
                # LTXV can carry its VAE config in the safetensors metadata
                if metadata is not None and "config" in metadata:
                    vae_config = json.loads(metadata["config"]).get("vae", None)
                self.first_stage_model = comfy.ldm.lightricks.vae.causal_video_autoencoder.VideoVAE(version=version, config=vae_config)
                self.latent_channels = 128
                self.latent_dim = 3
                self.memory_used_decode = lambda shape, dtype: (900 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype)
                self.memory_used_encode = lambda shape, dtype: (70 * max(shape[2], 7) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
                self.upscale_ratio = (lambda a: max(0, a * 8 - 7), 32, 32)
                self.upscale_index_formula = (8, 32, 32)
                self.downscale_ratio = (lambda a: max(0, math.floor((a + 7) / 8)), 32, 32)
                self.downscale_index_formula = (8, 32, 32)
                self.working_dtypes = [torch.bfloat16, torch.float32]
            elif "decoder.conv_in.conv.weight" in sd:
                # 3D-conv AutoencoderKL variant (video) with 4x temporal compression
                ddconfig = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0}
                ddconfig["conv3d"] = True
                ddconfig["time_compress"] = 4
                self.upscale_ratio = (lambda a: max(0, a * 4 - 3), 8, 8)
                self.upscale_index_formula = (4, 8, 8)
                self.downscale_ratio = (lambda a: max(0, math.floor((a + 3) / 4)), 8, 8)
                self.downscale_index_formula = (4, 8, 8)
                self.latent_dim = 3
                self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.conv.weight"].shape[1]
                self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=sd['post_quant_conv.weight'].shape[1])
                self.memory_used_decode = lambda shape, dtype: (1500 * shape[2] * shape[3] * shape[4] * (4 * 8 * 8)) * model_management.dtype_size(dtype)
                self.memory_used_encode = lambda shape, dtype: (900 * max(shape[2], 2) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
                self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32]
            elif "decoder.unpatcher3d.wavelets" in sd:
                # Cosmos causal continuous video tokenizer
                self.upscale_ratio = (lambda a: max(0, a * 8 - 7), 8, 8)
                self.upscale_index_formula = (8, 8, 8)
                self.downscale_ratio = (lambda a: max(0, math.floor((a + 7) / 8)), 8, 8)
                self.downscale_index_formula = (8, 8, 8)
                self.latent_dim = 3
                self.latent_channels = 16
                ddconfig = {'z_channels': 16, 'latent_channels': self.latent_channels, 'z_factor': 1, 'resolution': 1024, 'in_channels': 3, 'out_channels': 3, 'channels': 128, 'channels_mult': [2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [32], 'dropout': 0.0, 'patch_size': 4, 'num_groups': 1, 'temporal_compression': 8, 'spacial_compression': 8}
                self.first_stage_model = comfy.ldm.cosmos.vae.CausalContinuousVideoTokenizer(**ddconfig)
                #TODO: these values are a bit off because this is not a standard VAE
                self.memory_used_decode = lambda shape, dtype: (50 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype)
                self.memory_used_encode = lambda shape, dtype: (50 * (round((shape[2] + 7) / 8) * 8) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
                self.working_dtypes = [torch.bfloat16, torch.float32]
            elif "decoder.middle.0.residual.0.gamma" in sd:
                if "decoder.upsamples.0.upsamples.0.residual.2.weight" in sd: # Wan 2.2 VAE
                    self.upscale_ratio = (lambda a: max(0, a * 4 - 3), 16, 16)
                    self.upscale_index_formula = (4, 16, 16)
                    self.downscale_ratio = (lambda a: max(0, math.floor((a + 3) / 4)), 16, 16)
                    self.downscale_index_formula = (4, 16, 16)
                    self.latent_dim = 3
                    self.latent_channels = 48
                    ddconfig = {"dim": 160, "z_dim": self.latent_channels, "dim_mult": [1, 2, 4, 4], "num_res_blocks": 2, "attn_scales": [], "temperal_downsample": [False, True, True], "dropout": 0.0}
                    self.first_stage_model = comfy.ldm.wan.vae2_2.WanVAE(**ddconfig)
                    self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32]
                    self.memory_used_encode = lambda shape, dtype: 3300 * shape[3] * shape[4] * model_management.dtype_size(dtype)
                    self.memory_used_decode = lambda shape, dtype: 8000 * shape[3] * shape[4] * (16 * 16) * model_management.dtype_size(dtype)
                else: # Wan 2.1 VAE
                    self.upscale_ratio = (lambda a: max(0, a * 4 - 3), 8, 8)
                    self.upscale_index_formula = (4, 8, 8)
                    self.downscale_ratio = (lambda a: max(0, math.floor((a + 3) / 4)), 8, 8)
                    self.downscale_index_formula = (4, 8, 8)
                    self.latent_dim = 3
                    self.latent_channels = 16
                    ddconfig = {"dim": 96, "z_dim": self.latent_channels, "dim_mult": [1, 2, 4, 4], "num_res_blocks": 2, "attn_scales": [], "temperal_downsample": [False, True, True], "dropout": 0.0}
                    self.first_stage_model = comfy.ldm.wan.vae.WanVAE(**ddconfig)
                    self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32]
                    self.memory_used_encode = lambda shape, dtype: 6000 * shape[3] * shape[4] * model_management.dtype_size(dtype)
                    self.memory_used_decode = lambda shape, dtype: 7000 * shape[3] * shape[4] * (8 * 8) * model_management.dtype_size(dtype)
            elif "geo_decoder.cross_attn_decoder.ln_1.bias" in sd:
                # Hunyuan3D shape VAE: decoder hyperparameters are derived from weight shapes
                self.latent_dim = 1
                ln_post = "geo_decoder.ln_post.weight" in sd
                inner_size = sd["geo_decoder.output_proj.weight"].shape[1]
                downsample_ratio = sd["post_kl.weight"].shape[0] // inner_size
                mlp_expand = sd["geo_decoder.cross_attn_decoder.mlp.c_fc.weight"].shape[0] // inner_size
                self.memory_used_encode = lambda shape, dtype: (1000 * shape[2]) * model_management.dtype_size(dtype) # TODO
                self.memory_used_decode = lambda shape, dtype: (1024 * 1024 * 1024 * 2.0) * model_management.dtype_size(dtype) # TODO
                ddconfig = {"embed_dim": 64, "num_freqs": 8, "include_pi": False, "heads": 16, "width": 1024, "num_decoder_layers": 16, "qkv_bias": False, "qk_norm": True, "geo_decoder_mlp_expand_ratio": mlp_expand, "geo_decoder_downsample_ratio": downsample_ratio, "geo_decoder_ln_post": ln_post}
                self.first_stage_model = comfy.ldm.hunyuan3d.vae.ShapeVAE(**ddconfig)
                self.working_dtypes = [torch.float16, torch.bfloat16, torch.float32]
            elif "vocoder.backbone.channel_layers.0.0.bias" in sd: #Ace Step Audio
                self.first_stage_model = comfy.ldm.ace.vae.music_dcae_pipeline.MusicDCAE(source_sample_rate=44100)
                self.memory_used_encode = lambda shape, dtype: (shape[2] * 330) * model_management.dtype_size(dtype)
                self.memory_used_decode = lambda shape, dtype: (shape[2] * shape[3] * 87000) * model_management.dtype_size(dtype)
                self.latent_channels = 8
                self.output_channels = 2
                self.upscale_ratio = 4096
                self.downscale_ratio = 4096
                self.latent_dim = 2
                self.process_output = lambda audio: audio
                self.process_input = lambda audio: audio
                self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32]
                self.disable_offload = True
                # latents carry an extra folded channel dim handled by the tiled-1d paths
                self.extra_1d_channel = 16
            else:
                logging.warning("WARNING: No VAE weights detected, VAE not initalized.")
                self.first_stage_model = None
                return
        else:
            self.first_stage_model = AutoencoderKL(**(config['params']))
        self.first_stage_model = self.first_stage_model.eval()

        m, u = self.first_stage_model.load_state_dict(sd, strict=False)
        if len(m) > 0:
            logging.warning("Missing VAE keys {}".format(m))

        if len(u) > 0:
            logging.debug("Leftover VAE keys {}".format(u))

        if device is None:
            device = model_management.vae_device()
        self.device = device
        offload_device = model_management.vae_offload_device()
        if dtype is None:
            dtype = model_management.vae_dtype(self.device, self.working_dtypes)
        self.vae_dtype = dtype
        self.first_stage_model.to(self.vae_dtype)
        self.output_device = model_management.intermediate_device()

        self.patcher = comfy.model_patcher.ModelPatcher(self.first_stage_model, load_device=self.device, offload_device=offload_device)
        logging.info("VAE load device: {}, offload device: {}, dtype: {}".format(self.device, offload_device, self.vae_dtype))
+
+ def throw_exception_if_invalid(self):
+ if self.first_stage_model is None:
+ raise RuntimeError("ERROR: VAE is invalid: None\n\nIf the VAE is from a checkpoint loader node your checkpoint does not contain a valid VAE.")
+
+ def vae_encode_crop_pixels(self, pixels):
+ downscale_ratio = self.spacial_compression_encode()
+
+ dims = pixels.shape[1:-1]
+ for d in range(len(dims)):
+ x = (dims[d] // downscale_ratio) * downscale_ratio
+ x_offset = (dims[d] % downscale_ratio) // 2
+ if x != dims[d]:
+ pixels = pixels.narrow(d + 1, x_offset, x)
+ return pixels
+
+ def decode_tiled_(self, samples, tile_x=64, tile_y=64, overlap = 16):
+ steps = samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x, tile_y, overlap)
+ steps += samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x // 2, tile_y * 2, overlap)
+ steps += samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x * 2, tile_y // 2, overlap)
+ pbar = comfy.utils.ProgressBar(steps)
+
+ decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).float()
+ output = self.process_output(
+ (comfy.utils.tiled_scale(samples, decode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = self.upscale_ratio, output_device=self.output_device, pbar = pbar) +
+ comfy.utils.tiled_scale(samples, decode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = self.upscale_ratio, output_device=self.output_device, pbar = pbar) +
+ comfy.utils.tiled_scale(samples, decode_fn, tile_x, tile_y, overlap, upscale_amount = self.upscale_ratio, output_device=self.output_device, pbar = pbar))
+ / 3.0)
+ return output
+
+ def decode_tiled_1d(self, samples, tile_x=128, overlap=32):
+ if samples.ndim == 3:
+ decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).float()
+ else:
+ og_shape = samples.shape
+ samples = samples.reshape((og_shape[0], og_shape[1] * og_shape[2], -1))
+ decode_fn = lambda a: self.first_stage_model.decode(a.reshape((-1, og_shape[1], og_shape[2], a.shape[-1])).to(self.vae_dtype).to(self.device)).float()
+
+ return self.process_output(comfy.utils.tiled_scale_multidim(samples, decode_fn, tile=(tile_x,), overlap=overlap, upscale_amount=self.upscale_ratio, out_channels=self.output_channels, output_device=self.output_device))
+
+ def decode_tiled_3d(self, samples, tile_t=999, tile_x=32, tile_y=32, overlap=(1, 8, 8)):
+ decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).float()
+ return self.process_output(comfy.utils.tiled_scale_multidim(samples, decode_fn, tile=(tile_t, tile_x, tile_y), overlap=overlap, upscale_amount=self.upscale_ratio, out_channels=self.output_channels, index_formulas=self.upscale_index_formula, output_device=self.output_device))
+
+ def encode_tiled_(self, pixel_samples, tile_x=512, tile_y=512, overlap = 64):
+ steps = pixel_samples.shape[0] * comfy.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x, tile_y, overlap)
+ steps += pixel_samples.shape[0] * comfy.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x // 2, tile_y * 2, overlap)
+ steps += pixel_samples.shape[0] * comfy.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x * 2, tile_y // 2, overlap)
+ pbar = comfy.utils.ProgressBar(steps)
+
+ encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).float()
+ samples = comfy.utils.tiled_scale(pixel_samples, encode_fn, tile_x, tile_y, overlap, upscale_amount = (1/self.downscale_ratio), out_channels=self.latent_channels, output_device=self.output_device, pbar=pbar)
+ samples += comfy.utils.tiled_scale(pixel_samples, encode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = (1/self.downscale_ratio), out_channels=self.latent_channels, output_device=self.output_device, pbar=pbar)
+ samples += comfy.utils.tiled_scale(pixel_samples, encode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = (1/self.downscale_ratio), out_channels=self.latent_channels, output_device=self.output_device, pbar=pbar)
+ samples /= 3.0
+ return samples
+
+ def encode_tiled_1d(self, samples, tile_x=256 * 2048, overlap=64 * 2048):
+ if self.latent_dim == 1:
+ encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).float()
+ out_channels = self.latent_channels
+ upscale_amount = 1 / self.downscale_ratio
+ else:
+ extra_channel_size = self.extra_1d_channel
+ out_channels = self.latent_channels * extra_channel_size
+ tile_x = tile_x // extra_channel_size
+ overlap = overlap // extra_channel_size
+ upscale_amount = 1 / self.downscale_ratio
+ encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).reshape(1, out_channels, -1).float()
+
+ out = comfy.utils.tiled_scale_multidim(samples, encode_fn, tile=(tile_x,), overlap=overlap, upscale_amount=upscale_amount, out_channels=out_channels, output_device=self.output_device)
+ if self.latent_dim == 1:
+ return out
+ else:
+ return out.reshape(samples.shape[0], self.latent_channels, extra_channel_size, -1)
+
+ def encode_tiled_3d(self, samples, tile_t=9999, tile_x=512, tile_y=512, overlap=(1, 64, 64)):
+ encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).float()
+ return comfy.utils.tiled_scale_multidim(samples, encode_fn, tile=(tile_t, tile_x, tile_y), overlap=overlap, upscale_amount=self.downscale_ratio, out_channels=self.latent_channels, downscale=True, index_formulas=self.downscale_index_formula, output_device=self.output_device)
+
    def decode(self, samples_in, vae_options={}):
        """Decode latents to pixels, falling back to tiled decoding on OOM.

        samples_in: batch-first latent tensor.
        Returns pixels on self.output_device with the channel dim moved last.
        """
        self.throw_exception_if_invalid()
        pixel_samples = None
        try:
            memory_used = self.memory_used_decode(samples_in.shape, self.vae_dtype)
            model_management.load_models_gpu([self.patcher], memory_required=memory_used, force_full_load=self.disable_offload)
            free_memory = model_management.get_free_memory(self.device)
            # decode in the largest sub-batches the free memory estimate allows
            batch_number = int(free_memory / memory_used)
            batch_number = max(1, batch_number)

            for x in range(0, samples_in.shape[0], batch_number):
                samples = samples_in[x:x+batch_number].to(self.vae_dtype).to(self.device)
                out = self.process_output(self.first_stage_model.decode(samples, **vae_options).to(self.output_device).float())
                if pixel_samples is None:
                    # allocate the full output lazily, once the decoded shape is known
                    pixel_samples = torch.empty((samples_in.shape[0],) + tuple(out.shape[1:]), device=self.output_device)
                pixel_samples[x:x+batch_number] = out
        except model_management.OOM_EXCEPTION:
            logging.warning("Warning: Ran out of memory when regular VAE decoding, retrying with tiled VAE decoding.")
            # number of non-(batch, channel) dims selects the tiled fallback
            dims = samples_in.ndim - 2
            if dims == 1 or self.extra_1d_channel is not None:
                pixel_samples = self.decode_tiled_1d(samples_in)
            elif dims == 2:
                pixel_samples = self.decode_tiled_(samples_in)
            elif dims == 3:
                tile = 256 // self.spacial_compression_decode()
                overlap = tile // 4
                pixel_samples = self.decode_tiled_3d(samples_in, tile_x=tile, tile_y=tile, overlap=(1, overlap, overlap))

        pixel_samples = pixel_samples.to(self.output_device).movedim(1,-1)
        return pixel_samples
+
    def decode_tiled(self, samples, tile_x=None, tile_y=None, overlap=None, tile_t=None, overlap_t=None):
        """Explicitly tiled decode; dispatches on latent dimensionality (1D/2D/3D).

        Tile/overlap arguments that are None fall back to each tiled helper's defaults.
        Returns pixels with the channel dim moved last.
        """
        self.throw_exception_if_invalid()
        memory_used = self.memory_used_decode(samples.shape, self.vae_dtype) #TODO: calculate mem required for tile
        model_management.load_models_gpu([self.patcher], memory_required=memory_used, force_full_load=self.disable_offload)
        dims = samples.ndim - 2
        args = {}
        if tile_x is not None:
            args["tile_x"] = tile_x
        if tile_y is not None:
            args["tile_y"] = tile_y
        if overlap is not None:
            args["overlap"] = overlap

        if dims == 1:
            # NOTE(review): raises KeyError if tile_y was None (key never added) — confirm callers always pass tile_y
            args.pop("tile_y")
            output = self.decode_tiled_1d(samples, **args)
        elif dims == 2:
            output = self.decode_tiled_(samples, **args)
        elif dims == 3:
            # NOTE(review): if `overlap` is None this builds (1, None, None) — looks like callers are expected to pass overlap for 3D
            if overlap_t is None:
                args["overlap"] = (1, overlap, overlap)
            else:
                args["overlap"] = (max(1, overlap_t), overlap, overlap)
            if tile_t is not None:
                args["tile_t"] = max(2, tile_t)

            output = self.decode_tiled_3d(samples, **args)
        return output.movedim(1, -1)
+
    def encode(self, pixel_samples):
        """Encode channels-last pixels to latents, falling back to tiled encoding on OOM.

        Pixels are first cropped to the encoder's spatial multiple and moved to
        channels-first; 4D input to a video (latent_dim == 3) VAE is treated as a
        single video and gets a batch dim added.
        """
        self.throw_exception_if_invalid()
        pixel_samples = self.vae_encode_crop_pixels(pixel_samples)
        pixel_samples = pixel_samples.movedim(-1, 1)
        if self.latent_dim == 3 and pixel_samples.ndim < 5:
            pixel_samples = pixel_samples.movedim(1, 0).unsqueeze(0)
        try:
            memory_used = self.memory_used_encode(pixel_samples.shape, self.vae_dtype)
            model_management.load_models_gpu([self.patcher], memory_required=memory_used, force_full_load=self.disable_offload)
            free_memory = model_management.get_free_memory(self.device)
            # encode in the largest sub-batches the free memory estimate allows
            batch_number = int(free_memory / max(1, memory_used))
            batch_number = max(1, batch_number)
            samples = None
            for x in range(0, pixel_samples.shape[0], batch_number):
                pixels_in = self.process_input(pixel_samples[x:x + batch_number]).to(self.vae_dtype).to(self.device)
                out = self.first_stage_model.encode(pixels_in).to(self.output_device).float()
                if samples is None:
                    # allocate the full output lazily, once the encoded shape is known
                    samples = torch.empty((pixel_samples.shape[0],) + tuple(out.shape[1:]), device=self.output_device)
                samples[x:x + batch_number] = out

        except model_management.OOM_EXCEPTION:
            logging.warning("Warning: Ran out of memory when regular VAE encoding, retrying with tiled VAE encoding.")
            if self.latent_dim == 3:
                tile = 256
                overlap = tile // 4
                samples = self.encode_tiled_3d(pixel_samples, tile_x=tile, tile_y=tile, overlap=(1, overlap, overlap))
            elif self.latent_dim == 1 or self.extra_1d_channel is not None:
                samples = self.encode_tiled_1d(pixel_samples)
            else:
                samples = self.encode_tiled_(pixel_samples)

        return samples
+
+ def encode_tiled(self, pixel_samples, tile_x=None, tile_y=None, overlap=None, tile_t=None, overlap_t=None):
+ self.throw_exception_if_invalid()
+ pixel_samples = self.vae_encode_crop_pixels(pixel_samples)
+ dims = self.latent_dim
+ pixel_samples = pixel_samples.movedim(-1, 1)
+ if dims == 3:
+ pixel_samples = pixel_samples.movedim(1, 0).unsqueeze(0)
+
+ memory_used = self.memory_used_encode(pixel_samples.shape, self.vae_dtype) # TODO: calculate mem required for tile
+ model_management.load_models_gpu([self.patcher], memory_required=memory_used, force_full_load=self.disable_offload)
+
+ args = {}
+ if tile_x is not None:
+ args["tile_x"] = tile_x
+ if tile_y is not None:
+ args["tile_y"] = tile_y
+ if overlap is not None:
+ args["overlap"] = overlap
+
+ if dims == 1:
+ args.pop("tile_y")
+ samples = self.encode_tiled_1d(pixel_samples, **args)
+ elif dims == 2:
+ samples = self.encode_tiled_(pixel_samples, **args)
+ elif dims == 3:
+ if tile_t is not None:
+ tile_t_latent = max(2, self.downscale_ratio[0](tile_t))
+ else:
+ tile_t_latent = 9999
+ args["tile_t"] = self.upscale_ratio[0](tile_t_latent)
+
+ if overlap_t is None:
+ args["overlap"] = (1, overlap, overlap)
+ else:
+ args["overlap"] = (self.upscale_ratio[0](max(1, min(tile_t_latent // 2, self.downscale_ratio[0](overlap_t)))), overlap, overlap)
+ maximum = pixel_samples.shape[2]
+ maximum = self.upscale_ratio[0](self.downscale_ratio[0](maximum))
+
+ samples = self.encode_tiled_3d(pixel_samples[:,:,:maximum], **args)
+
+ return samples
+
+ def get_sd(self):
+ return self.first_stage_model.state_dict()
+
+ def spacial_compression_decode(self):
+ try:
+ return self.upscale_ratio[-1]
+ except:
+ return self.upscale_ratio
+
+ def spacial_compression_encode(self):
+ try:
+ return self.downscale_ratio[-1]
+ except:
+ return self.downscale_ratio
+
+ def temporal_compression_decode(self):
+ try:
+ return round(self.upscale_ratio[0](8192) / 8192)
+ except:
+ return None
+
class StyleModel:
    """Thin wrapper around a style-conditioning model (e.g. a T2I style adapter)."""

    def __init__(self, model, device="cpu"):
        # `device` is accepted for API symmetry; the model is kept where it is.
        self.model = model

    def get_cond(self, input):
        """Run the style model on a CLIP-vision output object and return the conditioning."""
        hidden = input.last_hidden_state
        return self.model(hidden)
+
+
def load_style_model(ckpt_path):
    """Load a style model checkpoint (T2I StyleAdapter or Flux Redux) from disk."""
    model_data = comfy.utils.load_torch_file(ckpt_path, safe_load=True)
    if "style_embedding" in model_data:
        # note: `n_layes` (sic) is the actual parameter name of StyleAdapter.
        model = comfy.t2i_adapter.adapter.StyleAdapter(width=1024, context_dim=768, num_head=8, n_layes=3, num_token=8)
    elif "redux_down.weight" in model_data:
        model = comfy.ldm.flux.redux.ReduxImageEncoder()
    else:
        raise Exception("invalid style model {}".format(ckpt_path))
    model.load_state_dict(model_data)
    return StyleModel(model)
+
class CLIPType(Enum):
    """Identifies which text-encoder stack a checkpoint targets.

    Used by the load_* entry points to choose tokenizer/encoder classes.
    Values are stable identifiers; do not renumber.
    """
    STABLE_DIFFUSION = 1
    STABLE_CASCADE = 2
    SD3 = 3
    STABLE_AUDIO = 4
    HUNYUAN_DIT = 5
    FLUX = 6
    MOCHI = 7
    LTXV = 8
    HUNYUAN_VIDEO = 9
    PIXART = 10
    COSMOS = 11
    LUMINA2 = 12
    WAN = 13
    HIDREAM = 14
    CHROMA = 15
    ACE = 16
    OMNIGEN2 = 17
+
+
def load_clip(ckpt_paths, embedding_directory=None, clip_type=CLIPType.STABLE_DIFFUSION, model_options={}):
    """Load one or more text-encoder checkpoint files and build a CLIP object."""
    clip_data = [comfy.utils.load_torch_file(p, safe_load=True) for p in ckpt_paths]
    return load_text_encoder_state_dicts(clip_data, embedding_directory=embedding_directory, clip_type=clip_type, model_options=model_options)
+
+
class TEModel(Enum):
    """Text-encoder architectures detectable from a state dict (see detect_te_model)."""
    CLIP_L = 1
    CLIP_H = 2
    CLIP_G = 3
    T5_XXL = 4
    T5_XL = 5
    T5_BASE = 6
    LLAMA3_8 = 7
    T5_XXL_OLD = 8
    GEMMA_2_2B = 9
    QWEN25_3B = 10
+
def detect_te_model(sd):
    """Guess which text-encoder architecture a state dict belongs to.

    Checks run from most to least specific (a CLIP-G dict also contains the
    CLIP-H and CLIP-L marker keys, so order matters). Returns a TEModel
    member, or None when nothing matches.
    """
    clip_markers = (
        ("text_model.encoder.layers.30.mlp.fc1.weight", TEModel.CLIP_G),
        ("text_model.encoder.layers.22.mlp.fc1.weight", TEModel.CLIP_H),
        ("text_model.encoder.layers.0.mlp.fc1.weight", TEModel.CLIP_L),
    )
    for marker, result in clip_markers:
        if marker in sd:
            return result

    t5_key = "encoder.block.23.layer.1.DenseReluDense.wi_1.weight"
    if t5_key in sd:
        # Distinguish T5 sizes by the feed-forward width.
        width = sd[t5_key].shape[-1]
        if width == 4096:
            return TEModel.T5_XXL
        elif width == 2048:
            return TEModel.T5_XL
    if 'encoder.block.23.layer.1.DenseReluDense.wi.weight' in sd:
        return TEModel.T5_XXL_OLD

    other_markers = (
        ("encoder.block.0.layer.0.SelfAttention.k.weight", TEModel.T5_BASE),
        ("model.layers.0.post_feedforward_layernorm.weight", TEModel.GEMMA_2_2B),
        ("model.layers.0.self_attn.k_proj.bias", TEModel.QWEN25_3B),
        ("model.layers.0.post_attention_layernorm.weight", TEModel.LLAMA3_8),
    )
    for marker, result in other_markers:
        if marker in sd:
            return result
    return None
+
+
def t5xxl_detect(clip_data):
    """Scan state dicts for T5-XXL weights; return detected kwargs or {}."""
    marker_keys = (
        "encoder.block.23.layer.1.DenseReluDense.wi_1.weight",
        "encoder.block.23.layer.1.DenseReluDense.wi.weight",  # older T5 layout
    )
    for sd in clip_data:
        if any(key in sd for key in marker_keys):
            return comfy.text_encoders.sd3_clip.t5_xxl_detect(sd)
    return {}
+
def llama_detect(clip_data):
    """Scan state dicts for llama-style attention weights; return detected kwargs or {}."""
    marker = "model.layers.0.self_attn.k_proj.weight"
    match = next((sd for sd in clip_data if marker in sd), None)
    if match is None:
        return {}
    return comfy.text_encoders.hunyuan_video.llama_detect(match)
+
def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip_type=CLIPType.STABLE_DIFFUSION, model_options={}):
    """Build a CLIP object from one to four text-encoder state dicts.

    The number of state dicts, the detected encoder architectures and
    `clip_type` together select which tokenizer/encoder classes to
    instantiate; the weights are then loaded into the resulting CLIP.

    Args:
        state_dicts: list of already-loaded text-encoder state dicts.
        embedding_directory: optional path for textual-inversion embeddings.
        clip_type: CLIPType hint narrowing the target model family.
        model_options: extra options forwarded to the CLIP constructor.

    Returns:
        A CLIP instance with the weights loaded.
    """
    clip_data = state_dicts

    class EmptyClass:
        pass

    # Normalize legacy formats in place before detection.
    for i in range(len(clip_data)):
        if "transformer.resblocks.0.ln_1.weight" in clip_data[i]:
            clip_data[i] = comfy.utils.clip_text_transformers_convert(clip_data[i], "", "")
        else:
            if "text_projection" in clip_data[i]:
                clip_data[i]["text_projection.weight"] = clip_data[i]["text_projection"].transpose(0, 1) #old models saved with the CLIPSave node

    tokenizer_data = {}
    clip_target = EmptyClass()
    clip_target.params = {}
    if len(clip_data) == 1:
        # Single encoder: dispatch on the detected architecture, then clip_type.
        te_model = detect_te_model(clip_data[0])
        if te_model == TEModel.CLIP_G:
            if clip_type == CLIPType.STABLE_CASCADE:
                clip_target.clip = sdxl_clip.StableCascadeClipModel
                clip_target.tokenizer = sdxl_clip.StableCascadeTokenizer
            elif clip_type == CLIPType.SD3:
                clip_target.clip = comfy.text_encoders.sd3_clip.sd3_clip(clip_l=False, clip_g=True, t5=False)
                clip_target.tokenizer = comfy.text_encoders.sd3_clip.SD3Tokenizer
            elif clip_type == CLIPType.HIDREAM:
                clip_target.clip = comfy.text_encoders.hidream.hidream_clip(clip_l=False, clip_g=True, t5=False, llama=False, dtype_t5=None, dtype_llama=None, t5xxl_scaled_fp8=None, llama_scaled_fp8=None)
                clip_target.tokenizer = comfy.text_encoders.hidream.HiDreamTokenizer
            else:
                clip_target.clip = sdxl_clip.SDXLRefinerClipModel
                clip_target.tokenizer = sdxl_clip.SDXLTokenizer
        elif te_model == TEModel.CLIP_H:
            clip_target.clip = comfy.text_encoders.sd2_clip.SD2ClipModel
            clip_target.tokenizer = comfy.text_encoders.sd2_clip.SD2Tokenizer
        elif te_model == TEModel.T5_XXL:
            if clip_type == CLIPType.SD3:
                clip_target.clip = comfy.text_encoders.sd3_clip.sd3_clip(clip_l=False, clip_g=False, t5=True, **t5xxl_detect(clip_data))
                clip_target.tokenizer = comfy.text_encoders.sd3_clip.SD3Tokenizer
            elif clip_type == CLIPType.LTXV:
                clip_target.clip = comfy.text_encoders.lt.ltxv_te(**t5xxl_detect(clip_data))
                clip_target.tokenizer = comfy.text_encoders.lt.LTXVT5Tokenizer
            elif clip_type == CLIPType.PIXART or clip_type == CLIPType.CHROMA:
                clip_target.clip = comfy.text_encoders.pixart_t5.pixart_te(**t5xxl_detect(clip_data))
                clip_target.tokenizer = comfy.text_encoders.pixart_t5.PixArtTokenizer
            elif clip_type == CLIPType.WAN:
                clip_target.clip = comfy.text_encoders.wan.te(**t5xxl_detect(clip_data))
                clip_target.tokenizer = comfy.text_encoders.wan.WanT5Tokenizer
                tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None)
            elif clip_type == CLIPType.HIDREAM:
                clip_target.clip = comfy.text_encoders.hidream.hidream_clip(**t5xxl_detect(clip_data),
                                                                            clip_l=False, clip_g=False, t5=True, llama=False, dtype_llama=None, llama_scaled_fp8=None)
                clip_target.tokenizer = comfy.text_encoders.hidream.HiDreamTokenizer
            else: #CLIPType.MOCHI
                clip_target.clip = comfy.text_encoders.genmo.mochi_te(**t5xxl_detect(clip_data))
                clip_target.tokenizer = comfy.text_encoders.genmo.MochiT5Tokenizer
        elif te_model == TEModel.T5_XXL_OLD:
            clip_target.clip = comfy.text_encoders.cosmos.te(**t5xxl_detect(clip_data))
            clip_target.tokenizer = comfy.text_encoders.cosmos.CosmosT5Tokenizer
        elif te_model == TEModel.T5_XL:
            clip_target.clip = comfy.text_encoders.aura_t5.AuraT5Model
            clip_target.tokenizer = comfy.text_encoders.aura_t5.AuraT5Tokenizer
        elif te_model == TEModel.T5_BASE:
            if clip_type == CLIPType.ACE or "spiece_model" in clip_data[0]:
                clip_target.clip = comfy.text_encoders.ace.AceT5Model
                clip_target.tokenizer = comfy.text_encoders.ace.AceT5Tokenizer
                tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None)
            else:
                clip_target.clip = comfy.text_encoders.sa_t5.SAT5Model
                clip_target.tokenizer = comfy.text_encoders.sa_t5.SAT5Tokenizer
        elif te_model == TEModel.GEMMA_2_2B:
            clip_target.clip = comfy.text_encoders.lumina2.te(**llama_detect(clip_data))
            clip_target.tokenizer = comfy.text_encoders.lumina2.LuminaTokenizer
            tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None)
        elif te_model == TEModel.LLAMA3_8:
            clip_target.clip = comfy.text_encoders.hidream.hidream_clip(**llama_detect(clip_data),
                                                                        clip_l=False, clip_g=False, t5=False, llama=True, dtype_t5=None, t5xxl_scaled_fp8=None)
            clip_target.tokenizer = comfy.text_encoders.hidream.HiDreamTokenizer
        elif te_model == TEModel.QWEN25_3B:
            clip_target.clip = comfy.text_encoders.omnigen2.te(**llama_detect(clip_data))
            clip_target.tokenizer = comfy.text_encoders.omnigen2.Omnigen2Tokenizer
        else:
            # clip_l (detect_te_model returned CLIP_L or None)
            if clip_type == CLIPType.SD3:
                clip_target.clip = comfy.text_encoders.sd3_clip.sd3_clip(clip_l=True, clip_g=False, t5=False)
                clip_target.tokenizer = comfy.text_encoders.sd3_clip.SD3Tokenizer
            elif clip_type == CLIPType.HIDREAM:
                clip_target.clip = comfy.text_encoders.hidream.hidream_clip(clip_l=True, clip_g=False, t5=False, llama=False, dtype_t5=None, dtype_llama=None, t5xxl_scaled_fp8=None, llama_scaled_fp8=None)
                clip_target.tokenizer = comfy.text_encoders.hidream.HiDreamTokenizer
            else:
                clip_target.clip = sd1_clip.SD1ClipModel
                clip_target.tokenizer = sd1_clip.SD1Tokenizer
    elif len(clip_data) == 2:
        # Dual encoders: clip_type decides how the pair is combined.
        if clip_type == CLIPType.SD3:
            te_models = [detect_te_model(clip_data[0]), detect_te_model(clip_data[1])]
            clip_target.clip = comfy.text_encoders.sd3_clip.sd3_clip(clip_l=TEModel.CLIP_L in te_models, clip_g=TEModel.CLIP_G in te_models, t5=TEModel.T5_XXL in te_models, **t5xxl_detect(clip_data))
            clip_target.tokenizer = comfy.text_encoders.sd3_clip.SD3Tokenizer
        elif clip_type == CLIPType.HUNYUAN_DIT:
            clip_target.clip = comfy.text_encoders.hydit.HyditModel
            clip_target.tokenizer = comfy.text_encoders.hydit.HyditTokenizer
        elif clip_type == CLIPType.FLUX:
            clip_target.clip = comfy.text_encoders.flux.flux_clip(**t5xxl_detect(clip_data))
            clip_target.tokenizer = comfy.text_encoders.flux.FluxTokenizer
        elif clip_type == CLIPType.HUNYUAN_VIDEO:
            clip_target.clip = comfy.text_encoders.hunyuan_video.hunyuan_video_clip(**llama_detect(clip_data))
            clip_target.tokenizer = comfy.text_encoders.hunyuan_video.HunyuanVideoTokenizer
        elif clip_type == CLIPType.HIDREAM:
            # Detect which of the four possible HiDream encoders this pair provides.
            hidream_dualclip_classes = []
            for hidream_te in clip_data:
                te_model = detect_te_model(hidream_te)
                hidream_dualclip_classes.append(te_model)

            clip_l = TEModel.CLIP_L in hidream_dualclip_classes
            clip_g = TEModel.CLIP_G in hidream_dualclip_classes
            t5 = TEModel.T5_XXL in hidream_dualclip_classes
            llama = TEModel.LLAMA3_8 in hidream_dualclip_classes

            # Initialize t5xxl_detect and llama_detect kwargs if needed
            t5_kwargs = t5xxl_detect(clip_data) if t5 else {}
            llama_kwargs = llama_detect(clip_data) if llama else {}

            clip_target.clip = comfy.text_encoders.hidream.hidream_clip(clip_l=clip_l, clip_g=clip_g, t5=t5, llama=llama, **t5_kwargs, **llama_kwargs)
            clip_target.tokenizer = comfy.text_encoders.hidream.HiDreamTokenizer
        else:
            clip_target.clip = sdxl_clip.SDXLClipModel
            clip_target.tokenizer = sdxl_clip.SDXLTokenizer
    elif len(clip_data) == 3:
        # Three encoders: SD3-style clip_l + clip_g + t5.
        clip_target.clip = comfy.text_encoders.sd3_clip.sd3_clip(**t5xxl_detect(clip_data))
        clip_target.tokenizer = comfy.text_encoders.sd3_clip.SD3Tokenizer
    elif len(clip_data) == 4:
        # Four encoders: full HiDream stack.
        clip_target.clip = comfy.text_encoders.hidream.hidream_clip(**t5xxl_detect(clip_data), **llama_detect(clip_data))
        clip_target.tokenizer = comfy.text_encoders.hidream.HiDreamTokenizer

    parameters = 0
    for c in clip_data:
        parameters += comfy.utils.calculate_parameters(c)
        tokenizer_data, model_options = comfy.text_encoders.long_clipl.model_options_long_clip(c, tokenizer_data, model_options)

    clip = CLIP(clip_target, embedding_directory=embedding_directory, parameters=parameters, tokenizer_data=tokenizer_data, model_options=model_options)
    for c in clip_data:
        m, u = clip.load_sd(c)
        if len(m) > 0:
            logging.warning("clip missing: {}".format(m))

        if len(u) > 0:
            logging.debug("clip unexpected: {}".format(u))
    return clip
+
def load_gligen(ckpt_path):
    """Load a GLIGEN model from a checkpoint and wrap it in a ModelPatcher."""
    data = comfy.utils.load_torch_file(ckpt_path, safe_load=True)
    model = gligen.load_gligen(data)
    if model_management.should_use_fp16():
        model = model.half()
    load_dev = model_management.get_torch_device()
    offload_dev = model_management.unet_offload_device()
    return comfy.model_patcher.ModelPatcher(model, load_device=load_dev, offload_device=offload_dev)
+
def model_detection_error_hint(path, state_dict):
    """Return an extra hint line for model-detection error messages.

    Args:
        path: filesystem path of the checkpoint that failed detection.
        state_dict: the loaded state dict (currently unused; kept so future
            content-based hints can be added without changing callers).

    Returns:
        A hint string to append to the error message, or "" when no hint applies.
    """
    filename = os.path.basename(path)
    if 'lora' in filename.lower():
        # Fix: the message previously ended with a stray double period ("..").
        return "\nHINT: This seems to be a Lora file and Lora files should be put in the lora folder and loaded with a lora loader node."
    return ""
+
def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_clip=True, embedding_directory=None, state_dict=None, config=None):
    """Deprecated: load a checkpoint driven by an explicit yaml config file.

    Only the 'v' parameterization flag and the CLIP layer_idx from the config
    are still honored; everything else is delegated to
    load_checkpoint_guess_config. Returns (model, clip, vae).
    """
    logging.warning("Warning: The load checkpoint with config function is deprecated and will eventually be removed, please use the other one.")
    model, clip, vae, _ = load_checkpoint_guess_config(ckpt_path, output_vae=output_vae, output_clip=output_clip, output_clipvision=False, embedding_directory=embedding_directory, output_model=True)
    #TODO: this function is a mess and should be removed eventually
    if config is None:
        with open(config_path, 'r') as stream:
            config = yaml.safe_load(stream)
    model_config_params = config['model']['params']
    clip_config = model_config_params['cond_stage_config']

    if "parameterization" in model_config_params:
        if model_config_params["parameterization"] == "v":
            # Patch the sampling object so the model is treated as v-prediction.
            m = model.clone()
            class ModelSamplingAdvanced(comfy.model_sampling.ModelSamplingDiscrete, comfy.model_sampling.V_PREDICTION):
                pass
            m.add_object_patch("model_sampling", ModelSamplingAdvanced(model.model.model_config))
            model = m

    layer_idx = clip_config.get("params", {}).get("layer_idx", None)
    if layer_idx is not None:
        clip.clip_layer(layer_idx)

    return (model, clip, vae)
+
def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}):
    """Load a full checkpoint file from disk, auto-detecting its model type.

    Raises RuntimeError (with a filename-based hint) when detection fails.
    """
    sd, metadata = comfy.utils.load_torch_file(ckpt_path, return_metadata=True)
    out = load_state_dict_guess_config(sd, output_vae, output_clip, output_clipvision, embedding_directory, output_model, model_options, te_model_options=te_model_options, metadata=metadata)
    if out is None:
        hint = model_detection_error_hint(ckpt_path, sd)
        raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(ckpt_path, hint))
    return out
+
def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}, metadata=None):
    """Split a checkpoint state dict into its model / CLIP / VAE / clip-vision parts.

    Returns a (model_patcher, clip, vae, clipvision) tuple with None entries for
    parts that were not requested or not present, or None overall when the state
    dict is neither a recognizable checkpoint nor a loadable diffusion model.
    """
    clip = None
    clipvision = None
    vae = None
    model = None
    model_patcher = None

    diffusion_model_prefix = model_detection.unet_prefix_from_state_dict(sd)
    parameters = comfy.utils.calculate_parameters(sd, diffusion_model_prefix)
    weight_dtype = comfy.utils.weight_dtype(sd, diffusion_model_prefix)
    load_device = model_management.get_torch_device()

    model_config = model_detection.model_config_from_unet(sd, diffusion_model_prefix, metadata=metadata)
    if model_config is None:
        # Not a full checkpoint; fall back to treating it as a bare diffusion model.
        logging.warning("Warning, This is not a checkpoint file, trying to load it as a diffusion model only.")
        diffusion_model = load_diffusion_model_state_dict(sd, model_options={})
        if diffusion_model is None:
            return None
        return (diffusion_model, None, VAE(sd={}), None) # The VAE object is there to throw an exception if it's actually used


    unet_weight_dtype = list(model_config.supported_inference_dtypes)
    if model_config.scaled_fp8 is not None:
        # Pre-scaled fp8 checkpoints choose their own dtype; ignore the stored one.
        weight_dtype = None

    model_config.custom_operations = model_options.get("custom_operations", None)
    unet_dtype = model_options.get("dtype", model_options.get("weight_dtype", None))

    if unet_dtype is None:
        unet_dtype = model_management.unet_dtype(model_params=parameters, supported_dtypes=unet_weight_dtype, weight_dtype=weight_dtype)

    manual_cast_dtype = model_management.unet_manual_cast(unet_dtype, load_device, model_config.supported_inference_dtypes)
    model_config.set_inference_dtype(unet_dtype, manual_cast_dtype)

    if model_config.clip_vision_prefix is not None:
        if output_clipvision:
            clipvision = clip_vision.load_clipvision_from_sd(sd, model_config.clip_vision_prefix, True)

    if output_model:
        inital_load_device = model_management.unet_inital_load_device(parameters, unet_dtype)
        model = model_config.get_model(sd, diffusion_model_prefix, device=inital_load_device)
        model.load_model_weights(sd, diffusion_model_prefix)

    if output_vae:
        vae_sd = comfy.utils.state_dict_prefix_replace(sd, {k: "" for k in model_config.vae_key_prefix}, filter_keys=True)
        vae_sd = model_config.process_vae_state_dict(vae_sd)
        vae = VAE(sd=vae_sd, metadata=metadata)

    if output_clip:
        clip_target = model_config.clip_target(state_dict=sd)
        if clip_target is not None:
            clip_sd = model_config.process_clip_state_dict(sd)
            if len(clip_sd) > 0:
                parameters = comfy.utils.calculate_parameters(clip_sd)
                clip = CLIP(clip_target, embedding_directory=embedding_directory, tokenizer_data=clip_sd, parameters=parameters, model_options=te_model_options)
                m, u = clip.load_sd(clip_sd, full_model=True)
                if len(m) > 0:
                    # logit_scale / text_projection are routinely absent; only warn
                    # when something else is missing.
                    m_filter = list(filter(lambda a: ".logit_scale" not in a and ".transformer.text_projection.weight" not in a, m))
                    if len(m_filter) > 0:
                        logging.warning("clip missing: {}".format(m))
                    else:
                        logging.debug("clip missing: {}".format(m))

                if len(u) > 0:
                    logging.debug("clip unexpected {}:".format(u))
            else:
                logging.warning("no CLIP/text encoder weights in checkpoint, the text encoder model will not be loaded.")

    left_over = sd.keys()
    if len(left_over) > 0:
        logging.debug("left over keys: {}".format(left_over))

    if output_model:
        model_patcher = comfy.model_patcher.ModelPatcher(model, load_device=load_device, offload_device=model_management.unet_offload_device())
        if inital_load_device != torch.device("cpu"):
            logging.info("loaded diffusion model directly to GPU")
            model_management.load_models_gpu([model_patcher], force_full_load=True)

    return (model_patcher, clip, vae, clipvision)
+
+
def load_diffusion_model_state_dict(sd, model_options={}):
    """
    Loads a UNet diffusion model from a state dictionary, supporting both diffusers and regular formats.

    Args:
        sd (dict): State dictionary containing model weights and configuration
        model_options (dict, optional): Additional options for model loading. Supports:
            - dtype: Override model data type
            - custom_operations: Custom model operations
            - fp8_optimizations: Enable FP8 optimizations

    Returns:
        ModelPatcher: A wrapped model instance that handles device management and weight loading.
        Returns None if the model configuration cannot be detected.

    The function:
    1. Detects and handles different model formats (regular, diffusers, mmdit)
    2. Configures model dtype based on parameters and device capabilities
    3. Handles weight conversion and device placement
    4. Manages model optimization settings
    5. Loads weights and returns a device-managed model instance
    """
    dtype = model_options.get("dtype", None)

    #Allow loading unets from checkpoint files
    diffusion_model_prefix = model_detection.unet_prefix_from_state_dict(sd)
    temp_sd = comfy.utils.state_dict_prefix_replace(sd, {diffusion_model_prefix: ""}, filter_keys=True)
    if len(temp_sd) > 0:
        sd = temp_sd

    parameters = comfy.utils.calculate_parameters(sd)
    weight_dtype = comfy.utils.weight_dtype(sd)

    load_device = model_management.get_torch_device()
    model_config = model_detection.model_config_from_unet(sd, "")

    if model_config is not None:
        new_sd = sd
    else:
        # Not a native layout; try diffusers mmdit first, then diffusers unet.
        new_sd = model_detection.convert_diffusers_mmdit(sd, "")
        if new_sd is not None: #diffusers mmdit
            model_config = model_detection.model_config_from_unet(new_sd, "")
            if model_config is None:
                return None
        else: #diffusers unet
            model_config = model_detection.model_config_from_diffusers_unet(sd)
            if model_config is None:
                return None

            # Remap diffusers key names to the native layout, consuming sd
            # as we go so leftovers can be reported below.
            diffusers_keys = comfy.utils.unet_to_diffusers(model_config.unet_config)

            new_sd = {}
            for k in diffusers_keys:
                if k in sd:
                    new_sd[diffusers_keys[k]] = sd.pop(k)
                else:
                    logging.warning("{} {}".format(diffusers_keys[k], k))

    offload_device = model_management.unet_offload_device()
    unet_weight_dtype = list(model_config.supported_inference_dtypes)
    if model_config.scaled_fp8 is not None:
        # Pre-scaled fp8 checkpoints choose their own dtype; ignore the stored one.
        weight_dtype = None

    if dtype is None:
        unet_dtype = model_management.unet_dtype(model_params=parameters, supported_dtypes=unet_weight_dtype, weight_dtype=weight_dtype)
    else:
        unet_dtype = dtype

    manual_cast_dtype = model_management.unet_manual_cast(unet_dtype, load_device, model_config.supported_inference_dtypes)
    model_config.set_inference_dtype(unet_dtype, manual_cast_dtype)
    model_config.custom_operations = model_options.get("custom_operations", model_config.custom_operations)
    if model_options.get("fp8_optimizations", False):
        model_config.optimizations["fp8"] = True

    model = model_config.get_model(new_sd, "")
    model = model.to(offload_device)
    model.load_model_weights(new_sd, "")
    left_over = sd.keys()
    if len(left_over) > 0:
        logging.info("left over keys in diffusion model: {}".format(left_over))
    return comfy.model_patcher.ModelPatcher(model, load_device=load_device, offload_device=offload_device)
+
+
def load_diffusion_model(unet_path, model_options={}):
    """Load a diffusion model (UNet/DiT) file; raise when the type is unknown."""
    sd = comfy.utils.load_torch_file(unet_path)
    model = load_diffusion_model_state_dict(sd, model_options=model_options)
    if model is not None:
        return model
    logging.error("ERROR UNSUPPORTED DIFFUSION MODEL {}".format(unet_path))
    raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(unet_path, model_detection_error_hint(unet_path, sd)))
+
def load_unet(unet_path, dtype=None):
    """Deprecated alias for load_diffusion_model."""
    logging.warning("The load_unet function has been deprecated and will be removed please switch to: load_diffusion_model")
    options = {"dtype": dtype}
    return load_diffusion_model(unet_path, model_options=options)
+
def load_unet_state_dict(sd, dtype=None):
    """Deprecated alias for load_diffusion_model_state_dict."""
    logging.warning("The load_unet_state_dict function has been deprecated and will be removed please switch to: load_diffusion_model_state_dict")
    options = {"dtype": dtype}
    return load_diffusion_model_state_dict(sd, model_options=options)
+
def save_checkpoint(output_path, model, clip=None, vae=None, clip_vision=None, metadata=None, extra_keys={}):
    """Assemble a full checkpoint (diffusion model plus optional CLIP / VAE /
    clip-vision weights) and save it to output_path."""
    load_models = [model]
    clip_sd = None
    if clip is not None:
        load_models.append(clip.load_model())
        clip_sd = clip.get_sd()
    vae_sd = vae.get_sd() if vae is not None else None

    # Weights must be fully loaded/patched before snapshotting them.
    model_management.load_models_gpu(load_models, force_patch_weights=True)
    clip_vision_sd = clip_vision.get_sd() if clip_vision is not None else None
    sd = model.model.state_dict_for_saving(clip_sd, vae_sd, clip_vision_sd)
    sd.update(extra_keys)

    # Safetensors requires contiguous tensors.
    for key, tensor in list(sd.items()):
        if not tensor.is_contiguous():
            sd[key] = tensor.contiguous()

    comfy.utils.save_torch_file(sd, output_path, metadata=metadata)
diff --git a/ComfyUI/comfy/sdxl_clip.py b/ComfyUI/comfy/sdxl_clip.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8cef14e4d6f81eb8ec8f4b8825c8afaa1d5caf3
--- /dev/null
+++ b/ComfyUI/comfy/sdxl_clip.py
@@ -0,0 +1,95 @@
+from comfy import sd1_clip
+import torch
+import os
+
class SDXLClipG(sd1_clip.SDClipModel):
    """CLIP-G (OpenCLIP bigG) text encoder configured for SDXL."""

    def __init__(self, device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, dtype=None, model_options={}):
        # "penultimate" is shorthand for the second-to-last hidden layer.
        if layer == "penultimate":
            layer = "hidden"
            layer_idx = -2

        config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_config_bigg.json")
        opts = {**model_options, "model_name": "clip_g"}
        super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx,
                         textmodel_json_config=config_path, dtype=dtype,
                         special_tokens={"start": 49406, "end": 49407, "pad": 0},
                         layer_norm_hidden_state=False, return_projected_pooled=True,
                         model_options=opts)

    def load_sd(self, sd):
        return super().load_sd(sd)
+
class SDXLClipGTokenizer(sd1_clip.SDTokenizer):
    """Tokenizer for the CLIP-G encoder: no end-token padding, 1280-dim
    embeddings keyed as 'clip_g' for textual-inversion lookups."""
    def __init__(self, tokenizer_path=None, embedding_directory=None, tokenizer_data={}):
        super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1280, embedding_key='clip_g', tokenizer_data=tokenizer_data)
+
+
class SDXLTokenizer:
    """Paired tokenizer producing both CLIP-G ('g') and CLIP-L ('l') token streams."""

    def __init__(self, embedding_directory=None, tokenizer_data={}):
        self.clip_l = sd1_clip.SDTokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data)
        self.clip_g = SDXLClipGTokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data)

    def tokenize_with_weights(self, text: str, return_word_ids=False, **kwargs):
        """Tokenize text with both tokenizers; returns a dict keyed 'g' and 'l'."""
        return {
            "g": self.clip_g.tokenize_with_weights(text, return_word_ids, **kwargs),
            "l": self.clip_l.tokenize_with_weights(text, return_word_ids, **kwargs),
        }

    def untokenize(self, token_weight_pair):
        return self.clip_g.untokenize(token_weight_pair)

    def state_dict(self):
        # Plain tokenizers carry no persistent state.
        return {}
+
class SDXLClipModel(torch.nn.Module):
    """Dual text encoder (CLIP-L + CLIP-G) used by SDXL base models."""

    def __init__(self, device="cpu", dtype=None, model_options={}):
        super().__init__()
        self.clip_l = sd1_clip.SDClipModel(layer="hidden", layer_idx=-2, device=device, dtype=dtype, layer_norm_hidden_state=False, model_options=model_options)
        self.clip_g = SDXLClipG(device=device, dtype=dtype, model_options=model_options)
        self.dtypes = {dtype}

    def set_clip_options(self, options):
        self.clip_l.set_clip_options(options)
        self.clip_g.set_clip_options(options)

    def reset_clip_options(self):
        self.clip_g.reset_clip_options()
        self.clip_l.reset_clip_options()

    def encode_token_weights(self, token_weight_pairs):
        """Encode both token streams and concatenate features along the channel
        dim (CLIP-L first); the pooled output comes from CLIP-G."""
        g_out, g_pooled = self.clip_g.encode_token_weights(token_weight_pairs["g"])
        l_out, _ = self.clip_l.encode_token_weights(token_weight_pairs["l"])
        # The two encoders can disagree on sequence length; trim to the shorter.
        cut_to = min(l_out.shape[1], g_out.shape[1])
        joined = torch.cat([l_out[:, :cut_to], g_out[:, :cut_to]], dim=-1)
        return joined, g_pooled

    def load_sd(self, sd):
        # Layer 30 only exists in the 32-layer CLIP-G checkpoints.
        if "text_model.encoder.layers.30.mlp.fc1.weight" in sd:
            return self.clip_g.load_sd(sd)
        return self.clip_l.load_sd(sd)
+
class SDXLRefinerClipModel(sd1_clip.SD1ClipModel):
    """SDXL refiner text encoder: CLIP-G only, wrapped in the single-encoder container."""
    def __init__(self, device="cpu", dtype=None, model_options={}):
        super().__init__(device=device, dtype=dtype, clip_name="g", clip_model=SDXLClipG, model_options=model_options)
+
+
class StableCascadeClipGTokenizer(sd1_clip.SDTokenizer):
    """CLIP-G tokenizer for Stable Cascade; unlike SDXL's, it pads with the end token."""
    def __init__(self, tokenizer_path=None, embedding_directory=None, tokenizer_data={}):
        super().__init__(tokenizer_path, pad_with_end=True, embedding_directory=embedding_directory, embedding_size=1280, embedding_key='clip_g', tokenizer_data=tokenizer_data)
+
class StableCascadeTokenizer(sd1_clip.SD1Tokenizer):
    """Single-stream tokenizer container exposing the Cascade CLIP-G tokenizer as 'g'."""
    def __init__(self, embedding_directory=None, tokenizer_data={}):
        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="g", tokenizer=StableCascadeClipGTokenizer)
+
class StableCascadeClipG(sd1_clip.SDClipModel):
    """CLIP-G encoder variant for Stable Cascade: attention masks enabled and
    padding done with the end token instead of 0."""

    def __init__(self, device="cpu", max_length=77, freeze=True, layer="hidden", layer_idx=-1, dtype=None, model_options={}):
        config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_config_bigg.json")
        opts = {**model_options, "model_name": "clip_g"}
        super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx,
                         textmodel_json_config=config_path, dtype=dtype,
                         special_tokens={"start": 49406, "end": 49407, "pad": 49407},
                         layer_norm_hidden_state=False, enable_attention_masks=True,
                         return_projected_pooled=True, model_options=opts)

    def load_sd(self, sd):
        return super().load_sd(sd)
+
class StableCascadeClipModel(sd1_clip.SD1ClipModel):
    """Stable Cascade text encoder: Cascade CLIP-G wrapped in the single-encoder container."""
    def __init__(self, device="cpu", dtype=None, model_options={}):
        super().__init__(device=device, dtype=dtype, clip_name="g", clip_model=StableCascadeClipG, model_options=model_options)