Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +116 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Angry Frown.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Big Laugh.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Kiss Lips Chin Down.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Kiss Lips Chin Up.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Laughing.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Scared.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Shy.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Smile Looking At Viewer.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Smile looking away from viewer.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Smolder.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Surprised.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/confident smile (closed lips).exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/confused.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/disappointed.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/dumbfounded.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/excited (big eyes).exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/focused (concentrated stare).exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/sad.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/skeptical.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/tired (heavy eyes).exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/wink.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/yawning.exp +0 -0
- custom_nodes/ComfyUI-AdvancedLivePortrait/nodes.py +2 -2
- custom_nodes/ComfyUI-CogVideoXWrapper/custom_cogvideox_transformer_3d.py +90 -54
- custom_nodes/ComfyUI-CogVideoXWrapper/examples/cogvideox_Fun_180_orbit_01.json +1922 -0
- custom_nodes/ComfyUI-CogVideoXWrapper/model_loading.py +7 -77
- custom_nodes/ComfyUI-DepthAnythingV2/.gitattributes +2 -0
- custom_nodes/ComfyUI-DepthAnythingV2/.github/workflows/publish.yml +22 -0
- custom_nodes/ComfyUI-DepthAnythingV2/.gitignore +9 -0
- custom_nodes/ComfyUI-DepthAnythingV2/README.md +6 -0
- custom_nodes/ComfyUI-DepthAnythingV2/__init__.py +3 -0
- custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2.py +415 -0
- custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2_layers/__init__.py +11 -0
- custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2_layers/attention.py +94 -0
- custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2_layers/block.py +252 -0
- custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2_layers/drop_path.py +35 -0
- custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2_layers/layer_scale.py +28 -0
- custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2_layers/mlp.py +41 -0
- custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2_layers/patch_embed.py +90 -0
- custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2_layers/swiglu_ffn.py +63 -0
- custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dpt.py +199 -0
- custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/util/blocks.py +149 -0
- custom_nodes/ComfyUI-DepthAnythingV2/nodes.py +189 -0
- custom_nodes/ComfyUI-DepthAnythingV2/pyproject.toml +15 -0
- custom_nodes/ComfyUI-DepthAnythingV2/requirements.txt +2 -0
- custom_nodes/ComfyUI-F5-TTS/=1.31.14 +0 -0
- custom_nodes/ComfyUI-Frame-Interpolation/ckpts/flavr/FLAVR_2x.pth +3 -0
- custom_nodes/ComfyUI-Frame-Interpolation/ckpts/rife/rife49.pth +3 -0
- custom_nodes/ComfyUI-Frame-Interpolation/ckpts/rife/sudo_rife4_269.662_testV1_scale1.pth +3 -0
.gitattributes
CHANGED
|
@@ -903,3 +903,119 @@ models/text_encoders/models--openai--clip-vit-large-patch14/blobs/a2bf730a0c7deb
|
|
| 903 |
models/text_encoders/models--xlabs-ai--xflux_text_encoders/blobs/a5640855b301fcdbceddfa90ae8066cd9414aff020552a201a255ecf2059da00 filter=lfs diff=lfs merge=lfs -text
|
| 904 |
models/text_encoders/models--xlabs-ai--xflux_text_encoders/blobs/d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86 filter=lfs diff=lfs merge=lfs -text
|
| 905 |
models/text_encoders/models--xlabs-ai--xflux_text_encoders/blobs/ec87bffd1923e8b2774a6d240c922a41f6143081d52cf83b8fe39e9d838c893e filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 903 |
models/text_encoders/models--xlabs-ai--xflux_text_encoders/blobs/a5640855b301fcdbceddfa90ae8066cd9414aff020552a201a255ecf2059da00 filter=lfs diff=lfs merge=lfs -text
|
| 904 |
models/text_encoders/models--xlabs-ai--xflux_text_encoders/blobs/d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86 filter=lfs diff=lfs merge=lfs -text
|
| 905 |
models/text_encoders/models--xlabs-ai--xflux_text_encoders/blobs/ec87bffd1923e8b2774a6d240c922a41f6143081d52cf83b8fe39e9d838c893e filter=lfs diff=lfs merge=lfs -text
|
| 906 |
+
custom_nodes/ComfyUI-Hunyuan3DWrapper/hy3dgen/texgen/custom_rasterizer/dist/custom_rasterizer-0.1.0+torch241.cuda121-py3.10-linux-x86_64.egg filter=lfs diff=lfs merge=lfs -text
|
| 907 |
+
custom_nodes/ComfyUI-LTXVideo/assets/end.jpg filter=lfs diff=lfs merge=lfs -text
|
| 908 |
+
custom_nodes/ComfyUI-LTXVideo/assets/fox.jpg filter=lfs diff=lfs merge=lfs -text
|
| 909 |
+
custom_nodes/ComfyUI-LTXVideo/assets/jeep.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 910 |
+
custom_nodes/ComfyUI-LTXVideo/assets/ltxvideo-first-sequence-conditioning.png filter=lfs diff=lfs merge=lfs -text
|
| 911 |
+
custom_nodes/ComfyUI-LTXVideo/assets/ltxvideo-frame-interpolation.png filter=lfs diff=lfs merge=lfs -text
|
| 912 |
+
custom_nodes/ComfyUI-LTXVideo/assets/ltxvideo-i2v-distilled.png filter=lfs diff=lfs merge=lfs -text
|
| 913 |
+
custom_nodes/ComfyUI-LTXVideo/assets/ltxvideo-last-sequence-conditioning.png filter=lfs diff=lfs merge=lfs -text
|
| 914 |
+
custom_nodes/ComfyUI-LTXVideo/assets/shrek2.jpg filter=lfs diff=lfs merge=lfs -text
|
| 915 |
+
custom_nodes/ComfyUI-LTXVideo/assets/start.jpg filter=lfs diff=lfs merge=lfs -text
|
| 916 |
+
custom_nodes/ComfyUI-LTXVideo/tricks/assets/ltxvideo-flow-edit.png filter=lfs diff=lfs merge=lfs -text
|
| 917 |
+
custom_nodes/ComfyUI-LTXVideo/tricks/assets/ltxvideo-rf-edit.png filter=lfs diff=lfs merge=lfs -text
|
| 918 |
+
custom_nodes/ComfyUI-LTXVideo/tricks/assets/ref.png filter=lfs diff=lfs merge=lfs -text
|
| 919 |
+
custom_nodes/ComfyUI-LTXVideo/tricks/assets/shot.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 920 |
+
custom_nodes/ComfyUI-LTXVideo/tricks/assets/shot2.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 921 |
+
custom_nodes/ComfyUI-LTXVideo/tricks/assets/shrek2.jpg filter=lfs diff=lfs merge=lfs -text
|
| 922 |
+
custom_nodes/ComfyUI-LTXVideo/tricks/assets/shrek3.jpg filter=lfs diff=lfs merge=lfs -text
|
| 923 |
+
custom_nodes/ComfyUI-TogetherVision/images/Latest.png filter=lfs diff=lfs merge=lfs -text
|
| 924 |
+
custom_nodes/ComfyUI-TogetherVision/images/node-screenshot-old.png filter=lfs diff=lfs merge=lfs -text
|
| 925 |
+
custom_nodes/ComfyUI-TogetherVision/images/node-screenshot.png filter=lfs diff=lfs merge=lfs -text
|
| 926 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/00.gif filter=lfs diff=lfs merge=lfs -text
|
| 927 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/01.gif filter=lfs diff=lfs merge=lfs -text
|
| 928 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/02.gif filter=lfs diff=lfs merge=lfs -text
|
| 929 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/03.gif filter=lfs diff=lfs merge=lfs -text
|
| 930 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/04.gif filter=lfs diff=lfs merge=lfs -text
|
| 931 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/05.gif filter=lfs diff=lfs merge=lfs -text
|
| 932 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/06.gif filter=lfs diff=lfs merge=lfs -text
|
| 933 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/07.gif filter=lfs diff=lfs merge=lfs -text
|
| 934 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/08.gif filter=lfs diff=lfs merge=lfs -text
|
| 935 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/09.gif filter=lfs diff=lfs merge=lfs -text
|
| 936 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/10.gif filter=lfs diff=lfs merge=lfs -text
|
| 937 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/11.gif filter=lfs diff=lfs merge=lfs -text
|
| 938 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/12.gif filter=lfs diff=lfs merge=lfs -text
|
| 939 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/13.gif filter=lfs diff=lfs merge=lfs -text
|
| 940 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/72105_388.mp4_00-00.png filter=lfs diff=lfs merge=lfs -text
|
| 941 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/72105_388.mp4_00-01.png filter=lfs diff=lfs merge=lfs -text
|
| 942 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/72109_125.mp4_00-00.png filter=lfs diff=lfs merge=lfs -text
|
| 943 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/72109_125.mp4_00-01.png filter=lfs diff=lfs merge=lfs -text
|
| 944 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/72110_255.mp4_00-00.png filter=lfs diff=lfs merge=lfs -text
|
| 945 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/72110_255.mp4_00-01.png filter=lfs diff=lfs merge=lfs -text
|
| 946 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/74302_1349_frame1.png filter=lfs diff=lfs merge=lfs -text
|
| 947 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/74302_1349_frame3.png filter=lfs diff=lfs merge=lfs -text
|
| 948 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/Japan_v2_1_070321_s3_frame1.png filter=lfs diff=lfs merge=lfs -text
|
| 949 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/Japan_v2_1_070321_s3_frame3.png filter=lfs diff=lfs merge=lfs -text
|
| 950 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/Japan_v2_2_062266_s2_frame1.png filter=lfs diff=lfs merge=lfs -text
|
| 951 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/Japan_v2_2_062266_s2_frame3.png filter=lfs diff=lfs merge=lfs -text
|
| 952 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/frame0001_05.png filter=lfs diff=lfs merge=lfs -text
|
| 953 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/frame0001_09.png filter=lfs diff=lfs merge=lfs -text
|
| 954 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/frame0001_10.png filter=lfs diff=lfs merge=lfs -text
|
| 955 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/assets/frame0016_10.png filter=lfs diff=lfs merge=lfs -text
|
| 956 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/ldm/modules/image_degradation/utils/test.png filter=lfs diff=lfs merge=lfs -text
|
| 957 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/prompts/512_interp/74906_1462_frame1.png filter=lfs diff=lfs merge=lfs -text
|
| 958 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/prompts/512_interp/74906_1462_frame3.png filter=lfs diff=lfs merge=lfs -text
|
| 959 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/prompts/512_interp/Japan_v2_2_062266_s2_frame1.png filter=lfs diff=lfs merge=lfs -text
|
| 960 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/prompts/512_interp/Japan_v2_2_062266_s2_frame3.png filter=lfs diff=lfs merge=lfs -text
|
| 961 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/prompts/512_interp/Japan_v2_3_119235_s2_frame1.png filter=lfs diff=lfs merge=lfs -text
|
| 962 |
+
custom_nodes/ComfyUI-ToonCrafter/ToonCrafter/prompts/512_interp/Japan_v2_3_119235_s2_frame3.png filter=lfs diff=lfs merge=lfs -text
|
| 963 |
+
custom_nodes/ComfyUI-UNO/asset/show.png filter=lfs diff=lfs merge=lfs -text
|
| 964 |
+
custom_nodes/ComfyUI-WanVideoWrapper/example_workflows/example_inputs/env.png filter=lfs diff=lfs merge=lfs -text
|
| 965 |
+
custom_nodes/ComfyUI-WanVideoWrapper/example_workflows/example_inputs/human.png filter=lfs diff=lfs merge=lfs -text
|
| 966 |
+
custom_nodes/ComfyUI-WanVideoWrapper/example_workflows/example_inputs/wolf_interpolated.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 967 |
+
custom_nodes/ComfyUI-easycontrol/asset/show.png filter=lfs diff=lfs merge=lfs -text
|
| 968 |
+
custom_nodes/ComfyUI-easycontrol/asset/show_2.webp filter=lfs diff=lfs merge=lfs -text
|
| 969 |
+
custom_nodes/ComfyUI-fastblend/drop.wav filter=lfs diff=lfs merge=lfs -text
|
| 970 |
+
custom_nodes/ComfyUI-fastblend/jiji.wav filter=lfs diff=lfs merge=lfs -text
|
| 971 |
+
custom_nodes/ComfyUI-faster-whisper/workflows/faster_whisper_suttitle.png filter=lfs diff=lfs merge=lfs -text
|
| 972 |
+
custom_nodes/ComfyUI_DiffRhythm/g2p/sources/chinese_lexicon.txt filter=lfs diff=lfs merge=lfs -text
|
| 973 |
+
custom_nodes/ComfyUI_DiffRhythm/images/2025-03-16_03-53-48.png filter=lfs diff=lfs merge=lfs -text
|
| 974 |
+
custom_nodes/ComfyUI_InfiniteYou/assets/face_combine_workflow.png filter=lfs diff=lfs merge=lfs -text
|
| 975 |
+
custom_nodes/ComfyUI_InfiniteYou/assets/face_swap.jpg filter=lfs diff=lfs merge=lfs -text
|
| 976 |
+
custom_nodes/ComfyUI_InfiniteYou/assets/musk.png filter=lfs diff=lfs merge=lfs -text
|
| 977 |
+
custom_nodes/ComfyUI_InfiniteYou/assets/teaser.jpg filter=lfs diff=lfs merge=lfs -text
|
| 978 |
+
custom_nodes/ComfyUI_InfiniteYou/assets/workflow_example.png filter=lfs diff=lfs merge=lfs -text
|
| 979 |
+
custom_nodes/ComfyUI_Patches_ll/example/PuLID_with_FBcache.png filter=lfs diff=lfs merge=lfs -text
|
| 980 |
+
custom_nodes/ComfyUI_Patches_ll/example/PuLID_with_teacache.png filter=lfs diff=lfs merge=lfs -text
|
| 981 |
+
custom_nodes/ComfyUI_Patches_ll/example/workflow_base.png filter=lfs diff=lfs merge=lfs -text
|
| 982 |
+
custom_nodes/ComfyUI_Patches_ll/example/workflow_hunyuanvideo.png filter=lfs diff=lfs merge=lfs -text
|
| 983 |
+
custom_nodes/ComfyUI_Patches_ll/example/workflow_ltxvideo.png filter=lfs diff=lfs merge=lfs -text
|
| 984 |
+
custom_nodes/ComfyUI_TiledKSampler/examples/ComfyUI_02006_.png filter=lfs diff=lfs merge=lfs -text
|
| 985 |
+
custom_nodes/ComfyUI_TiledKSampler/examples/ComfyUI_02010_.png filter=lfs diff=lfs merge=lfs -text
|
| 986 |
+
custom_nodes/comfyui_LLM_party/custom_tool/img_temp/-1738351262.JPG filter=lfs diff=lfs merge=lfs -text
|
| 987 |
+
custom_nodes/comfyui_controlnet_aux/ckpts/LayerNorm/DensePose-TorchScript-with-hint-image/densepose_r50_fpn_dl.torchscript filter=lfs diff=lfs merge=lfs -text
|
| 988 |
+
custom_nodes/eden_comfy_pipelines/example_workflows/audio_split_stems.jpg filter=lfs diff=lfs merge=lfs -text
|
| 989 |
+
custom_nodes/eden_comfy_pipelines/example_workflows/background_removal.jpg filter=lfs diff=lfs merge=lfs -text
|
| 990 |
+
custom_nodes/eden_comfy_pipelines/example_workflows/face_styler.jpg filter=lfs diff=lfs merge=lfs -text
|
| 991 |
+
custom_nodes/eden_comfy_pipelines/example_workflows/flux_dev.jpg filter=lfs diff=lfs merge=lfs -text
|
| 992 |
+
custom_nodes/eden_comfy_pipelines/example_workflows/flux_inpainting.jpg filter=lfs diff=lfs merge=lfs -text
|
| 993 |
+
custom_nodes/eden_comfy_pipelines/example_workflows/layer_diffusion.jpg filter=lfs diff=lfs merge=lfs -text
|
| 994 |
+
custom_nodes/eden_comfy_pipelines/example_workflows/mars-id.jpg filter=lfs diff=lfs merge=lfs -text
|
| 995 |
+
custom_nodes/eden_comfy_pipelines/example_workflows/ominicontrol.jpg filter=lfs diff=lfs merge=lfs -text
|
| 996 |
+
custom_nodes/eden_comfy_pipelines/example_workflows/txt2img_SDXL.jpg filter=lfs diff=lfs merge=lfs -text
|
| 997 |
+
custom_nodes/eden_comfy_pipelines/img_utils/depth.png filter=lfs diff=lfs merge=lfs -text
|
| 998 |
+
custom_nodes/eden_comfy_pipelines/img_utils/image.png filter=lfs diff=lfs merge=lfs -text
|
| 999 |
+
custom_nodes/mikey_nodes/HaldCLUT/Agfa[[:space:]]Vista[[:space:]]200.png filter=lfs diff=lfs merge=lfs -text
|
| 1000 |
+
custom_nodes/mikey_nodes/HaldCLUT/Anime.png filter=lfs diff=lfs merge=lfs -text
|
| 1001 |
+
custom_nodes/mikey_nodes/HaldCLUT/CandleLight.png filter=lfs diff=lfs merge=lfs -text
|
| 1002 |
+
custom_nodes/mikey_nodes/HaldCLUT/ColorNegative.png filter=lfs diff=lfs merge=lfs -text
|
| 1003 |
+
custom_nodes/mikey_nodes/HaldCLUT/Fuji[[:space:]]Velvia[[:space:]]50.png filter=lfs diff=lfs merge=lfs -text
|
| 1004 |
+
custom_nodes/mikey_nodes/HaldCLUT/Ilford[[:space:]]HP5.png filter=lfs diff=lfs merge=lfs -text
|
| 1005 |
+
custom_nodes/mikey_nodes/HaldCLUT/Kodak[[:space:]]ColorPlus[[:space:]]200.png filter=lfs diff=lfs merge=lfs -text
|
| 1006 |
+
custom_nodes/mikey_nodes/HaldCLUT/Kodak[[:space:]]Ektachrome[[:space:]]100.png filter=lfs diff=lfs merge=lfs -text
|
| 1007 |
+
custom_nodes/mikey_nodes/HaldCLUT/Kodak[[:space:]]Gold[[:space:]]200.png filter=lfs diff=lfs merge=lfs -text
|
| 1008 |
+
custom_nodes/mikey_nodes/HaldCLUT/Kodak[[:space:]]Kodachrome[[:space:]]64.png filter=lfs diff=lfs merge=lfs -text
|
| 1009 |
+
custom_nodes/mikey_nodes/HaldCLUT/Kodak[[:space:]]TRI-X[[:space:]]400.png filter=lfs diff=lfs merge=lfs -text
|
| 1010 |
+
custom_nodes/mikey_nodes/HaldCLUT/TealMagentaGold.png filter=lfs diff=lfs merge=lfs -text
|
| 1011 |
+
custom_nodes/mikey_nodes/HaldCLUT/broadcast.png filter=lfs diff=lfs merge=lfs -text
|
| 1012 |
+
custom_nodes/mikey_nodes/HaldCLUT/bw.png filter=lfs diff=lfs merge=lfs -text
|
| 1013 |
+
custom_nodes/mikey_nodes/HaldCLUT/clipped.png filter=lfs diff=lfs merge=lfs -text
|
| 1014 |
+
custom_nodes/mikey_nodes/HaldCLUT/dd.png filter=lfs diff=lfs merge=lfs -text
|
| 1015 |
+
custom_nodes/mikey_nodes/HaldCLUT/h8.png filter=lfs diff=lfs merge=lfs -text
|
| 1016 |
+
custom_nodes/mikey_nodes/HaldCLUT/lit.png filter=lfs diff=lfs merge=lfs -text
|
| 1017 |
+
custom_nodes/mikey_nodes/HaldCLUT/modern.png filter=lfs diff=lfs merge=lfs -text
|
| 1018 |
+
custom_nodes/mikey_nodes/HaldCLUT/preset.png filter=lfs diff=lfs merge=lfs -text
|
| 1019 |
+
custom_nodes/mikey_nodes/HaldCLUT/retro.png filter=lfs diff=lfs merge=lfs -text
|
| 1020 |
+
custom_nodes/mikey_nodes/noise.png filter=lfs diff=lfs merge=lfs -text
|
| 1021 |
+
custom_nodes/mikey_nodes/noise_bw.png filter=lfs diff=lfs merge=lfs -text
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Angry Frown.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Big Laugh.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Kiss Lips Chin Down.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Kiss Lips Chin Up.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Laughing.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Scared.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Shy.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Smile Looking At Viewer.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Smile looking away from viewer.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Smolder.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/Surprised.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/confident smile (closed lips).exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/confused.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/disappointed.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/dumbfounded.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/excited (big eyes).exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/focused (concentrated stare).exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/sad.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/skeptical.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/tired (heavy eyes).exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/wink.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/exp_data/yawning.exp
ADDED
|
Binary file (1.04 kB). View file
|
|
|
custom_nodes/ComfyUI-AdvancedLivePortrait/nodes.py
CHANGED
|
@@ -523,8 +523,8 @@ def logging_time(original_fn):
|
|
| 523 |
return wrapper_fn
|
| 524 |
|
| 525 |
|
| 526 |
-
|
| 527 |
-
exp_data_dir = os.path.join(folder_paths.output_directory, "exp_data")
|
| 528 |
if os.path.isdir(exp_data_dir) == False:
|
| 529 |
os.mkdir(exp_data_dir)
|
| 530 |
class SaveExpData:
|
|
|
|
| 523 |
return wrapper_fn
|
| 524 |
|
| 525 |
|
| 526 |
+
exp_data_dir = os.path.join(current_directory, "exp_data")
|
| 527 |
+
# exp_data_dir = os.path.join(folder_paths.output_directory, "exp_data")
|
| 528 |
if os.path.isdir(exp_data_dir) == False:
|
| 529 |
os.mkdir(exp_data_dir)
|
| 530 |
class SaveExpData:
|
custom_nodes/ComfyUI-CogVideoXWrapper/custom_cogvideox_transformer_3d.py
CHANGED
|
@@ -46,40 +46,9 @@ except:
|
|
| 46 |
|
| 47 |
from comfy.ldm.modules.attention import optimized_attention
|
| 48 |
|
| 49 |
-
|
| 50 |
-
def
|
| 51 |
-
|
| 52 |
-
def func(q, k, v, is_causal=False, attn_mask=None):
|
| 53 |
-
return F.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask, dropout_p=0.0, is_causal=is_causal)
|
| 54 |
-
return func
|
| 55 |
-
elif attention_mode == "comfy":
|
| 56 |
-
def func(q, k, v, is_causal=False, attn_mask=None):
|
| 57 |
-
return optimized_attention(q, k, v, mask=attn_mask, heads=heads, skip_reshape=True)
|
| 58 |
-
return func
|
| 59 |
-
|
| 60 |
-
elif attention_mode == "sageattn" or attention_mode == "fused_sageattn":
|
| 61 |
-
@torch.compiler.disable()
|
| 62 |
-
def func(q, k, v, is_causal=False, attn_mask=None):
|
| 63 |
-
return sageattn(q, k, v, is_causal=is_causal, attn_mask=attn_mask)
|
| 64 |
-
return func
|
| 65 |
-
elif attention_mode == "sageattn_qk_int8_pv_fp16_cuda":
|
| 66 |
-
from sageattention import sageattn_qk_int8_pv_fp16_cuda
|
| 67 |
-
@torch.compiler.disable()
|
| 68 |
-
def func(q, k, v, is_causal=False, attn_mask=None):
|
| 69 |
-
return sageattn_qk_int8_pv_fp16_cuda(q, k, v, is_causal=is_causal, attn_mask=attn_mask, pv_accum_dtype="fp32")
|
| 70 |
-
return func
|
| 71 |
-
elif attention_mode == "sageattn_qk_int8_pv_fp16_triton":
|
| 72 |
-
from sageattention import sageattn_qk_int8_pv_fp16_triton
|
| 73 |
-
@torch.compiler.disable()
|
| 74 |
-
def func(q, k, v, is_causal=False, attn_mask=None):
|
| 75 |
-
return sageattn_qk_int8_pv_fp16_triton(q, k, v, is_causal=is_causal, attn_mask=attn_mask)
|
| 76 |
-
return func
|
| 77 |
-
elif attention_mode == "sageattn_qk_int8_pv_fp8_cuda":
|
| 78 |
-
from sageattention import sageattn_qk_int8_pv_fp8_cuda
|
| 79 |
-
@torch.compiler.disable()
|
| 80 |
-
def func(q, k, v, is_causal=False, attn_mask=None):
|
| 81 |
-
return sageattn_qk_int8_pv_fp8_cuda(q, k, v, is_causal=is_causal, attn_mask=attn_mask, pv_accum_dtype="fp32+fp32")
|
| 82 |
-
return func
|
| 83 |
|
| 84 |
def fft(tensor):
|
| 85 |
tensor_fft = torch.fft.fft2(tensor)
|
|
@@ -98,18 +67,16 @@ def fft(tensor):
|
|
| 98 |
|
| 99 |
return low_freq_fft, high_freq_fft
|
| 100 |
|
| 101 |
-
#region Attention
|
| 102 |
class CogVideoXAttnProcessor2_0:
|
| 103 |
r"""
|
| 104 |
Processor for implementing scaled dot-product attention for the CogVideoX model. It applies a rotary embedding on
|
| 105 |
query and key vectors, but does not include spatial normalization.
|
| 106 |
"""
|
| 107 |
|
| 108 |
-
def __init__(self
|
| 109 |
if not hasattr(F, "scaled_dot_product_attention"):
|
| 110 |
raise ImportError("CogVideoXAttnProcessor requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.")
|
| 111 |
-
|
| 112 |
-
self.attn_func = attn_func
|
| 113 |
def __call__(
|
| 114 |
self,
|
| 115 |
attn: Attention,
|
|
@@ -117,6 +84,7 @@ class CogVideoXAttnProcessor2_0:
|
|
| 117 |
encoder_hidden_states: torch.Tensor,
|
| 118 |
attention_mask: Optional[torch.Tensor] = None,
|
| 119 |
image_rotary_emb: Optional[torch.Tensor] = None,
|
|
|
|
| 120 |
) -> torch.Tensor:
|
| 121 |
text_seq_length = encoder_hidden_states.size(1)
|
| 122 |
|
|
@@ -133,7 +101,7 @@ class CogVideoXAttnProcessor2_0:
|
|
| 133 |
if attn.to_q.weight.dtype == torch.float16 or attn.to_q.weight.dtype == torch.bfloat16:
|
| 134 |
hidden_states = hidden_states.to(attn.to_q.weight.dtype)
|
| 135 |
|
| 136 |
-
if
|
| 137 |
query = attn.to_q(hidden_states)
|
| 138 |
key = attn.to_k(hidden_states)
|
| 139 |
value = attn.to_v(hidden_states)
|
|
@@ -160,10 +128,16 @@ class CogVideoXAttnProcessor2_0:
|
|
| 160 |
if not attn.is_cross_attention:
|
| 161 |
key[:, :, text_seq_length:] = apply_rotary_emb(key[:, :, text_seq_length:], image_rotary_emb)
|
| 162 |
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
if self.attention_mode != "comfy":
|
| 166 |
hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
|
| 168 |
# linear proj
|
| 169 |
hidden_states = attn.to_out[0](hidden_states)
|
|
@@ -229,15 +203,13 @@ class CogVideoXBlock(nn.Module):
|
|
| 229 |
ff_inner_dim: Optional[int] = None,
|
| 230 |
ff_bias: bool = True,
|
| 231 |
attention_out_bias: bool = True,
|
| 232 |
-
attention_mode: Optional[str] = "sdpa",
|
| 233 |
):
|
| 234 |
super().__init__()
|
| 235 |
|
| 236 |
# 1. Self Attention
|
| 237 |
self.norm1 = CogVideoXLayerNormZero(time_embed_dim, dim, norm_elementwise_affine, norm_eps, bias=True)
|
| 238 |
-
|
| 239 |
-
attn_func = set_attention_func(attention_mode, num_attention_heads)
|
| 240 |
|
|
|
|
| 241 |
self.attn1 = Attention(
|
| 242 |
query_dim=dim,
|
| 243 |
dim_head=attention_head_dim,
|
|
@@ -246,7 +218,7 @@ class CogVideoXBlock(nn.Module):
|
|
| 246 |
eps=1e-6,
|
| 247 |
bias=attention_bias,
|
| 248 |
out_bias=attention_out_bias,
|
| 249 |
-
processor=CogVideoXAttnProcessor2_0(
|
| 250 |
)
|
| 251 |
|
| 252 |
# 2. Feed Forward
|
|
@@ -275,6 +247,7 @@ class CogVideoXBlock(nn.Module):
|
|
| 275 |
fastercache_counter=0,
|
| 276 |
fastercache_start_step=15,
|
| 277 |
fastercache_device="cuda:0",
|
|
|
|
| 278 |
) -> torch.Tensor:
|
| 279 |
#print("hidden_states in block: ", hidden_states.shape) #1.5: torch.Size([2, 3200, 3072]) 10.: torch.Size([2, 6400, 3072])
|
| 280 |
text_seq_length = encoder_hidden_states.size(1)
|
|
@@ -313,6 +286,7 @@ class CogVideoXBlock(nn.Module):
|
|
| 313 |
hidden_states=norm_hidden_states,
|
| 314 |
encoder_hidden_states=norm_encoder_hidden_states,
|
| 315 |
image_rotary_emb=image_rotary_emb,
|
|
|
|
| 316 |
)
|
| 317 |
if fastercache_counter == fastercache_start_step:
|
| 318 |
self.cached_hidden_states = [attn_hidden_states.to(fastercache_device), attn_hidden_states.to(fastercache_device)]
|
|
@@ -324,7 +298,8 @@ class CogVideoXBlock(nn.Module):
|
|
| 324 |
attn_hidden_states, attn_encoder_hidden_states = self.attn1(
|
| 325 |
hidden_states=norm_hidden_states,
|
| 326 |
encoder_hidden_states=norm_encoder_hidden_states,
|
| 327 |
-
image_rotary_emb=image_rotary_emb
|
|
|
|
| 328 |
)
|
| 329 |
|
| 330 |
hidden_states = hidden_states + gate_msa * attn_hidden_states
|
|
@@ -433,7 +408,6 @@ class CogVideoXTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
|
|
| 433 |
use_rotary_positional_embeddings: bool = False,
|
| 434 |
use_learned_positional_embeddings: bool = False,
|
| 435 |
patch_bias: bool = True,
|
| 436 |
-
attention_mode: Optional[str] = "sdpa",
|
| 437 |
):
|
| 438 |
super().__init__()
|
| 439 |
inner_dim = num_attention_heads * attention_head_dim
|
|
@@ -487,7 +461,6 @@ class CogVideoXTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
|
|
| 487 |
dropout=dropout,
|
| 488 |
activation_fn=activation_fn,
|
| 489 |
attention_bias=attention_bias,
|
| 490 |
-
attention_mode=attention_mode,
|
| 491 |
norm_elementwise_affine=norm_elementwise_affine,
|
| 492 |
norm_eps=norm_eps,
|
| 493 |
)
|
|
@@ -523,12 +496,73 @@ class CogVideoXTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
|
|
| 523 |
self.fastercache_hf_step = 30
|
| 524 |
self.fastercache_device = "cuda"
|
| 525 |
self.fastercache_num_blocks_to_cache = len(self.transformer_blocks)
|
| 526 |
-
self.attention_mode =
|
| 527 |
|
| 528 |
|
| 529 |
def _set_gradient_checkpointing(self, module, value=False):
|
| 530 |
self.gradient_checkpointing = value
|
| 531 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 532 |
def forward(
|
| 533 |
self,
|
| 534 |
hidden_states: torch.Tensor,
|
|
@@ -590,7 +624,8 @@ class CogVideoXTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
|
|
| 590 |
block_use_fastercache = i <= self.fastercache_num_blocks_to_cache,
|
| 591 |
fastercache_counter = self.fastercache_counter,
|
| 592 |
fastercache_start_step = self.fastercache_start_step,
|
| 593 |
-
fastercache_device = self.fastercache_device
|
|
|
|
| 594 |
)
|
| 595 |
|
| 596 |
if (controlnet_states is not None) and (i < len(controlnet_states)):
|
|
@@ -660,7 +695,8 @@ class CogVideoXTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
|
|
| 660 |
block_use_fastercache = i <= self.fastercache_num_blocks_to_cache,
|
| 661 |
fastercache_counter = self.fastercache_counter,
|
| 662 |
fastercache_start_step = self.fastercache_start_step,
|
| 663 |
-
fastercache_device = self.fastercache_device
|
|
|
|
| 664 |
)
|
| 665 |
#has_nan = torch.isnan(hidden_states).any()
|
| 666 |
#if has_nan:
|
|
@@ -718,4 +754,4 @@ class CogVideoXTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
|
|
| 718 |
if not return_dict:
|
| 719 |
return (output,)
|
| 720 |
return Transformer2DModelOutput(sample=output)
|
| 721 |
-
|
|
|
|
| 46 |
|
| 47 |
from comfy.ldm.modules.attention import optimized_attention
|
| 48 |
|
| 49 |
+
@torch.compiler.disable()
|
| 50 |
+
def sageattn_func(query, key, value, attn_mask=None, dropout_p=0.0,is_causal=False):
|
| 51 |
+
return sageattn(query, key, value, attn_mask=attn_mask, dropout_p=dropout_p,is_causal=is_causal)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
def fft(tensor):
|
| 54 |
tensor_fft = torch.fft.fft2(tensor)
|
|
|
|
| 67 |
|
| 68 |
return low_freq_fft, high_freq_fft
|
| 69 |
|
|
|
|
| 70 |
class CogVideoXAttnProcessor2_0:
|
| 71 |
r"""
|
| 72 |
Processor for implementing scaled dot-product attention for the CogVideoX model. It applies a rotary embedding on
|
| 73 |
query and key vectors, but does not include spatial normalization.
|
| 74 |
"""
|
| 75 |
|
| 76 |
+
def __init__(self):
|
| 77 |
if not hasattr(F, "scaled_dot_product_attention"):
|
| 78 |
raise ImportError("CogVideoXAttnProcessor requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.")
|
| 79 |
+
|
|
|
|
| 80 |
def __call__(
|
| 81 |
self,
|
| 82 |
attn: Attention,
|
|
|
|
| 84 |
encoder_hidden_states: torch.Tensor,
|
| 85 |
attention_mask: Optional[torch.Tensor] = None,
|
| 86 |
image_rotary_emb: Optional[torch.Tensor] = None,
|
| 87 |
+
attention_mode: Optional[str] = None,
|
| 88 |
) -> torch.Tensor:
|
| 89 |
text_seq_length = encoder_hidden_states.size(1)
|
| 90 |
|
|
|
|
| 101 |
if attn.to_q.weight.dtype == torch.float16 or attn.to_q.weight.dtype == torch.bfloat16:
|
| 102 |
hidden_states = hidden_states.to(attn.to_q.weight.dtype)
|
| 103 |
|
| 104 |
+
if attention_mode != "fused_sdpa" or attention_mode != "fused_sageattn":
|
| 105 |
query = attn.to_q(hidden_states)
|
| 106 |
key = attn.to_k(hidden_states)
|
| 107 |
value = attn.to_v(hidden_states)
|
|
|
|
| 128 |
if not attn.is_cross_attention:
|
| 129 |
key[:, :, text_seq_length:] = apply_rotary_emb(key[:, :, text_seq_length:], image_rotary_emb)
|
| 130 |
|
| 131 |
+
if attention_mode == "sageattn" or attention_mode == "fused_sageattn":
|
| 132 |
+
hidden_states = sageattn_func(query, key, value, attn_mask=attention_mask, dropout_p=0.0,is_causal=False)
|
|
|
|
| 133 |
hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
|
| 134 |
+
elif attention_mode == "sdpa" or attention_mode == "fused_sdpa":
|
| 135 |
+
hidden_states = F.scaled_dot_product_attention(
|
| 136 |
+
query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
|
| 137 |
+
)
|
| 138 |
+
hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
|
| 139 |
+
elif attention_mode == "comfy":
|
| 140 |
+
hidden_states = optimized_attention(query, key, value, mask=attention_mask, heads=attn.heads, skip_reshape=True)
|
| 141 |
|
| 142 |
# linear proj
|
| 143 |
hidden_states = attn.to_out[0](hidden_states)
|
|
|
|
| 203 |
ff_inner_dim: Optional[int] = None,
|
| 204 |
ff_bias: bool = True,
|
| 205 |
attention_out_bias: bool = True,
|
|
|
|
| 206 |
):
|
| 207 |
super().__init__()
|
| 208 |
|
| 209 |
# 1. Self Attention
|
| 210 |
self.norm1 = CogVideoXLayerNormZero(time_embed_dim, dim, norm_elementwise_affine, norm_eps, bias=True)
|
|
|
|
|
|
|
| 211 |
|
| 212 |
+
|
| 213 |
self.attn1 = Attention(
|
| 214 |
query_dim=dim,
|
| 215 |
dim_head=attention_head_dim,
|
|
|
|
| 218 |
eps=1e-6,
|
| 219 |
bias=attention_bias,
|
| 220 |
out_bias=attention_out_bias,
|
| 221 |
+
processor=CogVideoXAttnProcessor2_0(),
|
| 222 |
)
|
| 223 |
|
| 224 |
# 2. Feed Forward
|
|
|
|
| 247 |
fastercache_counter=0,
|
| 248 |
fastercache_start_step=15,
|
| 249 |
fastercache_device="cuda:0",
|
| 250 |
+
attention_mode="sdpa",
|
| 251 |
) -> torch.Tensor:
|
| 252 |
#print("hidden_states in block: ", hidden_states.shape) #1.5: torch.Size([2, 3200, 3072]) 10.: torch.Size([2, 6400, 3072])
|
| 253 |
text_seq_length = encoder_hidden_states.size(1)
|
|
|
|
| 286 |
hidden_states=norm_hidden_states,
|
| 287 |
encoder_hidden_states=norm_encoder_hidden_states,
|
| 288 |
image_rotary_emb=image_rotary_emb,
|
| 289 |
+
attention_mode=attention_mode,
|
| 290 |
)
|
| 291 |
if fastercache_counter == fastercache_start_step:
|
| 292 |
self.cached_hidden_states = [attn_hidden_states.to(fastercache_device), attn_hidden_states.to(fastercache_device)]
|
|
|
|
| 298 |
attn_hidden_states, attn_encoder_hidden_states = self.attn1(
|
| 299 |
hidden_states=norm_hidden_states,
|
| 300 |
encoder_hidden_states=norm_encoder_hidden_states,
|
| 301 |
+
image_rotary_emb=image_rotary_emb,
|
| 302 |
+
attention_mode=attention_mode,
|
| 303 |
)
|
| 304 |
|
| 305 |
hidden_states = hidden_states + gate_msa * attn_hidden_states
|
|
|
|
| 408 |
use_rotary_positional_embeddings: bool = False,
|
| 409 |
use_learned_positional_embeddings: bool = False,
|
| 410 |
patch_bias: bool = True,
|
|
|
|
| 411 |
):
|
| 412 |
super().__init__()
|
| 413 |
inner_dim = num_attention_heads * attention_head_dim
|
|
|
|
| 461 |
dropout=dropout,
|
| 462 |
activation_fn=activation_fn,
|
| 463 |
attention_bias=attention_bias,
|
|
|
|
| 464 |
norm_elementwise_affine=norm_elementwise_affine,
|
| 465 |
norm_eps=norm_eps,
|
| 466 |
)
|
|
|
|
| 496 |
self.fastercache_hf_step = 30
|
| 497 |
self.fastercache_device = "cuda"
|
| 498 |
self.fastercache_num_blocks_to_cache = len(self.transformer_blocks)
|
| 499 |
+
self.attention_mode = "sdpa"
|
| 500 |
|
| 501 |
|
| 502 |
def _set_gradient_checkpointing(self, module, value=False):
|
| 503 |
self.gradient_checkpointing = value
|
| 504 |
+
|
| 505 |
+
@property
|
| 506 |
+
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.attn_processors
|
| 507 |
+
def attn_processors(self) -> Dict[str, AttentionProcessor]:
|
| 508 |
+
r"""
|
| 509 |
+
Returns:
|
| 510 |
+
`dict` of attention processors: A dictionary containing all attention processors used in the model with
|
| 511 |
+
indexed by its weight name.
|
| 512 |
+
"""
|
| 513 |
+
# set recursively
|
| 514 |
+
processors = {}
|
| 515 |
+
|
| 516 |
+
def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
|
| 517 |
+
if hasattr(module, "get_processor"):
|
| 518 |
+
processors[f"{name}.processor"] = module.get_processor()
|
| 519 |
+
|
| 520 |
+
for sub_name, child in module.named_children():
|
| 521 |
+
fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
|
| 522 |
+
|
| 523 |
+
return processors
|
| 524 |
+
|
| 525 |
+
for name, module in self.named_children():
|
| 526 |
+
fn_recursive_add_processors(name, module, processors)
|
| 527 |
+
|
| 528 |
+
return processors
|
| 529 |
+
|
| 530 |
+
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.set_attn_processor
|
| 531 |
+
def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]]):
|
| 532 |
+
r"""
|
| 533 |
+
Sets the attention processor to use to compute attention.
|
| 534 |
+
|
| 535 |
+
Parameters:
|
| 536 |
+
processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
|
| 537 |
+
The instantiated processor class or a dictionary of processor classes that will be set as the processor
|
| 538 |
+
for **all** `Attention` layers.
|
| 539 |
+
|
| 540 |
+
If `processor` is a dict, the key needs to define the path to the corresponding cross attention
|
| 541 |
+
processor. This is strongly recommended when setting trainable attention processors.
|
| 542 |
+
|
| 543 |
+
"""
|
| 544 |
+
count = len(self.attn_processors.keys())
|
| 545 |
+
|
| 546 |
+
if isinstance(processor, dict) and len(processor) != count:
|
| 547 |
+
raise ValueError(
|
| 548 |
+
f"A dict of processors was passed, but the number of processors {len(processor)} does not match the"
|
| 549 |
+
f" number of attention layers: {count}. Please make sure to pass {count} processor classes."
|
| 550 |
+
)
|
| 551 |
+
|
| 552 |
+
def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
|
| 553 |
+
if hasattr(module, "set_processor"):
|
| 554 |
+
if not isinstance(processor, dict):
|
| 555 |
+
module.set_processor(processor)
|
| 556 |
+
else:
|
| 557 |
+
module.set_processor(processor.pop(f"{name}.processor"))
|
| 558 |
+
|
| 559 |
+
for sub_name, child in module.named_children():
|
| 560 |
+
fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
|
| 561 |
+
|
| 562 |
+
for name, module in self.named_children():
|
| 563 |
+
fn_recursive_attn_processor(name, module, processor)
|
| 564 |
+
|
| 565 |
+
|
| 566 |
def forward(
|
| 567 |
self,
|
| 568 |
hidden_states: torch.Tensor,
|
|
|
|
| 624 |
block_use_fastercache = i <= self.fastercache_num_blocks_to_cache,
|
| 625 |
fastercache_counter = self.fastercache_counter,
|
| 626 |
fastercache_start_step = self.fastercache_start_step,
|
| 627 |
+
fastercache_device = self.fastercache_device,
|
| 628 |
+
attention_mode = self.attention_mode
|
| 629 |
)
|
| 630 |
|
| 631 |
if (controlnet_states is not None) and (i < len(controlnet_states)):
|
|
|
|
| 695 |
block_use_fastercache = i <= self.fastercache_num_blocks_to_cache,
|
| 696 |
fastercache_counter = self.fastercache_counter,
|
| 697 |
fastercache_start_step = self.fastercache_start_step,
|
| 698 |
+
fastercache_device = self.fastercache_device,
|
| 699 |
+
attention_mode = self.attention_mode
|
| 700 |
)
|
| 701 |
#has_nan = torch.isnan(hidden_states).any()
|
| 702 |
#if has_nan:
|
|
|
|
| 754 |
if not return_dict:
|
| 755 |
return (output,)
|
| 756 |
return Transformer2DModelOutput(sample=output)
|
| 757 |
+
|
custom_nodes/ComfyUI-CogVideoXWrapper/examples/cogvideox_Fun_180_orbit_01.json
ADDED
|
@@ -0,0 +1,1922 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"last_node_id": 73,
|
| 3 |
+
"last_link_id": 165,
|
| 4 |
+
"nodes": [
|
| 5 |
+
{
|
| 6 |
+
"id": 20,
|
| 7 |
+
"type": "CLIPLoader",
|
| 8 |
+
"pos": {
|
| 9 |
+
"0": -27,
|
| 10 |
+
"1": 42
|
| 11 |
+
},
|
| 12 |
+
"size": {
|
| 13 |
+
"0": 451.30548095703125,
|
| 14 |
+
"1": 82
|
| 15 |
+
},
|
| 16 |
+
"flags": {},
|
| 17 |
+
"order": 0,
|
| 18 |
+
"mode": 0,
|
| 19 |
+
"inputs": [],
|
| 20 |
+
"outputs": [
|
| 21 |
+
{
|
| 22 |
+
"name": "CLIP",
|
| 23 |
+
"type": "CLIP",
|
| 24 |
+
"links": [
|
| 25 |
+
54
|
| 26 |
+
],
|
| 27 |
+
"slot_index": 0,
|
| 28 |
+
"shape": 3
|
| 29 |
+
}
|
| 30 |
+
],
|
| 31 |
+
"properties": {
|
| 32 |
+
"Node name for S&R": "CLIPLoader"
|
| 33 |
+
},
|
| 34 |
+
"widgets_values": [
|
| 35 |
+
"t5\\google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
|
| 36 |
+
"sd3"
|
| 37 |
+
]
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"id": 52,
|
| 41 |
+
"type": "CogVideoLoraSelect",
|
| 42 |
+
"pos": {
|
| 43 |
+
"0": -3,
|
| 44 |
+
"1": -383
|
| 45 |
+
},
|
| 46 |
+
"size": [
|
| 47 |
+
438.44762263180314,
|
| 48 |
+
106
|
| 49 |
+
],
|
| 50 |
+
"flags": {},
|
| 51 |
+
"order": 1,
|
| 52 |
+
"mode": 0,
|
| 53 |
+
"inputs": [
|
| 54 |
+
{
|
| 55 |
+
"name": "prev_lora",
|
| 56 |
+
"type": "COGLORA",
|
| 57 |
+
"link": null,
|
| 58 |
+
"shape": 7
|
| 59 |
+
}
|
| 60 |
+
],
|
| 61 |
+
"outputs": [
|
| 62 |
+
{
|
| 63 |
+
"name": "lora",
|
| 64 |
+
"type": "COGLORA",
|
| 65 |
+
"links": [
|
| 66 |
+
124
|
| 67 |
+
]
|
| 68 |
+
}
|
| 69 |
+
],
|
| 70 |
+
"properties": {
|
| 71 |
+
"Node name for S&R": "CogVideoLoraSelect"
|
| 72 |
+
},
|
| 73 |
+
"widgets_values": [
|
| 74 |
+
"DimensionX_orbit_left_lora_rank256_bf16.safetensors",
|
| 75 |
+
1,
|
| 76 |
+
true
|
| 77 |
+
]
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"id": 55,
|
| 81 |
+
"type": "ImageFlip+",
|
| 82 |
+
"pos": {
|
| 83 |
+
"0": 1247,
|
| 84 |
+
"1": 770
|
| 85 |
+
},
|
| 86 |
+
"size": {
|
| 87 |
+
"0": 315,
|
| 88 |
+
"1": 58
|
| 89 |
+
},
|
| 90 |
+
"flags": {},
|
| 91 |
+
"order": 22,
|
| 92 |
+
"mode": 0,
|
| 93 |
+
"inputs": [
|
| 94 |
+
{
|
| 95 |
+
"name": "image",
|
| 96 |
+
"type": "IMAGE",
|
| 97 |
+
"link": 130
|
| 98 |
+
}
|
| 99 |
+
],
|
| 100 |
+
"outputs": [
|
| 101 |
+
{
|
| 102 |
+
"name": "IMAGE",
|
| 103 |
+
"type": "IMAGE",
|
| 104 |
+
"links": [
|
| 105 |
+
131,
|
| 106 |
+
151
|
| 107 |
+
],
|
| 108 |
+
"slot_index": 0
|
| 109 |
+
}
|
| 110 |
+
],
|
| 111 |
+
"properties": {
|
| 112 |
+
"Node name for S&R": "ImageFlip+"
|
| 113 |
+
},
|
| 114 |
+
"widgets_values": [
|
| 115 |
+
"x"
|
| 116 |
+
]
|
| 117 |
+
},
|
| 118 |
+
{
|
| 119 |
+
"id": 54,
|
| 120 |
+
"type": "ImageFlip+",
|
| 121 |
+
"pos": {
|
| 122 |
+
"0": 847,
|
| 123 |
+
"1": 802
|
| 124 |
+
},
|
| 125 |
+
"size": {
|
| 126 |
+
"0": 315,
|
| 127 |
+
"1": 58
|
| 128 |
+
},
|
| 129 |
+
"flags": {},
|
| 130 |
+
"order": 10,
|
| 131 |
+
"mode": 0,
|
| 132 |
+
"inputs": [
|
| 133 |
+
{
|
| 134 |
+
"name": "image",
|
| 135 |
+
"type": "IMAGE",
|
| 136 |
+
"link": 128
|
| 137 |
+
}
|
| 138 |
+
],
|
| 139 |
+
"outputs": [
|
| 140 |
+
{
|
| 141 |
+
"name": "IMAGE",
|
| 142 |
+
"type": "IMAGE",
|
| 143 |
+
"links": [
|
| 144 |
+
129
|
| 145 |
+
],
|
| 146 |
+
"slot_index": 0
|
| 147 |
+
}
|
| 148 |
+
],
|
| 149 |
+
"properties": {
|
| 150 |
+
"Node name for S&R": "ImageFlip+"
|
| 151 |
+
},
|
| 152 |
+
"widgets_values": [
|
| 153 |
+
"x"
|
| 154 |
+
]
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"id": 50,
|
| 158 |
+
"type": "CogVideoImageEncodeFunInP",
|
| 159 |
+
"pos": {
|
| 160 |
+
"0": 865,
|
| 161 |
+
"1": 567
|
| 162 |
+
},
|
| 163 |
+
"size": [
|
| 164 |
+
253.60000610351562,
|
| 165 |
+
146
|
| 166 |
+
],
|
| 167 |
+
"flags": {},
|
| 168 |
+
"order": 13,
|
| 169 |
+
"mode": 0,
|
| 170 |
+
"inputs": [
|
| 171 |
+
{
|
| 172 |
+
"name": "vae",
|
| 173 |
+
"type": "VAE",
|
| 174 |
+
"link": 119
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"name": "start_image",
|
| 178 |
+
"type": "IMAGE",
|
| 179 |
+
"link": 129
|
| 180 |
+
},
|
| 181 |
+
{
|
| 182 |
+
"name": "end_image",
|
| 183 |
+
"type": "IMAGE",
|
| 184 |
+
"link": null,
|
| 185 |
+
"shape": 7
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"name": "num_frames",
|
| 189 |
+
"type": "INT",
|
| 190 |
+
"link": 126,
|
| 191 |
+
"widget": {
|
| 192 |
+
"name": "num_frames"
|
| 193 |
+
}
|
| 194 |
+
}
|
| 195 |
+
],
|
| 196 |
+
"outputs": [
|
| 197 |
+
{
|
| 198 |
+
"name": "image_cond_latents",
|
| 199 |
+
"type": "LATENT",
|
| 200 |
+
"links": [
|
| 201 |
+
120
|
| 202 |
+
],
|
| 203 |
+
"slot_index": 0
|
| 204 |
+
}
|
| 205 |
+
],
|
| 206 |
+
"properties": {
|
| 207 |
+
"Node name for S&R": "CogVideoImageEncodeFunInP"
|
| 208 |
+
},
|
| 209 |
+
"widgets_values": [
|
| 210 |
+
33,
|
| 211 |
+
true,
|
| 212 |
+
0.03
|
| 213 |
+
]
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"id": 63,
|
| 217 |
+
"type": "CogVideoImageEncodeFunInP",
|
| 218 |
+
"pos": {
|
| 219 |
+
"0": 936.3893432617188,
|
| 220 |
+
"1": 1048.5242919921875
|
| 221 |
+
},
|
| 222 |
+
"size": {
|
| 223 |
+
"0": 253.60000610351562,
|
| 224 |
+
"1": 146
|
| 225 |
+
},
|
| 226 |
+
"flags": {},
|
| 227 |
+
"order": 11,
|
| 228 |
+
"mode": 0,
|
| 229 |
+
"inputs": [
|
| 230 |
+
{
|
| 231 |
+
"name": "vae",
|
| 232 |
+
"type": "VAE",
|
| 233 |
+
"link": 144
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"name": "start_image",
|
| 237 |
+
"type": "IMAGE",
|
| 238 |
+
"link": 146
|
| 239 |
+
},
|
| 240 |
+
{
|
| 241 |
+
"name": "end_image",
|
| 242 |
+
"type": "IMAGE",
|
| 243 |
+
"link": null,
|
| 244 |
+
"shape": 7
|
| 245 |
+
},
|
| 246 |
+
{
|
| 247 |
+
"name": "num_frames",
|
| 248 |
+
"type": "INT",
|
| 249 |
+
"link": 145,
|
| 250 |
+
"widget": {
|
| 251 |
+
"name": "num_frames"
|
| 252 |
+
}
|
| 253 |
+
}
|
| 254 |
+
],
|
| 255 |
+
"outputs": [
|
| 256 |
+
{
|
| 257 |
+
"name": "image_cond_latents",
|
| 258 |
+
"type": "LATENT",
|
| 259 |
+
"links": [
|
| 260 |
+
147
|
| 261 |
+
],
|
| 262 |
+
"slot_index": 0
|
| 263 |
+
}
|
| 264 |
+
],
|
| 265 |
+
"properties": {
|
| 266 |
+
"Node name for S&R": "CogVideoImageEncodeFunInP"
|
| 267 |
+
},
|
| 268 |
+
"widgets_values": [
|
| 269 |
+
33,
|
| 270 |
+
true,
|
| 271 |
+
0.03
|
| 272 |
+
]
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"id": 51,
|
| 276 |
+
"type": "CogVideoDecode",
|
| 277 |
+
"pos": {
|
| 278 |
+
"0": 1219,
|
| 279 |
+
"1": -134
|
| 280 |
+
},
|
| 281 |
+
"size": {
|
| 282 |
+
"0": 315,
|
| 283 |
+
"1": 198
|
| 284 |
+
},
|
| 285 |
+
"flags": {},
|
| 286 |
+
"order": 20,
|
| 287 |
+
"mode": 0,
|
| 288 |
+
"inputs": [
|
| 289 |
+
{
|
| 290 |
+
"name": "vae",
|
| 291 |
+
"type": "VAE",
|
| 292 |
+
"link": 122
|
| 293 |
+
},
|
| 294 |
+
{
|
| 295 |
+
"name": "samples",
|
| 296 |
+
"type": "LATENT",
|
| 297 |
+
"link": 123
|
| 298 |
+
}
|
| 299 |
+
],
|
| 300 |
+
"outputs": [
|
| 301 |
+
{
|
| 302 |
+
"name": "images",
|
| 303 |
+
"type": "IMAGE",
|
| 304 |
+
"links": [
|
| 305 |
+
130
|
| 306 |
+
],
|
| 307 |
+
"slot_index": 0
|
| 308 |
+
}
|
| 309 |
+
],
|
| 310 |
+
"properties": {
|
| 311 |
+
"Node name for S&R": "CogVideoDecode"
|
| 312 |
+
},
|
| 313 |
+
"widgets_values": [
|
| 314 |
+
true,
|
| 315 |
+
240,
|
| 316 |
+
360,
|
| 317 |
+
0.2,
|
| 318 |
+
0.2,
|
| 319 |
+
true
|
| 320 |
+
]
|
| 321 |
+
},
|
| 322 |
+
{
|
| 323 |
+
"id": 53,
|
| 324 |
+
"type": "PrimitiveNode",
|
| 325 |
+
"pos": {
|
| 326 |
+
"0": 117,
|
| 327 |
+
"1": 399
|
| 328 |
+
},
|
| 329 |
+
"size": [
|
| 330 |
+
261.57286031534534,
|
| 331 |
+
82
|
| 332 |
+
],
|
| 333 |
+
"flags": {},
|
| 334 |
+
"order": 2,
|
| 335 |
+
"mode": 0,
|
| 336 |
+
"inputs": [],
|
| 337 |
+
"outputs": [
|
| 338 |
+
{
|
| 339 |
+
"name": "INT",
|
| 340 |
+
"type": "INT",
|
| 341 |
+
"links": [
|
| 342 |
+
126,
|
| 343 |
+
127,
|
| 344 |
+
143,
|
| 345 |
+
145
|
| 346 |
+
],
|
| 347 |
+
"widget": {
|
| 348 |
+
"name": "num_frames"
|
| 349 |
+
},
|
| 350 |
+
"slot_index": 0
|
| 351 |
+
}
|
| 352 |
+
],
|
| 353 |
+
"title": "num_frames",
|
| 354 |
+
"properties": {
|
| 355 |
+
"Run widget replace on values": false
|
| 356 |
+
},
|
| 357 |
+
"widgets_values": [
|
| 358 |
+
33,
|
| 359 |
+
"fixed"
|
| 360 |
+
]
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"id": 48,
|
| 364 |
+
"type": "CogVideoSampler",
|
| 365 |
+
"pos": {
|
| 366 |
+
"0": 1200,
|
| 367 |
+
"1": 124
|
| 368 |
+
},
|
| 369 |
+
"size": [
|
| 370 |
+
330,
|
| 371 |
+
574
|
| 372 |
+
],
|
| 373 |
+
"flags": {},
|
| 374 |
+
"order": 18,
|
| 375 |
+
"mode": 0,
|
| 376 |
+
"inputs": [
|
| 377 |
+
{
|
| 378 |
+
"name": "model",
|
| 379 |
+
"type": "COGVIDEOMODEL",
|
| 380 |
+
"link": 114
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"name": "positive",
|
| 384 |
+
"type": "CONDITIONING",
|
| 385 |
+
"link": 116
|
| 386 |
+
},
|
| 387 |
+
{
|
| 388 |
+
"name": "negative",
|
| 389 |
+
"type": "CONDITIONING",
|
| 390 |
+
"link": 117
|
| 391 |
+
},
|
| 392 |
+
{
|
| 393 |
+
"name": "samples",
|
| 394 |
+
"type": "LATENT",
|
| 395 |
+
"link": null,
|
| 396 |
+
"shape": 7
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"name": "image_cond_latents",
|
| 400 |
+
"type": "LATENT",
|
| 401 |
+
"link": 120,
|
| 402 |
+
"shape": 7
|
| 403 |
+
},
|
| 404 |
+
{
|
| 405 |
+
"name": "context_options",
|
| 406 |
+
"type": "COGCONTEXT",
|
| 407 |
+
"link": null,
|
| 408 |
+
"shape": 7
|
| 409 |
+
},
|
| 410 |
+
{
|
| 411 |
+
"name": "controlnet",
|
| 412 |
+
"type": "COGVIDECONTROLNET",
|
| 413 |
+
"link": null,
|
| 414 |
+
"shape": 7
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"name": "tora_trajectory",
|
| 418 |
+
"type": "TORAFEATURES",
|
| 419 |
+
"link": null,
|
| 420 |
+
"shape": 7
|
| 421 |
+
},
|
| 422 |
+
{
|
| 423 |
+
"name": "fastercache",
|
| 424 |
+
"type": "FASTERCACHEARGS",
|
| 425 |
+
"link": null,
|
| 426 |
+
"shape": 7
|
| 427 |
+
},
|
| 428 |
+
{
|
| 429 |
+
"name": "num_frames",
|
| 430 |
+
"type": "INT",
|
| 431 |
+
"link": 127,
|
| 432 |
+
"widget": {
|
| 433 |
+
"name": "num_frames"
|
| 434 |
+
}
|
| 435 |
+
},
|
| 436 |
+
{
|
| 437 |
+
"name": "seed",
|
| 438 |
+
"type": "INT",
|
| 439 |
+
"link": 156,
|
| 440 |
+
"widget": {
|
| 441 |
+
"name": "seed"
|
| 442 |
+
}
|
| 443 |
+
}
|
| 444 |
+
],
|
| 445 |
+
"outputs": [
|
| 446 |
+
{
|
| 447 |
+
"name": "samples",
|
| 448 |
+
"type": "LATENT",
|
| 449 |
+
"links": [
|
| 450 |
+
123
|
| 451 |
+
],
|
| 452 |
+
"slot_index": 0
|
| 453 |
+
}
|
| 454 |
+
],
|
| 455 |
+
"properties": {
|
| 456 |
+
"Node name for S&R": "CogVideoSampler"
|
| 457 |
+
},
|
| 458 |
+
"widgets_values": [
|
| 459 |
+
33,
|
| 460 |
+
40,
|
| 461 |
+
6,
|
| 462 |
+
458091243358278,
|
| 463 |
+
"fixed",
|
| 464 |
+
"CogVideoXDDIM",
|
| 465 |
+
1
|
| 466 |
+
]
|
| 467 |
+
},
|
| 468 |
+
{
|
| 469 |
+
"id": 68,
|
| 470 |
+
"type": "PrimitiveNode",
|
| 471 |
+
"pos": {
|
| 472 |
+
"0": 514,
|
| 473 |
+
"1": 985
|
| 474 |
+
},
|
| 475 |
+
"size": [
|
| 476 |
+
295.90419649751334,
|
| 477 |
+
82
|
| 478 |
+
],
|
| 479 |
+
"flags": {},
|
| 480 |
+
"order": 3,
|
| 481 |
+
"mode": 0,
|
| 482 |
+
"inputs": [],
|
| 483 |
+
"outputs": [
|
| 484 |
+
{
|
| 485 |
+
"name": "INT",
|
| 486 |
+
"type": "INT",
|
| 487 |
+
"links": [
|
| 488 |
+
156,
|
| 489 |
+
157
|
| 490 |
+
],
|
| 491 |
+
"widget": {
|
| 492 |
+
"name": "seed"
|
| 493 |
+
},
|
| 494 |
+
"slot_index": 0
|
| 495 |
+
}
|
| 496 |
+
],
|
| 497 |
+
"title": "seed",
|
| 498 |
+
"properties": {
|
| 499 |
+
"Run widget replace on values": false
|
| 500 |
+
},
|
| 501 |
+
"widgets_values": [
|
| 502 |
+
458091243358278,
|
| 503 |
+
"fixed"
|
| 504 |
+
]
|
| 505 |
+
},
|
| 506 |
+
{
|
| 507 |
+
"id": 69,
|
| 508 |
+
"type": "DownloadAndLoadFlorence2Model",
|
| 509 |
+
"pos": {
|
| 510 |
+
"0": -1305,
|
| 511 |
+
"1": -13
|
| 512 |
+
},
|
| 513 |
+
"size": [
|
| 514 |
+
442.37554309913344,
|
| 515 |
+
106
|
| 516 |
+
],
|
| 517 |
+
"flags": {},
|
| 518 |
+
"order": 4,
|
| 519 |
+
"mode": 0,
|
| 520 |
+
"inputs": [
|
| 521 |
+
{
|
| 522 |
+
"name": "lora",
|
| 523 |
+
"type": "PEFTLORA",
|
| 524 |
+
"link": null,
|
| 525 |
+
"shape": 7
|
| 526 |
+
}
|
| 527 |
+
],
|
| 528 |
+
"outputs": [
|
| 529 |
+
{
|
| 530 |
+
"name": "florence2_model",
|
| 531 |
+
"type": "FL2MODEL",
|
| 532 |
+
"links": [
|
| 533 |
+
158
|
| 534 |
+
],
|
| 535 |
+
"slot_index": 0
|
| 536 |
+
}
|
| 537 |
+
],
|
| 538 |
+
"properties": {
|
| 539 |
+
"Node name for S&R": "DownloadAndLoadFlorence2Model"
|
| 540 |
+
},
|
| 541 |
+
"widgets_values": [
|
| 542 |
+
"MiaoshouAI/Florence-2-base-PromptGen-v2.0",
|
| 543 |
+
"fp16",
|
| 544 |
+
"sdpa"
|
| 545 |
+
]
|
| 546 |
+
},
|
| 547 |
+
{
|
| 548 |
+
"id": 37,
|
| 549 |
+
"type": "ImageResizeKJ",
|
| 550 |
+
"pos": {
|
| 551 |
+
"0": -202,
|
| 552 |
+
"1": 588
|
| 553 |
+
},
|
| 554 |
+
"size": {
|
| 555 |
+
"0": 315,
|
| 556 |
+
"1": 266
|
| 557 |
+
},
|
| 558 |
+
"flags": {},
|
| 559 |
+
"order": 9,
|
| 560 |
+
"mode": 0,
|
| 561 |
+
"inputs": [
|
| 562 |
+
{
|
| 563 |
+
"name": "image",
|
| 564 |
+
"type": "IMAGE",
|
| 565 |
+
"link": 71
|
| 566 |
+
},
|
| 567 |
+
{
|
| 568 |
+
"name": "get_image_size",
|
| 569 |
+
"type": "IMAGE",
|
| 570 |
+
"link": null,
|
| 571 |
+
"shape": 7
|
| 572 |
+
},
|
| 573 |
+
{
|
| 574 |
+
"name": "width_input",
|
| 575 |
+
"type": "INT",
|
| 576 |
+
"link": null,
|
| 577 |
+
"widget": {
|
| 578 |
+
"name": "width_input"
|
| 579 |
+
}
|
| 580 |
+
},
|
| 581 |
+
{
|
| 582 |
+
"name": "height_input",
|
| 583 |
+
"type": "INT",
|
| 584 |
+
"link": null,
|
| 585 |
+
"widget": {
|
| 586 |
+
"name": "height_input"
|
| 587 |
+
}
|
| 588 |
+
}
|
| 589 |
+
],
|
| 590 |
+
"outputs": [
|
| 591 |
+
{
|
| 592 |
+
"name": "IMAGE",
|
| 593 |
+
"type": "IMAGE",
|
| 594 |
+
"links": [
|
| 595 |
+
128,
|
| 596 |
+
146,
|
| 597 |
+
159
|
| 598 |
+
],
|
| 599 |
+
"slot_index": 0,
|
| 600 |
+
"shape": 3
|
| 601 |
+
},
|
| 602 |
+
{
|
| 603 |
+
"name": "width",
|
| 604 |
+
"type": "INT",
|
| 605 |
+
"links": null,
|
| 606 |
+
"shape": 3
|
| 607 |
+
},
|
| 608 |
+
{
|
| 609 |
+
"name": "height",
|
| 610 |
+
"type": "INT",
|
| 611 |
+
"links": null,
|
| 612 |
+
"shape": 3
|
| 613 |
+
}
|
| 614 |
+
],
|
| 615 |
+
"properties": {
|
| 616 |
+
"Node name for S&R": "ImageResizeKJ"
|
| 617 |
+
},
|
| 618 |
+
"widgets_values": [
|
| 619 |
+
768,
|
| 620 |
+
768,
|
| 621 |
+
"lanczos",
|
| 622 |
+
true,
|
| 623 |
+
2,
|
| 624 |
+
0,
|
| 625 |
+
0,
|
| 626 |
+
"disabled"
|
| 627 |
+
]
|
| 628 |
+
},
|
| 629 |
+
{
|
| 630 |
+
"id": 71,
|
| 631 |
+
"type": "StringConstantMultiline",
|
| 632 |
+
"pos": {
|
| 633 |
+
"0": -709,
|
| 634 |
+
"1": 20
|
| 635 |
+
},
|
| 636 |
+
"size": {
|
| 637 |
+
"0": 400,
|
| 638 |
+
"1": 200
|
| 639 |
+
},
|
| 640 |
+
"flags": {},
|
| 641 |
+
"order": 5,
|
| 642 |
+
"mode": 0,
|
| 643 |
+
"inputs": [],
|
| 644 |
+
"outputs": [
|
| 645 |
+
{
|
| 646 |
+
"name": "STRING",
|
| 647 |
+
"type": "STRING",
|
| 648 |
+
"links": [
|
| 649 |
+
160
|
| 650 |
+
],
|
| 651 |
+
"slot_index": 0
|
| 652 |
+
}
|
| 653 |
+
],
|
| 654 |
+
"properties": {
|
| 655 |
+
"Node name for S&R": "StringConstantMultiline"
|
| 656 |
+
},
|
| 657 |
+
"widgets_values": [
|
| 658 |
+
"camera orbit",
|
| 659 |
+
false
|
| 660 |
+
]
|
| 661 |
+
},
|
| 662 |
+
{
|
| 663 |
+
"id": 72,
|
| 664 |
+
"type": "JoinStrings",
|
| 665 |
+
"pos": {
|
| 666 |
+
"0": -232,
|
| 667 |
+
"1": 231
|
| 668 |
+
},
|
| 669 |
+
"size": [
|
| 670 |
+
315,
|
| 671 |
+
106
|
| 672 |
+
],
|
| 673 |
+
"flags": {},
|
| 674 |
+
"order": 15,
|
| 675 |
+
"mode": 0,
|
| 676 |
+
"inputs": [
|
| 677 |
+
{
|
| 678 |
+
"name": "string1",
|
| 679 |
+
"type": "STRING",
|
| 680 |
+
"link": 160,
|
| 681 |
+
"widget": {
|
| 682 |
+
"name": "string1"
|
| 683 |
+
}
|
| 684 |
+
},
|
| 685 |
+
{
|
| 686 |
+
"name": "string2",
|
| 687 |
+
"type": "STRING",
|
| 688 |
+
"link": 162,
|
| 689 |
+
"widget": {
|
| 690 |
+
"name": "string2"
|
| 691 |
+
}
|
| 692 |
+
}
|
| 693 |
+
],
|
| 694 |
+
"outputs": [
|
| 695 |
+
{
|
| 696 |
+
"name": "STRING",
|
| 697 |
+
"type": "STRING",
|
| 698 |
+
"links": [
|
| 699 |
+
163
|
| 700 |
+
],
|
| 701 |
+
"slot_index": 0
|
| 702 |
+
}
|
| 703 |
+
],
|
| 704 |
+
"properties": {
|
| 705 |
+
"Node name for S&R": "JoinStrings"
|
| 706 |
+
},
|
| 707 |
+
"widgets_values": [
|
| 708 |
+
"",
|
| 709 |
+
"",
|
| 710 |
+
", "
|
| 711 |
+
]
|
| 712 |
+
},
|
| 713 |
+
{
|
| 714 |
+
"id": 70,
|
| 715 |
+
"type": "Florence2Run",
|
| 716 |
+
"pos": {
|
| 717 |
+
"0": -1276,
|
| 718 |
+
"1": 170
|
| 719 |
+
},
|
| 720 |
+
"size": {
|
| 721 |
+
"0": 400,
|
| 722 |
+
"1": 352
|
| 723 |
+
},
|
| 724 |
+
"flags": {},
|
| 725 |
+
"order": 12,
|
| 726 |
+
"mode": 0,
|
| 727 |
+
"inputs": [
|
| 728 |
+
{
|
| 729 |
+
"name": "image",
|
| 730 |
+
"type": "IMAGE",
|
| 731 |
+
"link": 159
|
| 732 |
+
},
|
| 733 |
+
{
|
| 734 |
+
"name": "florence2_model",
|
| 735 |
+
"type": "FL2MODEL",
|
| 736 |
+
"link": 158
|
| 737 |
+
}
|
| 738 |
+
],
|
| 739 |
+
"outputs": [
|
| 740 |
+
{
|
| 741 |
+
"name": "image",
|
| 742 |
+
"type": "IMAGE",
|
| 743 |
+
"links": null,
|
| 744 |
+
"slot_index": 0
|
| 745 |
+
},
|
| 746 |
+
{
|
| 747 |
+
"name": "mask",
|
| 748 |
+
"type": "MASK",
|
| 749 |
+
"links": null
|
| 750 |
+
},
|
| 751 |
+
{
|
| 752 |
+
"name": "caption",
|
| 753 |
+
"type": "STRING",
|
| 754 |
+
"links": [
|
| 755 |
+
161,
|
| 756 |
+
162
|
| 757 |
+
],
|
| 758 |
+
"slot_index": 2
|
| 759 |
+
},
|
| 760 |
+
{
|
| 761 |
+
"name": "data",
|
| 762 |
+
"type": "JSON",
|
| 763 |
+
"links": null
|
| 764 |
+
}
|
| 765 |
+
],
|
| 766 |
+
"properties": {
|
| 767 |
+
"Node name for S&R": "Florence2Run"
|
| 768 |
+
},
|
| 769 |
+
"widgets_values": [
|
| 770 |
+
"",
|
| 771 |
+
"more_detailed_caption",
|
| 772 |
+
true,
|
| 773 |
+
false,
|
| 774 |
+
226,
|
| 775 |
+
3,
|
| 776 |
+
true,
|
| 777 |
+
"",
|
| 778 |
+
586007018516875,
|
| 779 |
+
"fixed"
|
| 780 |
+
]
|
| 781 |
+
},
|
| 782 |
+
{
|
| 783 |
+
"id": 73,
|
| 784 |
+
"type": "ShowText|pysssss",
|
| 785 |
+
"pos": {
|
| 786 |
+
"0": -793,
|
| 787 |
+
"1": 321
|
| 788 |
+
},
|
| 789 |
+
"size": [
|
| 790 |
+
502.3168660879171,
|
| 791 |
+
180.55015376950485
|
| 792 |
+
],
|
| 793 |
+
"flags": {},
|
| 794 |
+
"order": 14,
|
| 795 |
+
"mode": 0,
|
| 796 |
+
"inputs": [
|
| 797 |
+
{
|
| 798 |
+
"name": "text",
|
| 799 |
+
"type": "STRING",
|
| 800 |
+
"link": 161,
|
| 801 |
+
"widget": {
|
| 802 |
+
"name": "text"
|
| 803 |
+
}
|
| 804 |
+
}
|
| 805 |
+
],
|
| 806 |
+
"outputs": [
|
| 807 |
+
{
|
| 808 |
+
"name": "STRING",
|
| 809 |
+
"type": "STRING",
|
| 810 |
+
"links": null,
|
| 811 |
+
"shape": 6
|
| 812 |
+
}
|
| 813 |
+
],
|
| 814 |
+
"properties": {
|
| 815 |
+
"Node name for S&R": "ShowText|pysssss"
|
| 816 |
+
},
|
| 817 |
+
"widgets_values": [
|
| 818 |
+
"",
|
| 819 |
+
"A digital illustration shoot from a frontal camera angle about a dark knight in shining armor stands in a dimly lit forest, with a glowing fire in the background. the image also shows a mysterious and intense atmosphere. on the middle of the image, a male knight appears to be standing, facing the viewer, with his full body visible. he is wearing a full plate armor with a red cloth draped over his shoulders. the armor is shiny and detailed, with intricate designs and a chain attached to it. he has two curved horns on his head, and his eyes are glowing yellow. the background is dark and smoky, with tall trees and a warm, glowing fire."
|
| 820 |
+
]
|
| 821 |
+
},
|
| 822 |
+
{
|
| 823 |
+
"id": 59,
|
| 824 |
+
"type": "GIMMVFI_interpolate",
|
| 825 |
+
"pos": {
|
| 826 |
+
"0": 2880,
|
| 827 |
+
"1": -200
|
| 828 |
+
},
|
| 829 |
+
"size": {
|
| 830 |
+
"0": 330,
|
| 831 |
+
"1": 150
|
| 832 |
+
},
|
| 833 |
+
"flags": {},
|
| 834 |
+
"order": 27,
|
| 835 |
+
"mode": 0,
|
| 836 |
+
"inputs": [
|
| 837 |
+
{
|
| 838 |
+
"name": "gimmvfi_model",
|
| 839 |
+
"type": "GIMMVIF_MODEL",
|
| 840 |
+
"link": 134
|
| 841 |
+
},
|
| 842 |
+
{
|
| 843 |
+
"name": "images",
|
| 844 |
+
"type": "IMAGE",
|
| 845 |
+
"link": 165
|
| 846 |
+
}
|
| 847 |
+
],
|
| 848 |
+
"outputs": [
|
| 849 |
+
{
|
| 850 |
+
"name": "images",
|
| 851 |
+
"type": "IMAGE",
|
| 852 |
+
"links": [
|
| 853 |
+
164
|
| 854 |
+
],
|
| 855 |
+
"slot_index": 0
|
| 856 |
+
},
|
| 857 |
+
{
|
| 858 |
+
"name": "flow_tensors",
|
| 859 |
+
"type": "IMAGE",
|
| 860 |
+
"links": null
|
| 861 |
+
}
|
| 862 |
+
],
|
| 863 |
+
"properties": {
|
| 864 |
+
"Node name for S&R": "GIMMVFI_interpolate"
|
| 865 |
+
},
|
| 866 |
+
"widgets_values": [
|
| 867 |
+
1,
|
| 868 |
+
2,
|
| 869 |
+
223874235763998,
|
| 870 |
+
"fixed"
|
| 871 |
+
]
|
| 872 |
+
},
|
| 873 |
+
{
|
| 874 |
+
"id": 67,
|
| 875 |
+
"type": "ImageBatchMulti",
|
| 876 |
+
"pos": {
|
| 877 |
+
"0": 2900,
|
| 878 |
+
"1": 20
|
| 879 |
+
},
|
| 880 |
+
"size": {
|
| 881 |
+
"0": 210,
|
| 882 |
+
"1": 102
|
| 883 |
+
},
|
| 884 |
+
"flags": {},
|
| 885 |
+
"order": 26,
|
| 886 |
+
"mode": 0,
|
| 887 |
+
"inputs": [
|
| 888 |
+
{
|
| 889 |
+
"name": "image_1",
|
| 890 |
+
"type": "IMAGE",
|
| 891 |
+
"link": 152
|
| 892 |
+
},
|
| 893 |
+
{
|
| 894 |
+
"name": "image_2",
|
| 895 |
+
"type": "IMAGE",
|
| 896 |
+
"link": 153
|
| 897 |
+
}
|
| 898 |
+
],
|
| 899 |
+
"outputs": [
|
| 900 |
+
{
|
| 901 |
+
"name": "images",
|
| 902 |
+
"type": "IMAGE",
|
| 903 |
+
"links": [
|
| 904 |
+
165
|
| 905 |
+
],
|
| 906 |
+
"slot_index": 0
|
| 907 |
+
}
|
| 908 |
+
],
|
| 909 |
+
"properties": {},
|
| 910 |
+
"widgets_values": [
|
| 911 |
+
2,
|
| 912 |
+
null
|
| 913 |
+
]
|
| 914 |
+
},
|
| 915 |
+
{
|
| 916 |
+
"id": 66,
|
| 917 |
+
"type": "ReverseImageBatch",
|
| 918 |
+
"pos": {
|
| 919 |
+
"0": 2590,
|
| 920 |
+
"1": -20
|
| 921 |
+
},
|
| 922 |
+
"size": {
|
| 923 |
+
"0": 239.40000915527344,
|
| 924 |
+
"1": 26
|
| 925 |
+
},
|
| 926 |
+
"flags": {},
|
| 927 |
+
"order": 25,
|
| 928 |
+
"mode": 0,
|
| 929 |
+
"inputs": [
|
| 930 |
+
{
|
| 931 |
+
"name": "images",
|
| 932 |
+
"type": "IMAGE",
|
| 933 |
+
"link": 151
|
| 934 |
+
}
|
| 935 |
+
],
|
| 936 |
+
"outputs": [
|
| 937 |
+
{
|
| 938 |
+
"name": "IMAGE",
|
| 939 |
+
"type": "IMAGE",
|
| 940 |
+
"links": [
|
| 941 |
+
152
|
| 942 |
+
],
|
| 943 |
+
"slot_index": 0
|
| 944 |
+
}
|
| 945 |
+
],
|
| 946 |
+
"properties": {
|
| 947 |
+
"Node name for S&R": "ReverseImageBatch"
|
| 948 |
+
}
|
| 949 |
+
},
|
| 950 |
+
{
|
| 951 |
+
"id": 58,
|
| 952 |
+
"type": "DownloadAndLoadGIMMVFIModel",
|
| 953 |
+
"pos": {
|
| 954 |
+
"0": 2510,
|
| 955 |
+
"1": -210
|
| 956 |
+
},
|
| 957 |
+
"size": {
|
| 958 |
+
"0": 315,
|
| 959 |
+
"1": 58
|
| 960 |
+
},
|
| 961 |
+
"flags": {},
|
| 962 |
+
"order": 6,
|
| 963 |
+
"mode": 0,
|
| 964 |
+
"inputs": [],
|
| 965 |
+
"outputs": [
|
| 966 |
+
{
|
| 967 |
+
"name": "gimmvfi_model",
|
| 968 |
+
"type": "GIMMVIF_MODEL",
|
| 969 |
+
"links": [
|
| 970 |
+
134
|
| 971 |
+
],
|
| 972 |
+
"slot_index": 0
|
| 973 |
+
}
|
| 974 |
+
],
|
| 975 |
+
"properties": {
|
| 976 |
+
"Node name for S&R": "DownloadAndLoadGIMMVFIModel"
|
| 977 |
+
},
|
| 978 |
+
"widgets_values": [
|
| 979 |
+
"gimmvfi_r_arb_lpips_fp32.safetensors"
|
| 980 |
+
]
|
| 981 |
+
},
|
| 982 |
+
{
|
| 983 |
+
"id": 36,
|
| 984 |
+
"type": "LoadImage",
|
| 985 |
+
"pos": {
|
| 986 |
+
"0": -808,
|
| 987 |
+
"1": 573
|
| 988 |
+
},
|
| 989 |
+
"size": [
|
| 990 |
+
556.7343073028583,
|
| 991 |
+
502.50569947324857
|
| 992 |
+
],
|
| 993 |
+
"flags": {},
|
| 994 |
+
"order": 7,
|
| 995 |
+
"mode": 0,
|
| 996 |
+
"inputs": [],
|
| 997 |
+
"outputs": [
|
| 998 |
+
{
|
| 999 |
+
"name": "IMAGE",
|
| 1000 |
+
"type": "IMAGE",
|
| 1001 |
+
"links": [
|
| 1002 |
+
71
|
| 1003 |
+
],
|
| 1004 |
+
"slot_index": 0,
|
| 1005 |
+
"shape": 3
|
| 1006 |
+
},
|
| 1007 |
+
{
|
| 1008 |
+
"name": "MASK",
|
| 1009 |
+
"type": "MASK",
|
| 1010 |
+
"links": null,
|
| 1011 |
+
"shape": 3
|
| 1012 |
+
}
|
| 1013 |
+
],
|
| 1014 |
+
"properties": {
|
| 1015 |
+
"Node name for S&R": "LoadImage"
|
| 1016 |
+
},
|
| 1017 |
+
"widgets_values": [
|
| 1018 |
+
"ComfyUI_temp_lhgah_00059_.png",
|
| 1019 |
+
"image"
|
| 1020 |
+
]
|
| 1021 |
+
},
|
| 1022 |
+
{
|
| 1023 |
+
"id": 60,
|
| 1024 |
+
"type": "VHS_VideoCombine",
|
| 1025 |
+
"pos": {
|
| 1026 |
+
"0": 2520,
|
| 1027 |
+
"1": 180
|
| 1028 |
+
},
|
| 1029 |
+
"size": [
|
| 1030 |
+
860.5738525390625,
|
| 1031 |
+
1444.76513671875
|
| 1032 |
+
],
|
| 1033 |
+
"flags": {},
|
| 1034 |
+
"order": 28,
|
| 1035 |
+
"mode": 0,
|
| 1036 |
+
"inputs": [
|
| 1037 |
+
{
|
| 1038 |
+
"name": "images",
|
| 1039 |
+
"type": "IMAGE",
|
| 1040 |
+
"link": 164
|
| 1041 |
+
},
|
| 1042 |
+
{
|
| 1043 |
+
"name": "audio",
|
| 1044 |
+
"type": "AUDIO",
|
| 1045 |
+
"link": null,
|
| 1046 |
+
"shape": 7
|
| 1047 |
+
},
|
| 1048 |
+
{
|
| 1049 |
+
"name": "meta_batch",
|
| 1050 |
+
"type": "VHS_BatchManager",
|
| 1051 |
+
"link": null,
|
| 1052 |
+
"shape": 7
|
| 1053 |
+
},
|
| 1054 |
+
{
|
| 1055 |
+
"name": "vae",
|
| 1056 |
+
"type": "VAE",
|
| 1057 |
+
"link": null,
|
| 1058 |
+
"shape": 7
|
| 1059 |
+
}
|
| 1060 |
+
],
|
| 1061 |
+
"outputs": [
|
| 1062 |
+
{
|
| 1063 |
+
"name": "Filenames",
|
| 1064 |
+
"type": "VHS_FILENAMES",
|
| 1065 |
+
"links": null,
|
| 1066 |
+
"shape": 3
|
| 1067 |
+
}
|
| 1068 |
+
],
|
| 1069 |
+
"properties": {
|
| 1070 |
+
"Node name for S&R": "VHS_VideoCombine"
|
| 1071 |
+
},
|
| 1072 |
+
"widgets_values": {
|
| 1073 |
+
"frame_rate": 16,
|
| 1074 |
+
"loop_count": 0,
|
| 1075 |
+
"filename_prefix": "CogVideoX_Fun_orbits",
|
| 1076 |
+
"format": "video/h264-mp4",
|
| 1077 |
+
"pix_fmt": "yuv420p",
|
| 1078 |
+
"crf": 19,
|
| 1079 |
+
"save_metadata": true,
|
| 1080 |
+
"pingpong": false,
|
| 1081 |
+
"save_output": true,
|
| 1082 |
+
"videopreview": {
|
| 1083 |
+
"hidden": false,
|
| 1084 |
+
"paused": false,
|
| 1085 |
+
"params": {
|
| 1086 |
+
"filename": "CogVideoX_Fun_orbits_00003.mp4",
|
| 1087 |
+
"subfolder": "",
|
| 1088 |
+
"type": "temp",
|
| 1089 |
+
"format": "video/h264-mp4",
|
| 1090 |
+
"frame_rate": 16
|
| 1091 |
+
},
|
| 1092 |
+
"muted": false
|
| 1093 |
+
}
|
| 1094 |
+
}
|
| 1095 |
+
},
|
| 1096 |
+
{
|
| 1097 |
+
"id": 30,
|
| 1098 |
+
"type": "CogVideoTextEncode",
|
| 1099 |
+
"pos": {
|
| 1100 |
+
"0": 478,
|
| 1101 |
+
"1": 90
|
| 1102 |
+
},
|
| 1103 |
+
"size": [
|
| 1104 |
+
471.90142822265625,
|
| 1105 |
+
168.08047485351562
|
| 1106 |
+
],
|
| 1107 |
+
"flags": {},
|
| 1108 |
+
"order": 16,
|
| 1109 |
+
"mode": 0,
|
| 1110 |
+
"inputs": [
|
| 1111 |
+
{
|
| 1112 |
+
"name": "clip",
|
| 1113 |
+
"type": "CLIP",
|
| 1114 |
+
"link": 54
|
| 1115 |
+
},
|
| 1116 |
+
{
|
| 1117 |
+
"name": "prompt",
|
| 1118 |
+
"type": "STRING",
|
| 1119 |
+
"link": 163,
|
| 1120 |
+
"widget": {
|
| 1121 |
+
"name": "prompt"
|
| 1122 |
+
}
|
| 1123 |
+
}
|
| 1124 |
+
],
|
| 1125 |
+
"outputs": [
|
| 1126 |
+
{
|
| 1127 |
+
"name": "conditioning",
|
| 1128 |
+
"type": "CONDITIONING",
|
| 1129 |
+
"links": [
|
| 1130 |
+
116,
|
| 1131 |
+
140
|
| 1132 |
+
],
|
| 1133 |
+
"slot_index": 0,
|
| 1134 |
+
"shape": 3
|
| 1135 |
+
},
|
| 1136 |
+
{
|
| 1137 |
+
"name": "clip",
|
| 1138 |
+
"type": "CLIP",
|
| 1139 |
+
"links": [
|
| 1140 |
+
110
|
| 1141 |
+
],
|
| 1142 |
+
"slot_index": 1
|
| 1143 |
+
}
|
| 1144 |
+
],
|
| 1145 |
+
"properties": {
|
| 1146 |
+
"Node name for S&R": "CogVideoTextEncode"
|
| 1147 |
+
},
|
| 1148 |
+
"widgets_values": [
|
| 1149 |
+
"camera orbit around a mouse knight standing in a fantasy forest",
|
| 1150 |
+
1,
|
| 1151 |
+
false
|
| 1152 |
+
]
|
| 1153 |
+
},
|
| 1154 |
+
{
|
| 1155 |
+
"id": 31,
|
| 1156 |
+
"type": "CogVideoTextEncode",
|
| 1157 |
+
"pos": {
|
| 1158 |
+
"0": 493,
|
| 1159 |
+
"1": 334
|
| 1160 |
+
},
|
| 1161 |
+
"size": {
|
| 1162 |
+
"0": 463.01251220703125,
|
| 1163 |
+
"1": 144
|
| 1164 |
+
},
|
| 1165 |
+
"flags": {},
|
| 1166 |
+
"order": 17,
|
| 1167 |
+
"mode": 0,
|
| 1168 |
+
"inputs": [
|
| 1169 |
+
{
|
| 1170 |
+
"name": "clip",
|
| 1171 |
+
"type": "CLIP",
|
| 1172 |
+
"link": 110
|
| 1173 |
+
}
|
| 1174 |
+
],
|
| 1175 |
+
"outputs": [
|
| 1176 |
+
{
|
| 1177 |
+
"name": "conditioning",
|
| 1178 |
+
"type": "CONDITIONING",
|
| 1179 |
+
"links": [
|
| 1180 |
+
117,
|
| 1181 |
+
141
|
| 1182 |
+
],
|
| 1183 |
+
"slot_index": 0,
|
| 1184 |
+
"shape": 3
|
| 1185 |
+
},
|
| 1186 |
+
{
|
| 1187 |
+
"name": "clip",
|
| 1188 |
+
"type": "CLIP",
|
| 1189 |
+
"links": null
|
| 1190 |
+
}
|
| 1191 |
+
],
|
| 1192 |
+
"properties": {
|
| 1193 |
+
"Node name for S&R": "CogVideoTextEncode"
|
| 1194 |
+
},
|
| 1195 |
+
"widgets_values": [
|
| 1196 |
+
"The video is not of a high quality, it has a low resolution. Watermark present in each frame. Strange motion trajectory. ",
|
| 1197 |
+
1,
|
| 1198 |
+
true
|
| 1199 |
+
]
|
| 1200 |
+
},
|
| 1201 |
+
{
|
| 1202 |
+
"id": 62,
|
| 1203 |
+
"type": "CogVideoSampler",
|
| 1204 |
+
"pos": {
|
| 1205 |
+
"0": 1258,
|
| 1206 |
+
"1": 1151
|
| 1207 |
+
},
|
| 1208 |
+
"size": [
|
| 1209 |
+
330,
|
| 1210 |
+
574
|
| 1211 |
+
],
|
| 1212 |
+
"flags": {},
|
| 1213 |
+
"order": 19,
|
| 1214 |
+
"mode": 0,
|
| 1215 |
+
"inputs": [
|
| 1216 |
+
{
|
| 1217 |
+
"name": "model",
|
| 1218 |
+
"type": "COGVIDEOMODEL",
|
| 1219 |
+
"link": 139
|
| 1220 |
+
},
|
| 1221 |
+
{
|
| 1222 |
+
"name": "positive",
|
| 1223 |
+
"type": "CONDITIONING",
|
| 1224 |
+
"link": 140
|
| 1225 |
+
},
|
| 1226 |
+
{
|
| 1227 |
+
"name": "negative",
|
| 1228 |
+
"type": "CONDITIONING",
|
| 1229 |
+
"link": 141
|
| 1230 |
+
},
|
| 1231 |
+
{
|
| 1232 |
+
"name": "samples",
|
| 1233 |
+
"type": "LATENT",
|
| 1234 |
+
"link": null,
|
| 1235 |
+
"shape": 7
|
| 1236 |
+
},
|
| 1237 |
+
{
|
| 1238 |
+
"name": "image_cond_latents",
|
| 1239 |
+
"type": "LATENT",
|
| 1240 |
+
"link": 147,
|
| 1241 |
+
"shape": 7
|
| 1242 |
+
},
|
| 1243 |
+
{
|
| 1244 |
+
"name": "context_options",
|
| 1245 |
+
"type": "COGCONTEXT",
|
| 1246 |
+
"link": null,
|
| 1247 |
+
"shape": 7
|
| 1248 |
+
},
|
| 1249 |
+
{
|
| 1250 |
+
"name": "controlnet",
|
| 1251 |
+
"type": "COGVIDECONTROLNET",
|
| 1252 |
+
"link": null,
|
| 1253 |
+
"shape": 7
|
| 1254 |
+
},
|
| 1255 |
+
{
|
| 1256 |
+
"name": "tora_trajectory",
|
| 1257 |
+
"type": "TORAFEATURES",
|
| 1258 |
+
"link": null,
|
| 1259 |
+
"shape": 7
|
| 1260 |
+
},
|
| 1261 |
+
{
|
| 1262 |
+
"name": "fastercache",
|
| 1263 |
+
"type": "FASTERCACHEARGS",
|
| 1264 |
+
"link": null,
|
| 1265 |
+
"shape": 7
|
| 1266 |
+
},
|
| 1267 |
+
{
|
| 1268 |
+
"name": "num_frames",
|
| 1269 |
+
"type": "INT",
|
| 1270 |
+
"link": 143,
|
| 1271 |
+
"widget": {
|
| 1272 |
+
"name": "num_frames"
|
| 1273 |
+
}
|
| 1274 |
+
},
|
| 1275 |
+
{
|
| 1276 |
+
"name": "seed",
|
| 1277 |
+
"type": "INT",
|
| 1278 |
+
"link": 157,
|
| 1279 |
+
"widget": {
|
| 1280 |
+
"name": "seed"
|
| 1281 |
+
}
|
| 1282 |
+
}
|
| 1283 |
+
],
|
| 1284 |
+
"outputs": [
|
| 1285 |
+
{
|
| 1286 |
+
"name": "samples",
|
| 1287 |
+
"type": "LATENT",
|
| 1288 |
+
"links": [
|
| 1289 |
+
148
|
| 1290 |
+
],
|
| 1291 |
+
"slot_index": 0
|
| 1292 |
+
}
|
| 1293 |
+
],
|
| 1294 |
+
"properties": {
|
| 1295 |
+
"Node name for S&R": "CogVideoSampler"
|
| 1296 |
+
},
|
| 1297 |
+
"widgets_values": [
|
| 1298 |
+
33,
|
| 1299 |
+
40,
|
| 1300 |
+
6,
|
| 1301 |
+
458091243358278,
|
| 1302 |
+
"fixed",
|
| 1303 |
+
"CogVideoXDDIM",
|
| 1304 |
+
1
|
| 1305 |
+
]
|
| 1306 |
+
},
|
| 1307 |
+
{
|
| 1308 |
+
"id": 64,
|
| 1309 |
+
"type": "CogVideoDecode",
|
| 1310 |
+
"pos": {
|
| 1311 |
+
"0": 1258,
|
| 1312 |
+
"1": 889
|
| 1313 |
+
},
|
| 1314 |
+
"size": {
|
| 1315 |
+
"0": 315,
|
| 1316 |
+
"1": 198
|
| 1317 |
+
},
|
| 1318 |
+
"flags": {},
|
| 1319 |
+
"order": 21,
|
| 1320 |
+
"mode": 0,
|
| 1321 |
+
"inputs": [
|
| 1322 |
+
{
|
| 1323 |
+
"name": "vae",
|
| 1324 |
+
"type": "VAE",
|
| 1325 |
+
"link": 149
|
| 1326 |
+
},
|
| 1327 |
+
{
|
| 1328 |
+
"name": "samples",
|
| 1329 |
+
"type": "LATENT",
|
| 1330 |
+
"link": 148
|
| 1331 |
+
}
|
| 1332 |
+
],
|
| 1333 |
+
"outputs": [
|
| 1334 |
+
{
|
| 1335 |
+
"name": "images",
|
| 1336 |
+
"type": "IMAGE",
|
| 1337 |
+
"links": [
|
| 1338 |
+
150,
|
| 1339 |
+
153
|
| 1340 |
+
],
|
| 1341 |
+
"slot_index": 0
|
| 1342 |
+
}
|
| 1343 |
+
],
|
| 1344 |
+
"properties": {
|
| 1345 |
+
"Node name for S&R": "CogVideoDecode"
|
| 1346 |
+
},
|
| 1347 |
+
"widgets_values": [
|
| 1348 |
+
true,
|
| 1349 |
+
240,
|
| 1350 |
+
360,
|
| 1351 |
+
0.2,
|
| 1352 |
+
0.2,
|
| 1353 |
+
true
|
| 1354 |
+
]
|
| 1355 |
+
},
|
| 1356 |
+
{
|
| 1357 |
+
"id": 44,
|
| 1358 |
+
"type": "VHS_VideoCombine",
|
| 1359 |
+
"pos": {
|
| 1360 |
+
"0": 1652,
|
| 1361 |
+
"1": -465
|
| 1362 |
+
},
|
| 1363 |
+
"size": [
|
| 1364 |
+
592.7721081788095,
|
| 1365 |
+
1087.6961669921875
|
| 1366 |
+
],
|
| 1367 |
+
"flags": {},
|
| 1368 |
+
"order": 24,
|
| 1369 |
+
"mode": 0,
|
| 1370 |
+
"inputs": [
|
| 1371 |
+
{
|
| 1372 |
+
"name": "images",
|
| 1373 |
+
"type": "IMAGE",
|
| 1374 |
+
"link": 131
|
| 1375 |
+
},
|
| 1376 |
+
{
|
| 1377 |
+
"name": "audio",
|
| 1378 |
+
"type": "AUDIO",
|
| 1379 |
+
"link": null,
|
| 1380 |
+
"shape": 7
|
| 1381 |
+
},
|
| 1382 |
+
{
|
| 1383 |
+
"name": "meta_batch",
|
| 1384 |
+
"type": "VHS_BatchManager",
|
| 1385 |
+
"link": null,
|
| 1386 |
+
"shape": 7
|
| 1387 |
+
},
|
| 1388 |
+
{
|
| 1389 |
+
"name": "vae",
|
| 1390 |
+
"type": "VAE",
|
| 1391 |
+
"link": null,
|
| 1392 |
+
"shape": 7
|
| 1393 |
+
}
|
| 1394 |
+
],
|
| 1395 |
+
"outputs": [
|
| 1396 |
+
{
|
| 1397 |
+
"name": "Filenames",
|
| 1398 |
+
"type": "VHS_FILENAMES",
|
| 1399 |
+
"links": null,
|
| 1400 |
+
"shape": 3
|
| 1401 |
+
}
|
| 1402 |
+
],
|
| 1403 |
+
"properties": {
|
| 1404 |
+
"Node name for S&R": "VHS_VideoCombine"
|
| 1405 |
+
},
|
| 1406 |
+
"widgets_values": {
|
| 1407 |
+
"frame_rate": 8,
|
| 1408 |
+
"loop_count": 0,
|
| 1409 |
+
"filename_prefix": "CogVideoX_Fun",
|
| 1410 |
+
"format": "video/h264-mp4",
|
| 1411 |
+
"pix_fmt": "yuv420p",
|
| 1412 |
+
"crf": 19,
|
| 1413 |
+
"save_metadata": true,
|
| 1414 |
+
"pingpong": false,
|
| 1415 |
+
"save_output": false,
|
| 1416 |
+
"videopreview": {
|
| 1417 |
+
"hidden": false,
|
| 1418 |
+
"paused": false,
|
| 1419 |
+
"params": {
|
| 1420 |
+
"filename": "CogVideoX_Fun_00027.mp4",
|
| 1421 |
+
"subfolder": "",
|
| 1422 |
+
"type": "temp",
|
| 1423 |
+
"format": "video/h264-mp4",
|
| 1424 |
+
"frame_rate": 8
|
| 1425 |
+
},
|
| 1426 |
+
"muted": false
|
| 1427 |
+
}
|
| 1428 |
+
}
|
| 1429 |
+
},
|
| 1430 |
+
{
|
| 1431 |
+
"id": 65,
|
| 1432 |
+
"type": "VHS_VideoCombine",
|
| 1433 |
+
"pos": {
|
| 1434 |
+
"0": 1674,
|
| 1435 |
+
"1": 688
|
| 1436 |
+
},
|
| 1437 |
+
"size": [
|
| 1438 |
+
620.0130829180325,
|
| 1439 |
+
1124.0174560546875
|
| 1440 |
+
],
|
| 1441 |
+
"flags": {},
|
| 1442 |
+
"order": 23,
|
| 1443 |
+
"mode": 0,
|
| 1444 |
+
"inputs": [
|
| 1445 |
+
{
|
| 1446 |
+
"name": "images",
|
| 1447 |
+
"type": "IMAGE",
|
| 1448 |
+
"link": 150
|
| 1449 |
+
},
|
| 1450 |
+
{
|
| 1451 |
+
"name": "audio",
|
| 1452 |
+
"type": "AUDIO",
|
| 1453 |
+
"link": null,
|
| 1454 |
+
"shape": 7
|
| 1455 |
+
},
|
| 1456 |
+
{
|
| 1457 |
+
"name": "meta_batch",
|
| 1458 |
+
"type": "VHS_BatchManager",
|
| 1459 |
+
"link": null,
|
| 1460 |
+
"shape": 7
|
| 1461 |
+
},
|
| 1462 |
+
{
|
| 1463 |
+
"name": "vae",
|
| 1464 |
+
"type": "VAE",
|
| 1465 |
+
"link": null,
|
| 1466 |
+
"shape": 7
|
| 1467 |
+
}
|
| 1468 |
+
],
|
| 1469 |
+
"outputs": [
|
| 1470 |
+
{
|
| 1471 |
+
"name": "Filenames",
|
| 1472 |
+
"type": "VHS_FILENAMES",
|
| 1473 |
+
"links": null,
|
| 1474 |
+
"shape": 3
|
| 1475 |
+
}
|
| 1476 |
+
],
|
| 1477 |
+
"properties": {
|
| 1478 |
+
"Node name for S&R": "VHS_VideoCombine"
|
| 1479 |
+
},
|
| 1480 |
+
"widgets_values": {
|
| 1481 |
+
"frame_rate": 8,
|
| 1482 |
+
"loop_count": 0,
|
| 1483 |
+
"filename_prefix": "CogVideoX_Fun",
|
| 1484 |
+
"format": "video/h264-mp4",
|
| 1485 |
+
"pix_fmt": "yuv420p",
|
| 1486 |
+
"crf": 19,
|
| 1487 |
+
"save_metadata": true,
|
| 1488 |
+
"pingpong": false,
|
| 1489 |
+
"save_output": false,
|
| 1490 |
+
"videopreview": {
|
| 1491 |
+
"hidden": false,
|
| 1492 |
+
"paused": false,
|
| 1493 |
+
"params": {
|
| 1494 |
+
"filename": "CogVideoX_Fun_00026.mp4",
|
| 1495 |
+
"subfolder": "",
|
| 1496 |
+
"type": "temp",
|
| 1497 |
+
"format": "video/h264-mp4",
|
| 1498 |
+
"frame_rate": 8
|
| 1499 |
+
},
|
| 1500 |
+
"muted": false
|
| 1501 |
+
}
|
| 1502 |
+
}
|
| 1503 |
+
},
|
| 1504 |
+
{
|
| 1505 |
+
"id": 49,
|
| 1506 |
+
"type": "DownloadAndLoadCogVideoModel",
|
| 1507 |
+
"pos": {
|
| 1508 |
+
"0": 450,
|
| 1509 |
+
"1": -217
|
| 1510 |
+
},
|
| 1511 |
+
"size": {
|
| 1512 |
+
"0": 362.1656799316406,
|
| 1513 |
+
"1": 218
|
| 1514 |
+
},
|
| 1515 |
+
"flags": {},
|
| 1516 |
+
"order": 8,
|
| 1517 |
+
"mode": 0,
|
| 1518 |
+
"inputs": [
|
| 1519 |
+
{
|
| 1520 |
+
"name": "block_edit",
|
| 1521 |
+
"type": "TRANSFORMERBLOCKS",
|
| 1522 |
+
"link": null,
|
| 1523 |
+
"shape": 7
|
| 1524 |
+
},
|
| 1525 |
+
{
|
| 1526 |
+
"name": "lora",
|
| 1527 |
+
"type": "COGLORA",
|
| 1528 |
+
"link": 124,
|
| 1529 |
+
"shape": 7
|
| 1530 |
+
},
|
| 1531 |
+
{
|
| 1532 |
+
"name": "compile_args",
|
| 1533 |
+
"type": "COMPILEARGS",
|
| 1534 |
+
"link": null,
|
| 1535 |
+
"shape": 7
|
| 1536 |
+
}
|
| 1537 |
+
],
|
| 1538 |
+
"outputs": [
|
| 1539 |
+
{
|
| 1540 |
+
"name": "model",
|
| 1541 |
+
"type": "COGVIDEOMODEL",
|
| 1542 |
+
"links": [
|
| 1543 |
+
114,
|
| 1544 |
+
139
|
| 1545 |
+
]
|
| 1546 |
+
},
|
| 1547 |
+
{
|
| 1548 |
+
"name": "vae",
|
| 1549 |
+
"type": "VAE",
|
| 1550 |
+
"links": [
|
| 1551 |
+
119,
|
| 1552 |
+
122,
|
| 1553 |
+
144,
|
| 1554 |
+
149
|
| 1555 |
+
],
|
| 1556 |
+
"slot_index": 1
|
| 1557 |
+
}
|
| 1558 |
+
],
|
| 1559 |
+
"properties": {
|
| 1560 |
+
"Node name for S&R": "DownloadAndLoadCogVideoModel"
|
| 1561 |
+
},
|
| 1562 |
+
"widgets_values": [
|
| 1563 |
+
"alibaba-pai/CogVideoX-Fun-V1.1-5b-InP",
|
| 1564 |
+
"bf16",
|
| 1565 |
+
"disabled",
|
| 1566 |
+
false,
|
| 1567 |
+
"sdpa",
|
| 1568 |
+
"main_device"
|
| 1569 |
+
]
|
| 1570 |
+
}
|
| 1571 |
+
],
|
| 1572 |
+
"links": [
|
| 1573 |
+
[
|
| 1574 |
+
54,
|
| 1575 |
+
20,
|
| 1576 |
+
0,
|
| 1577 |
+
30,
|
| 1578 |
+
0,
|
| 1579 |
+
"CLIP"
|
| 1580 |
+
],
|
| 1581 |
+
[
|
| 1582 |
+
71,
|
| 1583 |
+
36,
|
| 1584 |
+
0,
|
| 1585 |
+
37,
|
| 1586 |
+
0,
|
| 1587 |
+
"IMAGE"
|
| 1588 |
+
],
|
| 1589 |
+
[
|
| 1590 |
+
110,
|
| 1591 |
+
30,
|
| 1592 |
+
1,
|
| 1593 |
+
31,
|
| 1594 |
+
0,
|
| 1595 |
+
"CLIP"
|
| 1596 |
+
],
|
| 1597 |
+
[
|
| 1598 |
+
114,
|
| 1599 |
+
49,
|
| 1600 |
+
0,
|
| 1601 |
+
48,
|
| 1602 |
+
0,
|
| 1603 |
+
"COGVIDEOMODEL"
|
| 1604 |
+
],
|
| 1605 |
+
[
|
| 1606 |
+
116,
|
| 1607 |
+
30,
|
| 1608 |
+
0,
|
| 1609 |
+
48,
|
| 1610 |
+
1,
|
| 1611 |
+
"CONDITIONING"
|
| 1612 |
+
],
|
| 1613 |
+
[
|
| 1614 |
+
117,
|
| 1615 |
+
31,
|
| 1616 |
+
0,
|
| 1617 |
+
48,
|
| 1618 |
+
2,
|
| 1619 |
+
"CONDITIONING"
|
| 1620 |
+
],
|
| 1621 |
+
[
|
| 1622 |
+
119,
|
| 1623 |
+
49,
|
| 1624 |
+
1,
|
| 1625 |
+
50,
|
| 1626 |
+
0,
|
| 1627 |
+
"VAE"
|
| 1628 |
+
],
|
| 1629 |
+
[
|
| 1630 |
+
120,
|
| 1631 |
+
50,
|
| 1632 |
+
0,
|
| 1633 |
+
48,
|
| 1634 |
+
4,
|
| 1635 |
+
"LATENT"
|
| 1636 |
+
],
|
| 1637 |
+
[
|
| 1638 |
+
122,
|
| 1639 |
+
49,
|
| 1640 |
+
1,
|
| 1641 |
+
51,
|
| 1642 |
+
0,
|
| 1643 |
+
"VAE"
|
| 1644 |
+
],
|
| 1645 |
+
[
|
| 1646 |
+
123,
|
| 1647 |
+
48,
|
| 1648 |
+
0,
|
| 1649 |
+
51,
|
| 1650 |
+
1,
|
| 1651 |
+
"LATENT"
|
| 1652 |
+
],
|
| 1653 |
+
[
|
| 1654 |
+
124,
|
| 1655 |
+
52,
|
| 1656 |
+
0,
|
| 1657 |
+
49,
|
| 1658 |
+
1,
|
| 1659 |
+
"COGLORA"
|
| 1660 |
+
],
|
| 1661 |
+
[
|
| 1662 |
+
126,
|
| 1663 |
+
53,
|
| 1664 |
+
0,
|
| 1665 |
+
50,
|
| 1666 |
+
3,
|
| 1667 |
+
"INT"
|
| 1668 |
+
],
|
| 1669 |
+
[
|
| 1670 |
+
127,
|
| 1671 |
+
53,
|
| 1672 |
+
0,
|
| 1673 |
+
48,
|
| 1674 |
+
9,
|
| 1675 |
+
"INT"
|
| 1676 |
+
],
|
| 1677 |
+
[
|
| 1678 |
+
128,
|
| 1679 |
+
37,
|
| 1680 |
+
0,
|
| 1681 |
+
54,
|
| 1682 |
+
0,
|
| 1683 |
+
"IMAGE"
|
| 1684 |
+
],
|
| 1685 |
+
[
|
| 1686 |
+
129,
|
| 1687 |
+
54,
|
| 1688 |
+
0,
|
| 1689 |
+
50,
|
| 1690 |
+
1,
|
| 1691 |
+
"IMAGE"
|
| 1692 |
+
],
|
| 1693 |
+
[
|
| 1694 |
+
130,
|
| 1695 |
+
51,
|
| 1696 |
+
0,
|
| 1697 |
+
55,
|
| 1698 |
+
0,
|
| 1699 |
+
"IMAGE"
|
| 1700 |
+
],
|
| 1701 |
+
[
|
| 1702 |
+
131,
|
| 1703 |
+
55,
|
| 1704 |
+
0,
|
| 1705 |
+
44,
|
| 1706 |
+
0,
|
| 1707 |
+
"IMAGE"
|
| 1708 |
+
],
|
| 1709 |
+
[
|
| 1710 |
+
134,
|
| 1711 |
+
58,
|
| 1712 |
+
0,
|
| 1713 |
+
59,
|
| 1714 |
+
0,
|
| 1715 |
+
"GIMMVIF_MODEL"
|
| 1716 |
+
],
|
| 1717 |
+
[
|
| 1718 |
+
139,
|
| 1719 |
+
49,
|
| 1720 |
+
0,
|
| 1721 |
+
62,
|
| 1722 |
+
0,
|
| 1723 |
+
"COGVIDEOMODEL"
|
| 1724 |
+
],
|
| 1725 |
+
[
|
| 1726 |
+
140,
|
| 1727 |
+
30,
|
| 1728 |
+
0,
|
| 1729 |
+
62,
|
| 1730 |
+
1,
|
| 1731 |
+
"CONDITIONING"
|
| 1732 |
+
],
|
| 1733 |
+
[
|
| 1734 |
+
141,
|
| 1735 |
+
31,
|
| 1736 |
+
0,
|
| 1737 |
+
62,
|
| 1738 |
+
2,
|
| 1739 |
+
"CONDITIONING"
|
| 1740 |
+
],
|
| 1741 |
+
[
|
| 1742 |
+
143,
|
| 1743 |
+
53,
|
| 1744 |
+
0,
|
| 1745 |
+
62,
|
| 1746 |
+
9,
|
| 1747 |
+
"INT"
|
| 1748 |
+
],
|
| 1749 |
+
[
|
| 1750 |
+
144,
|
| 1751 |
+
49,
|
| 1752 |
+
1,
|
| 1753 |
+
63,
|
| 1754 |
+
0,
|
| 1755 |
+
"VAE"
|
| 1756 |
+
],
|
| 1757 |
+
[
|
| 1758 |
+
145,
|
| 1759 |
+
53,
|
| 1760 |
+
0,
|
| 1761 |
+
63,
|
| 1762 |
+
3,
|
| 1763 |
+
"INT"
|
| 1764 |
+
],
|
| 1765 |
+
[
|
| 1766 |
+
146,
|
| 1767 |
+
37,
|
| 1768 |
+
0,
|
| 1769 |
+
63,
|
| 1770 |
+
1,
|
| 1771 |
+
"IMAGE"
|
| 1772 |
+
],
|
| 1773 |
+
[
|
| 1774 |
+
147,
|
| 1775 |
+
63,
|
| 1776 |
+
0,
|
| 1777 |
+
62,
|
| 1778 |
+
4,
|
| 1779 |
+
"LATENT"
|
| 1780 |
+
],
|
| 1781 |
+
[
|
| 1782 |
+
148,
|
| 1783 |
+
62,
|
| 1784 |
+
0,
|
| 1785 |
+
64,
|
| 1786 |
+
1,
|
| 1787 |
+
"LATENT"
|
| 1788 |
+
],
|
| 1789 |
+
[
|
| 1790 |
+
149,
|
| 1791 |
+
49,
|
| 1792 |
+
1,
|
| 1793 |
+
64,
|
| 1794 |
+
0,
|
| 1795 |
+
"VAE"
|
| 1796 |
+
],
|
| 1797 |
+
[
|
| 1798 |
+
150,
|
| 1799 |
+
64,
|
| 1800 |
+
0,
|
| 1801 |
+
65,
|
| 1802 |
+
0,
|
| 1803 |
+
"IMAGE"
|
| 1804 |
+
],
|
| 1805 |
+
[
|
| 1806 |
+
151,
|
| 1807 |
+
55,
|
| 1808 |
+
0,
|
| 1809 |
+
66,
|
| 1810 |
+
0,
|
| 1811 |
+
"IMAGE"
|
| 1812 |
+
],
|
| 1813 |
+
[
|
| 1814 |
+
152,
|
| 1815 |
+
66,
|
| 1816 |
+
0,
|
| 1817 |
+
67,
|
| 1818 |
+
0,
|
| 1819 |
+
"IMAGE"
|
| 1820 |
+
],
|
| 1821 |
+
[
|
| 1822 |
+
153,
|
| 1823 |
+
64,
|
| 1824 |
+
0,
|
| 1825 |
+
67,
|
| 1826 |
+
1,
|
| 1827 |
+
"IMAGE"
|
| 1828 |
+
],
|
| 1829 |
+
[
|
| 1830 |
+
156,
|
| 1831 |
+
68,
|
| 1832 |
+
0,
|
| 1833 |
+
48,
|
| 1834 |
+
10,
|
| 1835 |
+
"INT"
|
| 1836 |
+
],
|
| 1837 |
+
[
|
| 1838 |
+
157,
|
| 1839 |
+
68,
|
| 1840 |
+
0,
|
| 1841 |
+
62,
|
| 1842 |
+
10,
|
| 1843 |
+
"INT"
|
| 1844 |
+
],
|
| 1845 |
+
[
|
| 1846 |
+
158,
|
| 1847 |
+
69,
|
| 1848 |
+
0,
|
| 1849 |
+
70,
|
| 1850 |
+
1,
|
| 1851 |
+
"FL2MODEL"
|
| 1852 |
+
],
|
| 1853 |
+
[
|
| 1854 |
+
159,
|
| 1855 |
+
37,
|
| 1856 |
+
0,
|
| 1857 |
+
70,
|
| 1858 |
+
0,
|
| 1859 |
+
"IMAGE"
|
| 1860 |
+
],
|
| 1861 |
+
[
|
| 1862 |
+
160,
|
| 1863 |
+
71,
|
| 1864 |
+
0,
|
| 1865 |
+
72,
|
| 1866 |
+
0,
|
| 1867 |
+
"STRING"
|
| 1868 |
+
],
|
| 1869 |
+
[
|
| 1870 |
+
161,
|
| 1871 |
+
70,
|
| 1872 |
+
2,
|
| 1873 |
+
73,
|
| 1874 |
+
0,
|
| 1875 |
+
"STRING"
|
| 1876 |
+
],
|
| 1877 |
+
[
|
| 1878 |
+
162,
|
| 1879 |
+
70,
|
| 1880 |
+
2,
|
| 1881 |
+
72,
|
| 1882 |
+
1,
|
| 1883 |
+
"STRING"
|
| 1884 |
+
],
|
| 1885 |
+
[
|
| 1886 |
+
163,
|
| 1887 |
+
72,
|
| 1888 |
+
0,
|
| 1889 |
+
30,
|
| 1890 |
+
1,
|
| 1891 |
+
"STRING"
|
| 1892 |
+
],
|
| 1893 |
+
[
|
| 1894 |
+
164,
|
| 1895 |
+
59,
|
| 1896 |
+
0,
|
| 1897 |
+
60,
|
| 1898 |
+
0,
|
| 1899 |
+
"IMAGE"
|
| 1900 |
+
],
|
| 1901 |
+
[
|
| 1902 |
+
165,
|
| 1903 |
+
67,
|
| 1904 |
+
0,
|
| 1905 |
+
59,
|
| 1906 |
+
1,
|
| 1907 |
+
"IMAGE"
|
| 1908 |
+
]
|
| 1909 |
+
],
|
| 1910 |
+
"groups": [],
|
| 1911 |
+
"config": {},
|
| 1912 |
+
"extra": {
|
| 1913 |
+
"ds": {
|
| 1914 |
+
"scale": 0.47362440744777223,
|
| 1915 |
+
"offset": [
|
| 1916 |
+
1633.9967545643788,
|
| 1917 |
+
525.3824652843582
|
| 1918 |
+
]
|
| 1919 |
+
}
|
| 1920 |
+
},
|
| 1921 |
+
"version": 0.4
|
| 1922 |
+
}
|
custom_nodes/ComfyUI-CogVideoXWrapper/model_loading.py
CHANGED
|
@@ -70,7 +70,6 @@ class CogVideoLoraSelect:
|
|
| 70 |
RETURN_NAMES = ("lora", )
|
| 71 |
FUNCTION = "getlorapath"
|
| 72 |
CATEGORY = "CogVideoWrapper"
|
| 73 |
-
DESCRIPTION = "Select a LoRA model from ComfyUI/models/CogVideo/loras"
|
| 74 |
|
| 75 |
def getlorapath(self, lora, strength, prev_lora=None, fuse_lora=False):
|
| 76 |
cog_loras_list = []
|
|
@@ -87,43 +86,6 @@ class CogVideoLoraSelect:
|
|
| 87 |
cog_loras_list.append(cog_lora)
|
| 88 |
print(cog_loras_list)
|
| 89 |
return (cog_loras_list,)
|
| 90 |
-
|
| 91 |
-
class CogVideoLoraSelectComfy:
|
| 92 |
-
@classmethod
|
| 93 |
-
def INPUT_TYPES(s):
|
| 94 |
-
return {
|
| 95 |
-
"required": {
|
| 96 |
-
"lora": (folder_paths.get_filename_list("loras"),
|
| 97 |
-
{"tooltip": "LORA models are expected to be in ComfyUI/models/loras with .safetensors extension"}),
|
| 98 |
-
"strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.0001, "tooltip": "LORA strength, set to 0.0 to unmerge the LORA"}),
|
| 99 |
-
},
|
| 100 |
-
"optional": {
|
| 101 |
-
"prev_lora":("COGLORA", {"default": None, "tooltip": "For loading multiple LoRAs"}),
|
| 102 |
-
"fuse_lora": ("BOOLEAN", {"default": False, "tooltip": "Fuse the LoRA weights into the transformer"}),
|
| 103 |
-
}
|
| 104 |
-
}
|
| 105 |
-
|
| 106 |
-
RETURN_TYPES = ("COGLORA",)
|
| 107 |
-
RETURN_NAMES = ("lora", )
|
| 108 |
-
FUNCTION = "getlorapath"
|
| 109 |
-
CATEGORY = "CogVideoWrapper"
|
| 110 |
-
DESCRIPTION = "Select a LoRA model from ComfyUI/models/loras"
|
| 111 |
-
|
| 112 |
-
def getlorapath(self, lora, strength, prev_lora=None, fuse_lora=False):
|
| 113 |
-
cog_loras_list = []
|
| 114 |
-
|
| 115 |
-
cog_lora = {
|
| 116 |
-
"path": folder_paths.get_full_path("loras", lora),
|
| 117 |
-
"strength": strength,
|
| 118 |
-
"name": lora.split(".")[0],
|
| 119 |
-
"fuse_lora": fuse_lora
|
| 120 |
-
}
|
| 121 |
-
if prev_lora is not None:
|
| 122 |
-
cog_loras_list.extend(prev_lora)
|
| 123 |
-
|
| 124 |
-
cog_loras_list.append(cog_lora)
|
| 125 |
-
print(cog_loras_list)
|
| 126 |
-
return (cog_loras_list,)
|
| 127 |
|
| 128 |
#region DownloadAndLoadCogVideoModel
|
| 129 |
class DownloadAndLoadCogVideoModel:
|
|
@@ -162,19 +124,7 @@ class DownloadAndLoadCogVideoModel:
|
|
| 162 |
"block_edit": ("TRANSFORMERBLOCKS", {"default": None}),
|
| 163 |
"lora": ("COGLORA", {"default": None}),
|
| 164 |
"compile_args":("COMPILEARGS", ),
|
| 165 |
-
"attention_mode": ([
|
| 166 |
-
"sdpa",
|
| 167 |
-
"fused_sdpa",
|
| 168 |
-
"sageattn",
|
| 169 |
-
"fused_sageattn",
|
| 170 |
-
"sageattn_qk_int8_pv_fp8_cuda",
|
| 171 |
-
"sageattn_qk_int8_pv_fp16_cuda",
|
| 172 |
-
"sageattn_qk_int8_pv_fp16_triton",
|
| 173 |
-
"fused_sageattn_qk_int8_pv_fp8_cuda",
|
| 174 |
-
"fused_sageattn_qk_int8_pv_fp16_cuda",
|
| 175 |
-
"fused_sageattn_qk_int8_pv_fp16_triton",
|
| 176 |
-
"comfy"
|
| 177 |
-
], {"default": "sdpa"}),
|
| 178 |
"load_device": (["main_device", "offload_device"], {"default": "main_device"}),
|
| 179 |
}
|
| 180 |
}
|
|
@@ -189,18 +139,11 @@ class DownloadAndLoadCogVideoModel:
|
|
| 189 |
enable_sequential_cpu_offload=False, block_edit=None, lora=None, compile_args=None,
|
| 190 |
attention_mode="sdpa", load_device="main_device"):
|
| 191 |
|
| 192 |
-
transformer = None
|
| 193 |
-
|
| 194 |
if "sage" in attention_mode:
|
| 195 |
try:
|
| 196 |
from sageattention import sageattn
|
| 197 |
except Exception as e:
|
| 198 |
raise ValueError(f"Can't import SageAttention: {str(e)}")
|
| 199 |
-
if "qk_int8" in attention_mode:
|
| 200 |
-
try:
|
| 201 |
-
from sageattention import sageattn_qk_int8_pv_fp16_cuda
|
| 202 |
-
except Exception as e:
|
| 203 |
-
raise ValueError(f"Can't import SageAttention 2.0.0: {str(e)}")
|
| 204 |
|
| 205 |
if precision == "fp16" and "1.5" in model:
|
| 206 |
raise ValueError("1.5 models do not currently work in fp16")
|
|
@@ -275,7 +218,7 @@ class DownloadAndLoadCogVideoModel:
|
|
| 275 |
local_dir_use_symlinks=False,
|
| 276 |
)
|
| 277 |
|
| 278 |
-
transformer = CogVideoXTransformer3DModel.from_pretrained(base_path, subfolder=subfolder
|
| 279 |
transformer = transformer.to(dtype).to(transformer_load_device)
|
| 280 |
|
| 281 |
if "1.5" in model:
|
|
@@ -348,6 +291,7 @@ class DownloadAndLoadCogVideoModel:
|
|
| 348 |
for module in pipe.transformer.modules():
|
| 349 |
if isinstance(module, Attention):
|
| 350 |
module.fuse_projections(fuse=True)
|
|
|
|
| 351 |
|
| 352 |
if compile_args is not None:
|
| 353 |
pipe.transformer.to(memory_format=torch.channels_last)
|
|
@@ -571,7 +515,7 @@ class DownloadAndLoadCogVideoGGUFModel:
|
|
| 571 |
else:
|
| 572 |
transformer_config["in_channels"] = 16
|
| 573 |
|
| 574 |
-
transformer = CogVideoXTransformer3DModel.from_config(transformer_config
|
| 575 |
cast_dtype = vae_dtype
|
| 576 |
params_to_keep = {"patch_embed", "pos_embedding", "time_embedding"}
|
| 577 |
if "2b" in model:
|
|
@@ -675,19 +619,7 @@ class CogVideoXModelLoader:
|
|
| 675 |
"block_edit": ("TRANSFORMERBLOCKS", {"default": None}),
|
| 676 |
"lora": ("COGLORA", {"default": None}),
|
| 677 |
"compile_args":("COMPILEARGS", ),
|
| 678 |
-
"attention_mode": ([
|
| 679 |
-
"sdpa",
|
| 680 |
-
"fused_sdpa",
|
| 681 |
-
"sageattn",
|
| 682 |
-
"fused_sageattn",
|
| 683 |
-
"sageattn_qk_int8_pv_fp8_cuda",
|
| 684 |
-
"sageattn_qk_int8_pv_fp16_cuda",
|
| 685 |
-
"sageattn_qk_int8_pv_fp16_triton",
|
| 686 |
-
"fused_sageattn_qk_int8_pv_fp8_cuda",
|
| 687 |
-
"fused_sageattn_qk_int8_pv_fp16_cuda",
|
| 688 |
-
"fused_sageattn_qk_int8_pv_fp16_triton",
|
| 689 |
-
"comfy"
|
| 690 |
-
], {"default": "sdpa"}),
|
| 691 |
}
|
| 692 |
}
|
| 693 |
|
|
@@ -698,7 +630,7 @@ class CogVideoXModelLoader:
|
|
| 698 |
|
| 699 |
def loadmodel(self, model, base_precision, load_device, enable_sequential_cpu_offload,
|
| 700 |
block_edit=None, compile_args=None, lora=None, attention_mode="sdpa", quantization="disabled"):
|
| 701 |
-
|
| 702 |
if "sage" in attention_mode:
|
| 703 |
try:
|
| 704 |
from sageattention import sageattn
|
|
@@ -764,7 +696,7 @@ class CogVideoXModelLoader:
|
|
| 764 |
transformer_config["sample_width"] = 300
|
| 765 |
|
| 766 |
with init_empty_weights():
|
| 767 |
-
transformer = CogVideoXTransformer3DModel.from_config(transformer_config
|
| 768 |
|
| 769 |
#load weights
|
| 770 |
#params_to_keep = {}
|
|
@@ -1116,7 +1048,6 @@ NODE_CLASS_MAPPINGS = {
|
|
| 1116 |
"CogVideoLoraSelect": CogVideoLoraSelect,
|
| 1117 |
"CogVideoXVAELoader": CogVideoXVAELoader,
|
| 1118 |
"CogVideoXModelLoader": CogVideoXModelLoader,
|
| 1119 |
-
"CogVideoLoraSelectComfy": CogVideoLoraSelectComfy
|
| 1120 |
}
|
| 1121 |
NODE_DISPLAY_NAME_MAPPINGS = {
|
| 1122 |
"DownloadAndLoadCogVideoModel": "(Down)load CogVideo Model",
|
|
@@ -1126,5 +1057,4 @@ NODE_DISPLAY_NAME_MAPPINGS = {
|
|
| 1126 |
"CogVideoLoraSelect": "CogVideo LoraSelect",
|
| 1127 |
"CogVideoXVAELoader": "CogVideoX VAE Loader",
|
| 1128 |
"CogVideoXModelLoader": "CogVideoX Model Loader",
|
| 1129 |
-
"CogVideoLoraSelectComfy": "CogVideo LoraSelect Comfy"
|
| 1130 |
}
|
|
|
|
| 70 |
RETURN_NAMES = ("lora", )
|
| 71 |
FUNCTION = "getlorapath"
|
| 72 |
CATEGORY = "CogVideoWrapper"
|
|
|
|
| 73 |
|
| 74 |
def getlorapath(self, lora, strength, prev_lora=None, fuse_lora=False):
|
| 75 |
cog_loras_list = []
|
|
|
|
| 86 |
cog_loras_list.append(cog_lora)
|
| 87 |
print(cog_loras_list)
|
| 88 |
return (cog_loras_list,)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
#region DownloadAndLoadCogVideoModel
|
| 91 |
class DownloadAndLoadCogVideoModel:
|
|
|
|
| 124 |
"block_edit": ("TRANSFORMERBLOCKS", {"default": None}),
|
| 125 |
"lora": ("COGLORA", {"default": None}),
|
| 126 |
"compile_args":("COMPILEARGS", ),
|
| 127 |
+
"attention_mode": (["sdpa", "sageattn", "fused_sdpa", "fused_sageattn", "comfy"], {"default": "sdpa"}),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
"load_device": (["main_device", "offload_device"], {"default": "main_device"}),
|
| 129 |
}
|
| 130 |
}
|
|
|
|
| 139 |
enable_sequential_cpu_offload=False, block_edit=None, lora=None, compile_args=None,
|
| 140 |
attention_mode="sdpa", load_device="main_device"):
|
| 141 |
|
|
|
|
|
|
|
| 142 |
if "sage" in attention_mode:
|
| 143 |
try:
|
| 144 |
from sageattention import sageattn
|
| 145 |
except Exception as e:
|
| 146 |
raise ValueError(f"Can't import SageAttention: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
if precision == "fp16" and "1.5" in model:
|
| 149 |
raise ValueError("1.5 models do not currently work in fp16")
|
|
|
|
| 218 |
local_dir_use_symlinks=False,
|
| 219 |
)
|
| 220 |
|
| 221 |
+
transformer = CogVideoXTransformer3DModel.from_pretrained(base_path, subfolder=subfolder)
|
| 222 |
transformer = transformer.to(dtype).to(transformer_load_device)
|
| 223 |
|
| 224 |
if "1.5" in model:
|
|
|
|
| 291 |
for module in pipe.transformer.modules():
|
| 292 |
if isinstance(module, Attention):
|
| 293 |
module.fuse_projections(fuse=True)
|
| 294 |
+
pipe.transformer.attention_mode = attention_mode
|
| 295 |
|
| 296 |
if compile_args is not None:
|
| 297 |
pipe.transformer.to(memory_format=torch.channels_last)
|
|
|
|
| 515 |
else:
|
| 516 |
transformer_config["in_channels"] = 16
|
| 517 |
|
| 518 |
+
transformer = CogVideoXTransformer3DModel.from_config(transformer_config)
|
| 519 |
cast_dtype = vae_dtype
|
| 520 |
params_to_keep = {"patch_embed", "pos_embedding", "time_embedding"}
|
| 521 |
if "2b" in model:
|
|
|
|
| 619 |
"block_edit": ("TRANSFORMERBLOCKS", {"default": None}),
|
| 620 |
"lora": ("COGLORA", {"default": None}),
|
| 621 |
"compile_args":("COMPILEARGS", ),
|
| 622 |
+
"attention_mode": (["sdpa", "sageattn", "fused_sdpa", "fused_sageattn"], {"default": "sdpa"}),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 623 |
}
|
| 624 |
}
|
| 625 |
|
|
|
|
| 630 |
|
| 631 |
def loadmodel(self, model, base_precision, load_device, enable_sequential_cpu_offload,
|
| 632 |
block_edit=None, compile_args=None, lora=None, attention_mode="sdpa", quantization="disabled"):
|
| 633 |
+
|
| 634 |
if "sage" in attention_mode:
|
| 635 |
try:
|
| 636 |
from sageattention import sageattn
|
|
|
|
| 696 |
transformer_config["sample_width"] = 300
|
| 697 |
|
| 698 |
with init_empty_weights():
|
| 699 |
+
transformer = CogVideoXTransformer3DModel.from_config(transformer_config)
|
| 700 |
|
| 701 |
#load weights
|
| 702 |
#params_to_keep = {}
|
|
|
|
| 1048 |
"CogVideoLoraSelect": CogVideoLoraSelect,
|
| 1049 |
"CogVideoXVAELoader": CogVideoXVAELoader,
|
| 1050 |
"CogVideoXModelLoader": CogVideoXModelLoader,
|
|
|
|
| 1051 |
}
|
| 1052 |
NODE_DISPLAY_NAME_MAPPINGS = {
|
| 1053 |
"DownloadAndLoadCogVideoModel": "(Down)load CogVideo Model",
|
|
|
|
| 1057 |
"CogVideoLoraSelect": "CogVideo LoraSelect",
|
| 1058 |
"CogVideoXVAELoader": "CogVideoX VAE Loader",
|
| 1059 |
"CogVideoXModelLoader": "CogVideoX Model Loader",
|
|
|
|
| 1060 |
}
|
custom_nodes/ComfyUI-DepthAnythingV2/.gitattributes
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Auto detect text files and perform LF normalization
|
| 2 |
+
* text=auto
|
custom_nodes/ComfyUI-DepthAnythingV2/.github/workflows/publish.yml
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Publish to Comfy registry
|
| 2 |
+
on:
|
| 3 |
+
workflow_dispatch:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- main
|
| 7 |
+
- master
|
| 8 |
+
paths:
|
| 9 |
+
- "pyproject.toml"
|
| 10 |
+
|
| 11 |
+
jobs:
|
| 12 |
+
publish-node:
|
| 13 |
+
name: Publish Custom Node to registry
|
| 14 |
+
runs-on: ubuntu-latest
|
| 15 |
+
steps:
|
| 16 |
+
- name: Check out code
|
| 17 |
+
uses: actions/checkout@v4
|
| 18 |
+
- name: Publish Custom Node
|
| 19 |
+
uses: Comfy-Org/publish-node-action@main
|
| 20 |
+
with:
|
| 21 |
+
## Add your own personal access token to your Github Repository secrets and reference it here.
|
| 22 |
+
personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }}
|
custom_nodes/ComfyUI-DepthAnythingV2/.gitignore
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.DS_Store
|
| 2 |
+
*pyc
|
| 3 |
+
.vscode
|
| 4 |
+
__pycache__
|
| 5 |
+
*.egg-info
|
| 6 |
+
*.bak
|
| 7 |
+
checkpoints
|
| 8 |
+
results
|
| 9 |
+
backup
|
custom_nodes/ComfyUI-DepthAnythingV2/README.md
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ComfyUI nodes to use DepthAnythingV2
|
| 2 |
+
https://depth-anything-v2.github.io
|
| 3 |
+
|
| 4 |
+
Models autodownload to `ComfyUI\models\depthanything` from https://huggingface.co/Kijai/DepthAnythingV2-safetensors/tree/main
|
| 5 |
+
|
| 6 |
+

|
custom_nodes/ComfyUI-DepthAnythingV2/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS
|
| 2 |
+
|
| 3 |
+
__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
|
custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2.py
ADDED
|
@@ -0,0 +1,415 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
#
|
| 3 |
+
# This source code is licensed under the Apache License, Version 2.0
|
| 4 |
+
# found in the LICENSE file in the root directory of this source tree.
|
| 5 |
+
|
| 6 |
+
# References:
|
| 7 |
+
# https://github.com/facebookresearch/dino/blob/main/vision_transformer.py
|
| 8 |
+
# https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py
|
| 9 |
+
|
| 10 |
+
from functools import partial
|
| 11 |
+
import math
|
| 12 |
+
import logging
|
| 13 |
+
from typing import Sequence, Tuple, Union, Callable
|
| 14 |
+
|
| 15 |
+
import torch
|
| 16 |
+
import torch.nn as nn
|
| 17 |
+
import torch.utils.checkpoint
|
| 18 |
+
from torch.nn.init import trunc_normal_
|
| 19 |
+
|
| 20 |
+
from .dinov2_layers import Mlp, PatchEmbed, SwiGLUFFNFused, MemEffAttention, NestedTensorBlock as Block
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
logger = logging.getLogger("dinov2")
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def named_apply(fn: Callable, module: nn.Module, name="", depth_first=True, include_root=False) -> nn.Module:
|
| 27 |
+
if not depth_first and include_root:
|
| 28 |
+
fn(module=module, name=name)
|
| 29 |
+
for child_name, child_module in module.named_children():
|
| 30 |
+
child_name = ".".join((name, child_name)) if name else child_name
|
| 31 |
+
named_apply(fn=fn, module=child_module, name=child_name, depth_first=depth_first, include_root=True)
|
| 32 |
+
if depth_first and include_root:
|
| 33 |
+
fn(module=module, name=name)
|
| 34 |
+
return module
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class BlockChunk(nn.ModuleList):
|
| 38 |
+
def forward(self, x):
|
| 39 |
+
for b in self:
|
| 40 |
+
x = b(x)
|
| 41 |
+
return x
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class DinoVisionTransformer(nn.Module):
|
| 45 |
+
def __init__(
|
| 46 |
+
self,
|
| 47 |
+
img_size=224,
|
| 48 |
+
patch_size=16,
|
| 49 |
+
in_chans=3,
|
| 50 |
+
embed_dim=768,
|
| 51 |
+
depth=12,
|
| 52 |
+
num_heads=12,
|
| 53 |
+
mlp_ratio=4.0,
|
| 54 |
+
qkv_bias=True,
|
| 55 |
+
ffn_bias=True,
|
| 56 |
+
proj_bias=True,
|
| 57 |
+
drop_path_rate=0.0,
|
| 58 |
+
drop_path_uniform=False,
|
| 59 |
+
init_values=None, # for layerscale: None or 0 => no layerscale
|
| 60 |
+
embed_layer=PatchEmbed,
|
| 61 |
+
act_layer=nn.GELU,
|
| 62 |
+
block_fn=Block,
|
| 63 |
+
ffn_layer="mlp",
|
| 64 |
+
block_chunks=1,
|
| 65 |
+
num_register_tokens=0,
|
| 66 |
+
interpolate_antialias=False,
|
| 67 |
+
interpolate_offset=0.1,
|
| 68 |
+
):
|
| 69 |
+
"""
|
| 70 |
+
Args:
|
| 71 |
+
img_size (int, tuple): input image size
|
| 72 |
+
patch_size (int, tuple): patch size
|
| 73 |
+
in_chans (int): number of input channels
|
| 74 |
+
embed_dim (int): embedding dimension
|
| 75 |
+
depth (int): depth of transformer
|
| 76 |
+
num_heads (int): number of attention heads
|
| 77 |
+
mlp_ratio (int): ratio of mlp hidden dim to embedding dim
|
| 78 |
+
qkv_bias (bool): enable bias for qkv if True
|
| 79 |
+
proj_bias (bool): enable bias for proj in attn if True
|
| 80 |
+
ffn_bias (bool): enable bias for ffn if True
|
| 81 |
+
drop_path_rate (float): stochastic depth rate
|
| 82 |
+
drop_path_uniform (bool): apply uniform drop rate across blocks
|
| 83 |
+
weight_init (str): weight init scheme
|
| 84 |
+
init_values (float): layer-scale init values
|
| 85 |
+
embed_layer (nn.Module): patch embedding layer
|
| 86 |
+
act_layer (nn.Module): MLP activation layer
|
| 87 |
+
block_fn (nn.Module): transformer block class
|
| 88 |
+
ffn_layer (str): "mlp", "swiglu", "swiglufused" or "identity"
|
| 89 |
+
block_chunks: (int) split block sequence into block_chunks units for FSDP wrap
|
| 90 |
+
num_register_tokens: (int) number of extra cls tokens (so-called "registers")
|
| 91 |
+
interpolate_antialias: (str) flag to apply anti-aliasing when interpolating positional embeddings
|
| 92 |
+
interpolate_offset: (float) work-around offset to apply when interpolating positional embeddings
|
| 93 |
+
"""
|
| 94 |
+
super().__init__()
|
| 95 |
+
norm_layer = partial(nn.LayerNorm, eps=1e-6)
|
| 96 |
+
|
| 97 |
+
self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
|
| 98 |
+
self.num_tokens = 1
|
| 99 |
+
self.n_blocks = depth
|
| 100 |
+
self.num_heads = num_heads
|
| 101 |
+
self.patch_size = patch_size
|
| 102 |
+
self.num_register_tokens = num_register_tokens
|
| 103 |
+
self.interpolate_antialias = interpolate_antialias
|
| 104 |
+
self.interpolate_offset = interpolate_offset
|
| 105 |
+
|
| 106 |
+
self.patch_embed = embed_layer(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
|
| 107 |
+
num_patches = self.patch_embed.num_patches
|
| 108 |
+
|
| 109 |
+
self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
|
| 110 |
+
self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + self.num_tokens, embed_dim))
|
| 111 |
+
assert num_register_tokens >= 0
|
| 112 |
+
self.register_tokens = (
|
| 113 |
+
nn.Parameter(torch.zeros(1, num_register_tokens, embed_dim)) if num_register_tokens else None
|
| 114 |
+
)
|
| 115 |
+
|
| 116 |
+
if drop_path_uniform is True:
|
| 117 |
+
dpr = [drop_path_rate] * depth
|
| 118 |
+
else:
|
| 119 |
+
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule
|
| 120 |
+
|
| 121 |
+
if ffn_layer == "mlp":
|
| 122 |
+
logger.info("using MLP layer as FFN")
|
| 123 |
+
ffn_layer = Mlp
|
| 124 |
+
elif ffn_layer == "swiglufused" or ffn_layer == "swiglu":
|
| 125 |
+
logger.info("using SwiGLU layer as FFN")
|
| 126 |
+
ffn_layer = SwiGLUFFNFused
|
| 127 |
+
elif ffn_layer == "identity":
|
| 128 |
+
logger.info("using Identity layer as FFN")
|
| 129 |
+
|
| 130 |
+
def f(*args, **kwargs):
|
| 131 |
+
return nn.Identity()
|
| 132 |
+
|
| 133 |
+
ffn_layer = f
|
| 134 |
+
else:
|
| 135 |
+
raise NotImplementedError
|
| 136 |
+
|
| 137 |
+
blocks_list = [
|
| 138 |
+
block_fn(
|
| 139 |
+
dim=embed_dim,
|
| 140 |
+
num_heads=num_heads,
|
| 141 |
+
mlp_ratio=mlp_ratio,
|
| 142 |
+
qkv_bias=qkv_bias,
|
| 143 |
+
proj_bias=proj_bias,
|
| 144 |
+
ffn_bias=ffn_bias,
|
| 145 |
+
drop_path=dpr[i],
|
| 146 |
+
norm_layer=norm_layer,
|
| 147 |
+
act_layer=act_layer,
|
| 148 |
+
ffn_layer=ffn_layer,
|
| 149 |
+
init_values=init_values,
|
| 150 |
+
)
|
| 151 |
+
for i in range(depth)
|
| 152 |
+
]
|
| 153 |
+
if block_chunks > 0:
|
| 154 |
+
self.chunked_blocks = True
|
| 155 |
+
chunked_blocks = []
|
| 156 |
+
chunksize = depth // block_chunks
|
| 157 |
+
for i in range(0, depth, chunksize):
|
| 158 |
+
# this is to keep the block index consistent if we chunk the block list
|
| 159 |
+
chunked_blocks.append([nn.Identity()] * i + blocks_list[i : i + chunksize])
|
| 160 |
+
self.blocks = nn.ModuleList([BlockChunk(p) for p in chunked_blocks])
|
| 161 |
+
else:
|
| 162 |
+
self.chunked_blocks = False
|
| 163 |
+
self.blocks = nn.ModuleList(blocks_list)
|
| 164 |
+
|
| 165 |
+
self.norm = norm_layer(embed_dim)
|
| 166 |
+
self.head = nn.Identity()
|
| 167 |
+
|
| 168 |
+
self.mask_token = nn.Parameter(torch.zeros(1, embed_dim))
|
| 169 |
+
|
| 170 |
+
self.init_weights()
|
| 171 |
+
|
| 172 |
+
def init_weights(self):
|
| 173 |
+
trunc_normal_(self.pos_embed, std=0.02)
|
| 174 |
+
nn.init.normal_(self.cls_token, std=1e-6)
|
| 175 |
+
if self.register_tokens is not None:
|
| 176 |
+
nn.init.normal_(self.register_tokens, std=1e-6)
|
| 177 |
+
named_apply(init_weights_vit_timm, self)
|
| 178 |
+
|
| 179 |
+
def interpolate_pos_encoding(self, x, w, h):
|
| 180 |
+
previous_dtype = x.dtype
|
| 181 |
+
npatch = x.shape[1] - 1
|
| 182 |
+
N = self.pos_embed.shape[1] - 1
|
| 183 |
+
if npatch == N and w == h:
|
| 184 |
+
return self.pos_embed
|
| 185 |
+
pos_embed = self.pos_embed.float()
|
| 186 |
+
class_pos_embed = pos_embed[:, 0]
|
| 187 |
+
patch_pos_embed = pos_embed[:, 1:]
|
| 188 |
+
dim = x.shape[-1]
|
| 189 |
+
w0 = w // self.patch_size
|
| 190 |
+
h0 = h // self.patch_size
|
| 191 |
+
# we add a small number to avoid floating point error in the interpolation
|
| 192 |
+
# see discussion at https://github.com/facebookresearch/dino/issues/8
|
| 193 |
+
# DINOv2 with register modify the interpolate_offset from 0.1 to 0.0
|
| 194 |
+
w0, h0 = w0 + self.interpolate_offset, h0 + self.interpolate_offset
|
| 195 |
+
# w0, h0 = w0 + 0.1, h0 + 0.1
|
| 196 |
+
|
| 197 |
+
sqrt_N = math.sqrt(N)
|
| 198 |
+
sx, sy = float(w0) / sqrt_N, float(h0) / sqrt_N
|
| 199 |
+
patch_pos_embed = nn.functional.interpolate(
|
| 200 |
+
patch_pos_embed.reshape(1, int(sqrt_N), int(sqrt_N), dim).permute(0, 3, 1, 2),
|
| 201 |
+
scale_factor=(sx, sy),
|
| 202 |
+
# (int(w0), int(h0)), # to solve the upsampling shape issue
|
| 203 |
+
mode="bicubic",
|
| 204 |
+
antialias=self.interpolate_antialias
|
| 205 |
+
)
|
| 206 |
+
|
| 207 |
+
assert int(w0) == patch_pos_embed.shape[-2]
|
| 208 |
+
assert int(h0) == patch_pos_embed.shape[-1]
|
| 209 |
+
patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim)
|
| 210 |
+
return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1).to(previous_dtype)
|
| 211 |
+
|
| 212 |
+
def prepare_tokens_with_masks(self, x, masks=None):
|
| 213 |
+
B, nc, w, h = x.shape
|
| 214 |
+
x = self.patch_embed(x)
|
| 215 |
+
if masks is not None:
|
| 216 |
+
x = torch.where(masks.unsqueeze(-1), self.mask_token.to(x.dtype).unsqueeze(0), x)
|
| 217 |
+
|
| 218 |
+
x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1)
|
| 219 |
+
x = x + self.interpolate_pos_encoding(x, w, h)
|
| 220 |
+
|
| 221 |
+
if self.register_tokens is not None:
|
| 222 |
+
x = torch.cat(
|
| 223 |
+
(
|
| 224 |
+
x[:, :1],
|
| 225 |
+
self.register_tokens.expand(x.shape[0], -1, -1),
|
| 226 |
+
x[:, 1:],
|
| 227 |
+
),
|
| 228 |
+
dim=1,
|
| 229 |
+
)
|
| 230 |
+
|
| 231 |
+
return x
|
| 232 |
+
|
| 233 |
+
def forward_features_list(self, x_list, masks_list):
|
| 234 |
+
x = [self.prepare_tokens_with_masks(x, masks) for x, masks in zip(x_list, masks_list)]
|
| 235 |
+
for blk in self.blocks:
|
| 236 |
+
x = blk(x)
|
| 237 |
+
|
| 238 |
+
all_x = x
|
| 239 |
+
output = []
|
| 240 |
+
for x, masks in zip(all_x, masks_list):
|
| 241 |
+
x_norm = self.norm(x)
|
| 242 |
+
output.append(
|
| 243 |
+
{
|
| 244 |
+
"x_norm_clstoken": x_norm[:, 0],
|
| 245 |
+
"x_norm_regtokens": x_norm[:, 1 : self.num_register_tokens + 1],
|
| 246 |
+
"x_norm_patchtokens": x_norm[:, self.num_register_tokens + 1 :],
|
| 247 |
+
"x_prenorm": x,
|
| 248 |
+
"masks": masks,
|
| 249 |
+
}
|
| 250 |
+
)
|
| 251 |
+
return output
|
| 252 |
+
|
| 253 |
+
def forward_features(self, x, masks=None):
|
| 254 |
+
if isinstance(x, list):
|
| 255 |
+
return self.forward_features_list(x, masks)
|
| 256 |
+
|
| 257 |
+
x = self.prepare_tokens_with_masks(x, masks)
|
| 258 |
+
|
| 259 |
+
for blk in self.blocks:
|
| 260 |
+
x = blk(x)
|
| 261 |
+
|
| 262 |
+
x_norm = self.norm(x)
|
| 263 |
+
return {
|
| 264 |
+
"x_norm_clstoken": x_norm[:, 0],
|
| 265 |
+
"x_norm_regtokens": x_norm[:, 1 : self.num_register_tokens + 1],
|
| 266 |
+
"x_norm_patchtokens": x_norm[:, self.num_register_tokens + 1 :],
|
| 267 |
+
"x_prenorm": x,
|
| 268 |
+
"masks": masks,
|
| 269 |
+
}
|
| 270 |
+
|
| 271 |
+
def _get_intermediate_layers_not_chunked(self, x, n=1):
|
| 272 |
+
x = self.prepare_tokens_with_masks(x)
|
| 273 |
+
# If n is an int, take the n last blocks. If it's a list, take them
|
| 274 |
+
output, total_block_len = [], len(self.blocks)
|
| 275 |
+
blocks_to_take = range(total_block_len - n, total_block_len) if isinstance(n, int) else n
|
| 276 |
+
for i, blk in enumerate(self.blocks):
|
| 277 |
+
x = blk(x)
|
| 278 |
+
if i in blocks_to_take:
|
| 279 |
+
output.append(x)
|
| 280 |
+
assert len(output) == len(blocks_to_take), f"only {len(output)} / {len(blocks_to_take)} blocks found"
|
| 281 |
+
return output
|
| 282 |
+
|
| 283 |
+
def _get_intermediate_layers_chunked(self, x, n=1):
|
| 284 |
+
x = self.prepare_tokens_with_masks(x)
|
| 285 |
+
output, i, total_block_len = [], 0, len(self.blocks[-1])
|
| 286 |
+
# If n is an int, take the n last blocks. If it's a list, take them
|
| 287 |
+
blocks_to_take = range(total_block_len - n, total_block_len) if isinstance(n, int) else n
|
| 288 |
+
for block_chunk in self.blocks:
|
| 289 |
+
for blk in block_chunk[i:]: # Passing the nn.Identity()
|
| 290 |
+
x = blk(x)
|
| 291 |
+
if i in blocks_to_take:
|
| 292 |
+
output.append(x)
|
| 293 |
+
i += 1
|
| 294 |
+
assert len(output) == len(blocks_to_take), f"only {len(output)} / {len(blocks_to_take)} blocks found"
|
| 295 |
+
return output
|
| 296 |
+
|
| 297 |
+
def get_intermediate_layers(
|
| 298 |
+
self,
|
| 299 |
+
x: torch.Tensor,
|
| 300 |
+
n: Union[int, Sequence] = 1, # Layers or n last layers to take
|
| 301 |
+
reshape: bool = False,
|
| 302 |
+
return_class_token: bool = False,
|
| 303 |
+
norm=True
|
| 304 |
+
) -> Tuple[Union[torch.Tensor, Tuple[torch.Tensor]]]:
|
| 305 |
+
if self.chunked_blocks:
|
| 306 |
+
outputs = self._get_intermediate_layers_chunked(x, n)
|
| 307 |
+
else:
|
| 308 |
+
outputs = self._get_intermediate_layers_not_chunked(x, n)
|
| 309 |
+
if norm:
|
| 310 |
+
outputs = [self.norm(out) for out in outputs]
|
| 311 |
+
class_tokens = [out[:, 0] for out in outputs]
|
| 312 |
+
outputs = [out[:, 1 + self.num_register_tokens:] for out in outputs]
|
| 313 |
+
if reshape:
|
| 314 |
+
B, _, w, h = x.shape
|
| 315 |
+
outputs = [
|
| 316 |
+
out.reshape(B, w // self.patch_size, h // self.patch_size, -1).permute(0, 3, 1, 2).contiguous()
|
| 317 |
+
for out in outputs
|
| 318 |
+
]
|
| 319 |
+
if return_class_token:
|
| 320 |
+
return tuple(zip(outputs, class_tokens))
|
| 321 |
+
return tuple(outputs)
|
| 322 |
+
|
| 323 |
+
def forward(self, *args, is_training=False, **kwargs):
|
| 324 |
+
ret = self.forward_features(*args, **kwargs)
|
| 325 |
+
if is_training:
|
| 326 |
+
return ret
|
| 327 |
+
else:
|
| 328 |
+
return self.head(ret["x_norm_clstoken"])
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
def init_weights_vit_timm(module: nn.Module, name: str = ""):
|
| 332 |
+
"""ViT weight initialization, original timm impl (for reproducibility)"""
|
| 333 |
+
if isinstance(module, nn.Linear):
|
| 334 |
+
trunc_normal_(module.weight, std=0.02)
|
| 335 |
+
if module.bias is not None:
|
| 336 |
+
nn.init.zeros_(module.bias)
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
def vit_small(patch_size=16, num_register_tokens=0, **kwargs):
|
| 340 |
+
model = DinoVisionTransformer(
|
| 341 |
+
patch_size=patch_size,
|
| 342 |
+
embed_dim=384,
|
| 343 |
+
depth=12,
|
| 344 |
+
num_heads=6,
|
| 345 |
+
mlp_ratio=4,
|
| 346 |
+
block_fn=partial(Block, attn_class=MemEffAttention),
|
| 347 |
+
num_register_tokens=num_register_tokens,
|
| 348 |
+
**kwargs,
|
| 349 |
+
)
|
| 350 |
+
return model
|
| 351 |
+
|
| 352 |
+
|
| 353 |
+
def vit_base(patch_size=16, num_register_tokens=0, **kwargs):
|
| 354 |
+
model = DinoVisionTransformer(
|
| 355 |
+
patch_size=patch_size,
|
| 356 |
+
embed_dim=768,
|
| 357 |
+
depth=12,
|
| 358 |
+
num_heads=12,
|
| 359 |
+
mlp_ratio=4,
|
| 360 |
+
block_fn=partial(Block, attn_class=MemEffAttention),
|
| 361 |
+
num_register_tokens=num_register_tokens,
|
| 362 |
+
**kwargs,
|
| 363 |
+
)
|
| 364 |
+
return model
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
def vit_large(patch_size=16, num_register_tokens=0, **kwargs):
|
| 368 |
+
model = DinoVisionTransformer(
|
| 369 |
+
patch_size=patch_size,
|
| 370 |
+
embed_dim=1024,
|
| 371 |
+
depth=24,
|
| 372 |
+
num_heads=16,
|
| 373 |
+
mlp_ratio=4,
|
| 374 |
+
block_fn=partial(Block, attn_class=MemEffAttention),
|
| 375 |
+
num_register_tokens=num_register_tokens,
|
| 376 |
+
**kwargs,
|
| 377 |
+
)
|
| 378 |
+
return model
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
def vit_giant2(patch_size=16, num_register_tokens=0, **kwargs):
|
| 382 |
+
"""
|
| 383 |
+
Close to ViT-giant, with embed-dim 1536 and 24 heads => embed-dim per head 64
|
| 384 |
+
"""
|
| 385 |
+
model = DinoVisionTransformer(
|
| 386 |
+
patch_size=patch_size,
|
| 387 |
+
embed_dim=1536,
|
| 388 |
+
depth=40,
|
| 389 |
+
num_heads=24,
|
| 390 |
+
mlp_ratio=4,
|
| 391 |
+
block_fn=partial(Block, attn_class=MemEffAttention),
|
| 392 |
+
num_register_tokens=num_register_tokens,
|
| 393 |
+
**kwargs,
|
| 394 |
+
)
|
| 395 |
+
return model
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
def DINOv2(model_name):
    """Instantiate the DINOv2 backbone selected by ``model_name``.

    Args:
        model_name: One of ``"vits"``, ``"vitb"``, ``"vitl"`` or ``"vitg"``.

    Returns:
        The model produced by the matching ``vit_*`` factory, configured with
        ``img_size=518``, ``patch_size=14`` and ``init_values=1.0``.

    Raises:
        KeyError: If ``model_name`` is not one of the supported keys.
    """
    factories = {
        "vits": vit_small,
        "vitb": vit_base,
        "vitl": vit_large,
        "vitg": vit_giant2,
    }
    build = factories[model_name]

    # Only the giant variant is configured with the fused SwiGLU feed-forward layer.
    ffn_layer = "swiglufused" if model_name == "vitg" else "mlp"

    return build(
        img_size=518,
        patch_size=14,
        init_values=1.0,
        ffn_layer=ffn_layer,
        block_chunks=0,
        num_register_tokens=0,
        interpolate_antialias=False,
        interpolate_offset=0.1,
    )
|
custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2_layers/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
from .mlp import Mlp
|
| 8 |
+
from .patch_embed import PatchEmbed
|
| 9 |
+
from .swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused
|
| 10 |
+
from .block import NestedTensorBlock
|
| 11 |
+
from .attention import MemEffAttention
|
custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2_layers/attention.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# References:
|
| 8 |
+
# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
|
| 9 |
+
# https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py
|
| 10 |
+
|
| 11 |
+
import logging
|
| 12 |
+
|
| 13 |
+
from torch import Tensor
|
| 14 |
+
from torch import nn
|
| 15 |
+
import comfy.ops
|
| 16 |
+
ops = comfy.ops.manual_cast
|
| 17 |
+
from comfy.ldm.modules.attention import optimized_attention
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger("dinov2")
|
| 20 |
+
|
| 21 |
+
# xFormers is optional: record whether its attention kernels could be imported
# so MemEffAttention can pick the xFormers path or fall back to Attention.
try:
    from xformers.ops import memory_efficient_attention, unbind
except ImportError:
    logger.warning("xFormers not available")
    XFORMERS_AVAILABLE = False
else:
    XFORMERS_AVAILABLE = True
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class Attention(nn.Module):
    """Multi-head self-attention whose core is ComfyUI's ``optimized_attention``.

    Args:
        dim: Channel size of the input tokens (last dimension of ``x``).
        num_heads: Number of attention heads; ``dim`` is split evenly across them.
        qkv_bias: Whether the fused query/key/value projection has a bias.
        proj_bias: Whether the output projection has a bias.
        attn_drop: Dropout probability stored in ``self.attn_drop``. The module
            is created but ``forward`` does not apply it.
        proj_drop: Dropout probability applied after the output projection.
    """

    def __init__(
        self,
        dim: int,
        num_heads: int = 8,
        qkv_bias: bool = False,
        proj_bias: bool = True,
        attn_drop: float = 0.0,
        proj_drop: float = 0.0,
    ) -> None:
        super().__init__()
        self.num_heads = num_heads
        self.head_dim = dim // num_heads
        # Kept as an attribute; forward() does not read it.
        self.scale = self.head_dim**-0.5

        self.qkv = ops.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = ops.Linear(dim, dim, bias=proj_bias)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x: Tensor) -> Tensor:
        """Apply self-attention to ``x`` of shape ``(B, N, C)``.

        An earlier explicit ``softmax(q @ k^T) @ v`` formulation was replaced by
        the call to ``optimized_attention`` below.
        """
        batch, tokens, channels = x.shape
        per_head = channels // self.num_heads

        # Fused projection, then split into q/k/v laid out as (B, heads, N, per_head).
        fused = self.qkv(x).reshape(batch, tokens, 3, self.num_heads, per_head)
        q, k, v = fused.permute(2, 0, 3, 1, 4)

        # skip_reshape=True: q/k/v are already split per head.
        # NOTE(review): the result is presumably (B, N, C) -- confirm against comfy.
        attended = optimized_attention(q, k, v, self.num_heads, skip_reshape=True)

        return self.proj_drop(self.proj(attended))
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
class MemEffAttention(Attention):
    """Attention that uses xFormers' ``memory_efficient_attention`` when available."""

    def forward(self, x: Tensor, attn_bias=None) -> Tensor:
        """Apply self-attention to ``x`` of shape ``(B, N, C)``.

        Args:
            x: Input tokens.
            attn_bias: Optional bias forwarded to ``memory_efficient_attention``.
                Must be ``None`` when xFormers is not installed.

        Returns:
            Tensor with the same ``(B, N, C)`` shape as ``x``.
        """
        if XFORMERS_AVAILABLE:
            batch, tokens, channels = x.shape
            fused = self.qkv(x).reshape(batch, tokens, 3, self.num_heads, channels // self.num_heads)

            # Split the fused projection along its q/k/v axis.
            q, k, v = unbind(fused, 2)

            attended = memory_efficient_attention(q, k, v, attn_bias=attn_bias)
            attended = attended.reshape([batch, tokens, channels])

            return self.proj_drop(self.proj(attended))

        # Without xFormers only the plain (bias-free) path of the parent exists.
        assert attn_bias is None, "xFormers is required for nested tensors usage"
        return super().forward(x)
|
| 93 |
+
|
| 94 |
+
|
custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2_layers/block.py
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# References:
|
| 8 |
+
# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
|
| 9 |
+
# https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py
|
| 10 |
+
|
| 11 |
+
import logging
|
| 12 |
+
from typing import Callable, List, Any, Tuple, Dict
|
| 13 |
+
|
| 14 |
+
import torch
|
| 15 |
+
from torch import nn, Tensor
|
| 16 |
+
|
| 17 |
+
from .attention import Attention, MemEffAttention
|
| 18 |
+
from .drop_path import DropPath
|
| 19 |
+
from .layer_scale import LayerScale
|
| 20 |
+
from .mlp import Mlp
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
logger = logging.getLogger("dinov2")
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# xFormers is optional: the nested-tensor helpers below need it, the plain
# Block.forward path does not.
try:
    from xformers.ops import fmha
    from xformers.ops import scaled_index_add, index_select_cat
except ImportError:
    logger.warning("xFormers not available")
    XFORMERS_AVAILABLE = False
else:
    XFORMERS_AVAILABLE = True
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class Block(nn.Module):
    """Pre-norm transformer block: attention and FFN, each in a residual branch.

    Args:
        dim: Token channel size.
        num_heads: Number of attention heads.
        mlp_ratio: Hidden FFN width as a multiple of ``dim``.
        qkv_bias: Bias flag for the attention q/k/v projection.
        proj_bias: Bias flag for the attention output projection.
        ffn_bias: Bias flag passed to the FFN layer.
        drop: Dropout probability passed to attention (``proj_drop``) and FFN (``drop``).
        attn_drop: Attention dropout probability passed to ``attn_class``.
        init_values: LayerScale initial value; a falsy value disables LayerScale.
        drop_path: Stochastic-depth rate; ``DropPath`` modules are only created
            when it is greater than zero.
        act_layer: Activation factory passed to the FFN layer.
        norm_layer: Factory for the two normalization layers.
        attn_class: Attention module class.
        ffn_layer: Feed-forward module class.
    """

    def __init__(
        self,
        dim: int,
        num_heads: int,
        mlp_ratio: float = 4.0,
        qkv_bias: bool = False,
        proj_bias: bool = True,
        ffn_bias: bool = True,
        drop: float = 0.0,
        attn_drop: float = 0.0,
        init_values=None,
        drop_path: float = 0.0,
        act_layer: Callable[..., nn.Module] = nn.GELU,
        norm_layer: Callable[..., nn.Module] = nn.LayerNorm,
        attn_class: Callable[..., nn.Module] = Attention,
        ffn_layer: Callable[..., nn.Module] = Mlp,
    ) -> None:
        super().__init__()
        # Attention branch.
        self.norm1 = norm_layer(dim)
        self.attn = attn_class(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            proj_bias=proj_bias,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        self.ls1 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity()
        self.drop_path1 = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()

        # Feed-forward branch.
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = ffn_layer(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
            bias=ffn_bias,
        )
        self.ls2 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity()
        self.drop_path2 = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()

        self.sample_drop_ratio = drop_path

    def forward(self, x: Tensor) -> Tensor:
        """Run the block on ``x``.

        Three regimes are used:
        * training with drop rate > 0.1: batch-subset stochastic depth;
        * training with 0 < drop rate <= 0.1: per-sample ``DropPath``;
        * otherwise: plain residual connections.
        """

        def attn_residual_func(x: Tensor) -> Tensor:
            return self.ls1(self.attn(self.norm1(x)))

        def ffn_residual_func(x: Tensor) -> Tensor:
            return self.ls2(self.mlp(self.norm2(x)))

        if self.training and self.sample_drop_ratio > 0.1:
            # the overhead is compensated only for a drop path rate larger than 0.1
            x = drop_add_residual_stochastic_depth(
                x,
                residual_func=attn_residual_func,
                sample_drop_ratio=self.sample_drop_ratio,
            )
            x = drop_add_residual_stochastic_depth(
                x,
                residual_func=ffn_residual_func,
                sample_drop_ratio=self.sample_drop_ratio,
            )
        elif self.training and self.sample_drop_ratio > 0.0:
            x = x + self.drop_path1(attn_residual_func(x))
            # Each branch uses its own DropPath module (previously drop_path1 was
            # reused here by mistake).
            x = x + self.drop_path2(ffn_residual_func(x))
        else:
            x = x + attn_residual_func(x)
            x = x + ffn_residual_func(x)
        return x
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def drop_add_residual_stochastic_depth(
    x: Tensor,
    residual_func: Callable[[Tensor], Tensor],
    sample_drop_ratio: float = 0.0,
) -> Tensor:
    """Add a residual computed on a random subset of the batch.

    Args:
        x: Input of shape ``(b, n, d)``.
        residual_func: Maps the selected ``(subset, n, d)`` samples to a residual.
        sample_drop_ratio: Fraction of the batch to skip; at least one sample
            is always kept.

    Returns:
        Tensor shaped like ``x`` where the kept samples received their residual,
        scaled by ``b / subset`` to compensate for the dropped ones.
    """
    # 1) pick the samples that keep their residual branch
    batch, _, _ = x.shape
    keep = max(int(batch * (1 - sample_drop_ratio)), 1)
    kept_idx = torch.randperm(batch, device=x.device)[:keep]

    # 2) evaluate the residual on that subset only
    residual = residual_func(x[kept_idx]).flatten(1)

    # 3) scatter the rescaled residual back onto the flattened input
    rescale = batch / keep
    merged = torch.index_add(x.flatten(1), 0, kept_idx, residual.to(dtype=x.dtype), alpha=rescale)
    return merged.view_as(x)
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def get_branges_scales(x, sample_drop_ratio=0.0):
    """Pick the batch indices to keep and the matching residual scale.

    Args:
        x: Tensor of shape ``(b, n, d)``.
        sample_drop_ratio: Fraction of the batch to skip; at least one sample
            is always kept.

    Returns:
        ``(brange, residual_scale_factor)``: a random index tensor of the kept
        samples and ``b / len(brange)``.
    """
    batch, _, _ = x.shape
    keep = max(int(batch * (1 - sample_drop_ratio)), 1)
    shuffled = torch.randperm(batch, device=x.device)
    return shuffled[:keep], batch / keep
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def add_residual(x, brange, residual, residual_scale_factor, scaling_vector=None):
    """Add ``residual`` onto the rows of ``x`` selected by ``brange``.

    Args:
        x: Full-batch tensor.
        brange: Indices along dim 0 of ``x`` that receive the residual.
        residual: Residual for the selected samples.
        residual_scale_factor: Multiplier applied to the residual (``alpha``).
        scaling_vector: Optional extra scaling passed to xFormers'
            ``scaled_index_add``.

    Returns:
        Without ``scaling_vector`` the result of ``torch.index_add`` on the
        inputs flattened from dim 1; otherwise the ``scaled_index_add`` result.
    """
    residual = residual.to(dtype=x.dtype) if scaling_vector is not None else residual
    if scaling_vector is not None:
        return scaled_index_add(
            x, brange, residual, scaling=scaling_vector, alpha=residual_scale_factor
        )

    flat_residual = residual.flatten(1).to(dtype=x.dtype)
    return torch.index_add(x.flatten(1), 0, brange, flat_residual, alpha=residual_scale_factor)
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
# Module-level cache of attention-bias objects. get_attn_bias_and_cat() keys it
# by a tuple of (batch_size, sequence_length) pairs, one pair per input tensor.
attn_bias_cache: Dict[Tuple, Any] = {}
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def get_attn_bias_and_cat(x_list, branges=None):
    """Concatenate ``x_list`` into one batch-size-1 sequence and fetch its attention bias.

    Performs the optional index select, concatenates the tensors and returns
    the block-diagonal attention bias from ``attn_bias_cache`` (building and
    caching it on first use).

    Args:
        x_list: Tensors of shape ``(b_i, n_i, d)``.
        branges: Optional per-tensor index tensors selecting which samples of
            each ``x`` take part.

    Returns:
        ``(attn_bias, cat_tensors)``.
    """
    if branges is None:
        batch_sizes = [x.shape[0] for x in x_list]
    else:
        batch_sizes = [b.shape[0] for b in branges]

    cache_key = tuple((b, x.shape[1]) for b, x in zip(batch_sizes, x_list))
    if cache_key not in attn_bias_cache:
        # One sequence-length entry per participating sample.
        seqlens = [x.shape[1] for b, x in zip(batch_sizes, x_list) for _ in range(b)]
        bias = fmha.BlockDiagonalMask.from_seqlens(seqlens)
        bias._batch_sizes = batch_sizes
        attn_bias_cache[cache_key] = bias

    if branges is None:
        as_single_batch = tuple(x.reshape([1, -1, *x.shape[2:]]) for x in x_list)
        cat_tensors = torch.cat(as_single_batch, dim=1)
    else:
        flattened = [x.flatten(1) for x in x_list]
        cat_tensors = index_select_cat(flattened, branges).view(1, -1, x_list[0].shape[-1])

    return attn_bias_cache[cache_key], cat_tensors
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
def drop_add_residual_stochastic_depth_list(
    x_list: List[Tensor],
    residual_func: Callable[[Tensor, Any], Tensor],
    sample_drop_ratio: float = 0.0,
    scaling_vector=None,
) -> List[Tensor]:
    """Batch-subset stochastic depth for a list of tensors processed together.

    Args:
        x_list: Tensors of shape ``(b_i, n_i, d)``.
        residual_func: Called as ``residual_func(x_cat, attn_bias=attn_bias)``
            on the concatenated kept samples.
        sample_drop_ratio: Fraction of each tensor's batch to skip.
        scaling_vector: Optional scaling forwarded to ``add_residual``.

    Returns:
        A list with one tensor per input, each shaped like its input. (The
        annotation previously claimed a single ``Tensor``.)
    """
    # 1) generate random set of indices for dropping samples in the batch
    branges_scales = [get_branges_scales(x, sample_drop_ratio=sample_drop_ratio) for x in x_list]
    branges = [s[0] for s in branges_scales]
    residual_scale_factors = [s[1] for s in branges_scales]

    # 2) get attention bias and index+concat the tensors
    attn_bias, x_cat = get_attn_bias_and_cat(x_list, branges)

    # 3) apply residual_func to get residual, and split the result
    residual_list = attn_bias.split(residual_func(x_cat, attn_bias=attn_bias))  # type: ignore

    # 4) add each rescaled residual back onto its own input
    return [
        add_residual(x, brange, residual, residual_scale_factor, scaling_vector).view_as(x)
        for x, brange, residual, residual_scale_factor in zip(
            x_list, branges, residual_list, residual_scale_factors
        )
    ]
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
class NestedTensorBlock(Block):
    """``Block`` that also accepts a list of tensors processed as one nested batch."""

    def forward_nested(self, x_list: List[Tensor]) -> List[Tensor]:
        """
        x_list contains a list of tensors to nest together and run
        """
        assert isinstance(self.attn, MemEffAttention)

        if self.training and self.sample_drop_ratio > 0.0:

            # LayerScale is left out of these residual functions: its gamma is
            # passed as ``scaling_vector`` and applied when the residual is added.
            def attn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
                return self.attn(self.norm1(x), attn_bias=attn_bias)

            def ffn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
                return self.mlp(self.norm2(x))

            x_list = drop_add_residual_stochastic_depth_list(
                x_list,
                residual_func=attn_residual_func,
                sample_drop_ratio=self.sample_drop_ratio,
                scaling_vector=self.ls1.gamma if isinstance(self.ls1, LayerScale) else None,
            )
            x_list = drop_add_residual_stochastic_depth_list(
                x_list,
                residual_func=ffn_residual_func,
                sample_drop_ratio=self.sample_drop_ratio,
                # Guard on ls2 itself (previously checked ls1 by copy-paste).
                scaling_vector=self.ls2.gamma if isinstance(self.ls2, LayerScale) else None,
            )
            return x_list
        else:

            def attn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
                return self.ls1(self.attn(self.norm1(x), attn_bias=attn_bias))

            def ffn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
                return self.ls2(self.mlp(self.norm2(x)))

            attn_bias, x = get_attn_bias_and_cat(x_list)
            x = x + attn_residual_func(x, attn_bias=attn_bias)
            x = x + ffn_residual_func(x)
            return attn_bias.split(x)

    def forward(self, x_or_x_list):
        """Dispatch on input type.

        A ``Tensor`` goes through ``Block.forward``; a ``list`` goes through
        ``forward_nested`` and requires xFormers.

        Raises:
            AssertionError: For a list input without xFormers, or for any other
                input type.
        """
        if isinstance(x_or_x_list, Tensor):
            return super().forward(x_or_x_list)
        elif isinstance(x_or_x_list, list):
            assert XFORMERS_AVAILABLE, "Please install xFormers for nested tensors usage"
            return self.forward_nested(x_or_x_list)
        else:
            raise AssertionError
|
custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2_layers/drop_path.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# References:
|
| 8 |
+
# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
|
| 9 |
+
# https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/drop.py
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
from torch import nn
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def drop_path(x, drop_prob: float = 0.0, training: bool = False):
    """Randomly zero whole samples of ``x`` and rescale the survivors.

    Args:
        x: Input tensor; dim 0 is treated as the sample dimension.
        drop_prob: Probability of zeroing a sample.
        training: The input is returned untouched when this is false.

    Returns:
        ``x`` itself when nothing is dropped, otherwise ``x`` multiplied by a
        per-sample Bernoulli mask divided by the keep probability.
    """
    if not training or drop_prob == 0.0:
        return x

    keep_prob = 1 - drop_prob
    # One mask value per sample, broadcast over every remaining dimension.
    mask_shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    mask = x.new_empty(mask_shape).bernoulli_(keep_prob)
    if keep_prob > 0.0:
        mask.div_(keep_prob)
    return x * mask
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob=None):
        """Store ``drop_prob``, the probability handed to ``drop_path``."""
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        """Apply ``drop_path`` using this module's training flag."""
        return drop_path(x, drop_prob=self.drop_prob, training=self.training)
|
custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2_layers/layer_scale.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# Modified from: https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py#L103-L110
|
| 8 |
+
|
| 9 |
+
from typing import Union
|
| 10 |
+
|
| 11 |
+
import torch
|
| 12 |
+
from torch import Tensor
|
| 13 |
+
from torch import nn
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class LayerScale(nn.Module):
    """Multiply the input by a learnable per-channel vector ``gamma``.

    Args:
        dim: Length of ``gamma``.
        init_values: Initial value(s) ``gamma`` is filled with.
        inplace: When true, the input tensor is scaled in place.
    """

    def __init__(
        self,
        dim: int,
        init_values: Union[float, Tensor] = 1e-5,
        inplace: bool = False,
    ) -> None:
        super().__init__()
        self.inplace = inplace
        self.gamma = nn.Parameter(init_values * torch.ones(dim))

    def forward(self, x: Tensor) -> Tensor:
        """Return ``x`` scaled by ``gamma`` (the same tensor object when in place)."""
        if self.inplace:
            return x.mul_(self.gamma)
        return x * self.gamma
|
custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2_layers/mlp.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# References:
|
| 8 |
+
# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
|
| 9 |
+
# https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/mlp.py
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
from typing import Callable, Optional
|
| 13 |
+
|
| 14 |
+
from torch import Tensor, nn
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class Mlp(nn.Module):
    """Two-layer feed-forward network: linear, activation, dropout, linear, dropout.

    Args:
        in_features: Input channel size.
        hidden_features: Hidden width; falls back to ``in_features`` when falsy.
        out_features: Output channel size; falls back to ``in_features`` when falsy.
        act_layer: Factory for the activation module.
        drop: Dropout probability used after both linear layers.
        bias: Whether both linear layers carry a bias.
    """

    def __init__(
        self,
        in_features: int,
        hidden_features: Optional[int] = None,
        out_features: Optional[int] = None,
        act_layer: Callable[..., nn.Module] = nn.GELU,
        drop: float = 0.0,
        bias: bool = True,
    ) -> None:
        super().__init__()
        hidden_dim = hidden_features or in_features
        out_dim = out_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_dim, bias=bias)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_dim, out_dim, bias=bias)
        self.drop = nn.Dropout(drop)

    def forward(self, x: Tensor) -> Tensor:
        """Apply the feed-forward network to ``x``."""
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))
|
custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2_layers/patch_embed.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# References:
|
| 8 |
+
# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
|
| 9 |
+
# https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py
|
| 10 |
+
|
| 11 |
+
from typing import Callable, Optional, Tuple, Union
|
| 12 |
+
|
| 13 |
+
from torch import Tensor
|
| 14 |
+
import torch.nn as nn
|
| 15 |
+
import comfy.ops
|
| 16 |
+
ops = comfy.ops.manual_cast
|
| 17 |
+
|
| 18 |
+
def make_2tuple(x):
    """Return ``x`` as a pair.

    A tuple must already have length 2 and is returned unchanged; an ``int``
    is duplicated into ``(x, x)``.

    Raises:
        AssertionError: If ``x`` is a tuple of another length, or is neither a
            tuple nor an ``int``.
    """
    if not isinstance(x, tuple):
        assert isinstance(x, int)
        return (x, x)

    assert len(x) == 2
    return x
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class PatchEmbed(nn.Module):
    """
    2D image to patch embedding: (B,C,H,W) -> (B,N,D)

    Args:
        img_size: Image size.
        patch_size: Patch token size.
        in_chans: Number of input image channels.
        embed_dim: Number of linear projection output channels.
        norm_layer: Normalization layer.
        flatten_embedding: When false, ``forward`` returns ``(B, H, W, D)``
            instead of the flattened ``(B, N, D)``.
    """

    def __init__(
        self,
        img_size: Union[int, Tuple[int, int]] = 224,
        patch_size: Union[int, Tuple[int, int]] = 16,
        in_chans: int = 3,
        embed_dim: int = 768,
        norm_layer: Optional[Callable] = None,
        flatten_embedding: bool = True,
    ) -> None:
        super().__init__()

        image_HW = make_2tuple(img_size)
        patch_HW = make_2tuple(patch_size)
        patch_grid_size = (
            image_HW[0] // patch_HW[0],
            image_HW[1] // patch_HW[1],
        )

        self.img_size = image_HW
        self.patch_size = patch_HW
        self.patches_resolution = patch_grid_size
        self.num_patches = patch_grid_size[0] * patch_grid_size[1]

        self.in_chans = in_chans
        self.embed_dim = embed_dim

        self.flatten_embedding = flatten_embedding

        # Non-overlapping patches: kernel and stride both equal the patch size.
        self.proj = ops.Conv2d(in_chans, embed_dim, kernel_size=patch_HW, stride=patch_HW)
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    def forward(self, x: Tensor) -> Tensor:
        """Embed an image batch ``(B, C, H, W)`` into patch tokens.

        Raises:
            AssertionError: If ``H`` or ``W`` is not a multiple of the patch size.
        """
        _, _, H, W = x.shape
        patch_H, patch_W = self.patch_size

        assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}"
        assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}"

        x = self.proj(x)  # B C H W
        H, W = x.size(2), x.size(3)
        x = x.flatten(2).transpose(1, 2)  # B HW C
        x = self.norm(x)
        if not self.flatten_embedding:
            x = x.reshape(-1, H, W, self.embed_dim)  # B H W C
        return x

    def flops(self) -> float:
        """Estimate the FLOPs of the projection, plus the norm layer if one exists."""
        Ho, Wo = self.patches_resolution
        flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1])
        # ``self.norm`` is never None (it defaults to nn.Identity), so test for a
        # real normalization layer rather than for None.
        if self.norm is not None and not isinstance(self.norm, nn.Identity):
            flops += Ho * Wo * self.embed_dim
        return flops
|
custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dinov2_layers/swiglu_ffn.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
from typing import Callable, Optional
|
| 8 |
+
|
| 9 |
+
from torch import Tensor, nn
|
| 10 |
+
import torch.nn.functional as F
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class SwiGLUFFN(nn.Module):
    """SwiGLU feed-forward network: ``w3(silu(x1) * x2)`` with ``x1, x2 = w12(x)``.

    Args:
        in_features: Input channel size.
        hidden_features: Width of each gate half; falls back to ``in_features``.
        out_features: Output channel size; falls back to ``in_features``.
        act_layer: Accepted for signature compatibility; not used.
        drop: Accepted for signature compatibility; not used.
        bias: Whether both linear layers carry a bias.
    """

    def __init__(
        self,
        in_features: int,
        hidden_features: Optional[int] = None,
        out_features: Optional[int] = None,
        act_layer: Callable[..., nn.Module] = None,
        drop: float = 0.0,
        bias: bool = True,
    ) -> None:
        super().__init__()
        gate_dim = hidden_features or in_features
        out_dim = out_features or in_features
        # A single projection produces both gate halves.
        self.w12 = nn.Linear(in_features, 2 * gate_dim, bias=bias)
        self.w3 = nn.Linear(gate_dim, out_dim, bias=bias)

    def forward(self, x: Tensor) -> Tensor:
        """Apply the gated feed-forward network to ``x``."""
        gate, value = self.w12(x).chunk(2, dim=-1)
        return self.w3(F.silu(gate) * value)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Prefer the xFormers SwiGLU implementation; without xFormers the pure-PyTorch
# SwiGLUFFN takes its place under the same name.
try:
    from xformers.ops import SwiGLU
except ImportError:
    SwiGLU = SwiGLUFFN
    XFORMERS_AVAILABLE = False
else:
    XFORMERS_AVAILABLE = True
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class SwiGLUFFNFused(SwiGLU):
    """``SwiGLU`` whose hidden width is two thirds of the requested one, rounded up to a multiple of 8.

    Args:
        in_features: Input channel size.
        hidden_features: Requested hidden width before the 2/3 reduction; falls
            back to ``in_features``.
        out_features: Output channel size; falls back to ``in_features``.
        act_layer: Accepted for signature compatibility; not used.
        drop: Accepted for signature compatibility; not used.
        bias: Forwarded to the parent constructor.
    """

    def __init__(
        self,
        in_features: int,
        hidden_features: Optional[int] = None,
        out_features: Optional[int] = None,
        act_layer: Callable[..., nn.Module] = None,
        drop: float = 0.0,
        bias: bool = True,
    ) -> None:
        requested = hidden_features or in_features
        # Take 2/3 of the requested width, then round up to the next multiple of 8.
        reduced = int(requested * 2 / 3)
        aligned = (reduced + 7) // 8 * 8
        super().__init__(
            in_features=in_features,
            hidden_features=aligned,
            out_features=out_features or in_features,
            bias=bias,
        )
|
custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/dpt.py
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
from .dinov2 import DINOv2
|
| 6 |
+
from .util.blocks import FeatureFusionBlock, _make_scratch
|
| 7 |
+
|
| 8 |
+
import comfy.ops
|
| 9 |
+
ops = comfy.ops.manual_cast
|
| 10 |
+
|
| 11 |
+
def _make_fusion_block(features, use_bn, size=None):
    """Create a ``FeatureFusionBlock`` with this module's fixed settings.

    Args:
        features: Passed as the first positional argument of ``FeatureFusionBlock``.
        use_bn: Forwarded as ``bn``.
        size: Forwarded as ``size``.

    Returns:
        The constructed ``FeatureFusionBlock``, using a non-inplace ReLU,
        ``deconv=False``, ``expand=False`` and ``align_corners=True``.
    """
    activation = nn.ReLU(False)
    return FeatureFusionBlock(
        features,
        activation,
        deconv=False,
        bn=use_bn,
        expand=False,
        align_corners=True,
        size=size,
    )
|
| 21 |
+
|
| 22 |
+
class ConvBlock(nn.Module):
    """3x3 convolution, then batch normalization, then an in-place ReLU."""

    def __init__(self, in_feature, out_feature):
        """Build the block mapping ``in_feature`` channels to ``out_feature`` channels."""
        super().__init__()

        # stride 1 with padding 1 keeps the spatial size for a 3x3 kernel.
        layers = [
            ops.Conv2d(in_feature, out_feature, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_feature),
            nn.ReLU(True),
        ]
        self.conv_block = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the conv / batch-norm / ReLU stack."""
        return self.conv_block(x)
|
| 34 |
+
|
| 35 |
+
class DPTHead(nn.Module):
    """Decoder head that fuses four token feature maps into one dense map.

    Each incoming feature is reshaped from a token sequence into a 2-D grid,
    projected to a per-level channel count, resized (x4 up, x2 up, unchanged,
    x2 down), and then merged from the coarsest level to the finest through
    the ``refinenet`` fusion blocks before the output convolutions.
    """

    def __init__(
        self,
        in_channels,
        features=256,
        use_bn=False,
        out_channels=[256, 512, 1024, 1024],
        use_clstoken=False,
        is_metric=False
    ):
        """Create all sub-modules of the head.

        Args:
            in_channels: Channel width of the incoming token features.
            features: Channel width used inside the fusion path.
            use_bn: Forwarded to the fusion blocks as their batch-norm flag.
            out_channels: Channel width of each of the four pyramid levels.
            use_clstoken: When true, the class token is concatenated to every
                patch token and projected back to ``in_channels``.
            is_metric: When true the output stack ends in a Sigmoid, otherwise
                in a ReLU followed by an Identity.
        """
        super(DPTHead, self).__init__()

        self.use_clstoken = use_clstoken
        self.is_metric = is_metric

        # One 1x1 projection per pyramid level.
        level_projections = [
            ops.Conv2d(
                in_channels=in_channels,
                out_channels=level_width,
                kernel_size=1,
                stride=1,
                padding=0,
            )
            for level_width in out_channels
        ]
        self.projects = nn.ModuleList(level_projections)

        # Per-level spatial resizing: x4 up, x2 up, unchanged, x2 down.
        width_0, width_1, width_3 = out_channels[0], out_channels[1], out_channels[3]
        upsample_x4 = nn.ConvTranspose2d(
            in_channels=width_0, out_channels=width_0, kernel_size=4, stride=4, padding=0)
        upsample_x2 = nn.ConvTranspose2d(
            in_channels=width_1, out_channels=width_1, kernel_size=2, stride=2, padding=0)
        passthrough = nn.Identity()
        downsample_x2 = ops.Conv2d(
            in_channels=width_3, out_channels=width_3, kernel_size=3, stride=2, padding=1)
        self.resize_layers = nn.ModuleList(
            [upsample_x4, upsample_x2, passthrough, downsample_x2])

        if use_clstoken:
            # One readout projection per level: (patch token ++ class token) -> token.
            self.readout_projects = nn.ModuleList([
                nn.Sequential(ops.Linear(2 * in_channels, in_channels), nn.GELU())
                for _ in range(len(self.projects))
            ])

        self.scratch = _make_scratch(out_channels, features, groups=1, expand=False)
        self.scratch.stem_transpose = None

        # Registered in the order refinenet1..refinenet4, as attribute names
        # are part of the state-dict keys.
        for level in (1, 2, 3, 4):
            setattr(self.scratch, f"refinenet{level}", _make_fusion_block(features, use_bn))

        head_features_1 = features
        head_features_2 = 32

        self.scratch.output_conv1 = ops.Conv2d(
            head_features_1, head_features_1 // 2, kernel_size=3, stride=1, padding=1)

        # Both variants share the first three layers and differ only in the tail.
        output_layers = [
            ops.Conv2d(head_features_1 // 2, head_features_2, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            ops.Conv2d(head_features_2, 1, kernel_size=1, stride=1, padding=0),
        ]
        if self.is_metric:
            output_layers.append(nn.Sigmoid())
        else:
            output_layers.append(nn.ReLU(True))
            output_layers.append(nn.Identity())
        self.scratch.output_conv2 = nn.Sequential(*output_layers)

    def forward(self, out_features, patch_h, patch_w):
        """Decode the intermediate features into a single-channel map.

        Args:
            out_features: Iterable of four items; element ``[0]`` of each item
                is the patch-token tensor and element ``[1]`` the class token
                (only read when ``use_clstoken`` is set).
            patch_h: Number of patch rows the tokens are reshaped to.
            patch_w: Number of patch columns the tokens are reshaped to.

        Returns:
            Output of ``output_conv2`` computed at a spatial size of
            ``(patch_h * 14, patch_w * 14)``.
        """
        pyramid = []
        for level, feature in enumerate(out_features):
            tokens = feature[0]
            if self.use_clstoken:
                cls_token = feature[1]
                readout = cls_token.unsqueeze(1).expand_as(tokens)
                tokens = self.readout_projects[level](torch.cat((tokens, readout), -1))

            # Token sequence -> channel-first 2-D grid.
            batch, width = tokens.shape[0], tokens.shape[-1]
            grid = tokens.permute(0, 2, 1).reshape((batch, width, patch_h, patch_w))

            grid = self.projects[level](grid)
            grid = self.resize_layers[level](grid)
            pyramid.append(grid)

        layer_1, layer_2, layer_3, layer_4 = pyramid

        scratch = self.scratch
        layer_1_rn = scratch.layer1_rn(layer_1)
        layer_2_rn = scratch.layer2_rn(layer_2)
        layer_3_rn = scratch.layer3_rn(layer_3)
        layer_4_rn = scratch.layer4_rn(layer_4)

        # Coarse-to-fine fusion; each step is resized to the next finer level.
        path_4 = scratch.refinenet4(layer_4_rn, size=layer_3_rn.shape[2:])
        path_3 = scratch.refinenet3(path_4, layer_3_rn, size=layer_2_rn.shape[2:])
        path_2 = scratch.refinenet2(path_3, layer_2_rn, size=layer_1_rn.shape[2:])
        path_1 = scratch.refinenet1(path_2, layer_1_rn)

        out = scratch.output_conv1(path_1)
        target_size = (int(patch_h * 14), int(patch_w * 14))
        out = F.interpolate(out, target_size, mode="bilinear", align_corners=True)
        return scratch.output_conv2(out)
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
class DepthAnythingV2(nn.Module):
    """Depth model made of a ``DINOv2`` backbone and a ``DPTHead`` decoder."""

    def __init__(
        self,
        encoder='vitl',
        features=256,
        out_channels=[256, 512, 1024, 1024],
        use_bn=False,
        use_clstoken=False,
        is_metric=False,
        max_depth=20.0
    ):
        """Build the backbone and the decoder head.

        Args:
            encoder: Backbone name; also the key into ``intermediate_layer_idx``.
            features: Forwarded to ``DPTHead``.
            out_channels: Forwarded to ``DPTHead``.
            use_bn: Forwarded to ``DPTHead``.
            use_clstoken: Forwarded to ``DPTHead``.
            is_metric: Selects the metric output path in the head and in ``forward``.
            max_depth: Multiplier applied to the head output when ``is_metric`` is set.
        """
        super(DepthAnythingV2, self).__init__()

        # Backbone layers whose outputs are handed to the decoder, per encoder.
        self.intermediate_layer_idx = dict(
            vits=[2, 5, 8, 11],
            vitb=[2, 5, 8, 11],
            vitl=[4, 11, 17, 23],
            vitg=[9, 19, 29, 39],
        )

        self.is_metric = is_metric
        self.max_depth = max_depth

        self.encoder = encoder
        self.pretrained = DINOv2(model_name=encoder)

        self.depth_head = DPTHead(
            self.pretrained.embed_dim,
            features,
            use_bn,
            out_channels=out_channels,
            use_clstoken=use_clstoken,
            is_metric=is_metric,
        )

    def forward(self, x):
        """Predict a depth map for ``x``.

        The patch grid is taken as the last two dimensions of ``x`` integer-
        divided by 14. The head output is scaled by ``max_depth`` in metric
        mode and passed through a ReLU otherwise; dimension 1 is squeezed
        before returning.
        """
        height, width = x.shape[-2], x.shape[-1]
        patch_h = height // 14
        patch_w = width // 14

        layer_ids = self.intermediate_layer_idx[self.encoder]
        features = self.pretrained.get_intermediate_layers(x, layer_ids, return_class_token=True)

        raw = self.depth_head(features, patch_h, patch_w)
        depth = raw * self.max_depth if self.is_metric else F.relu(raw)

        return depth.squeeze(1)
|
custom_nodes/ComfyUI-DepthAnythingV2/depth_anything_v2/util/blocks.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch.nn as nn
|
| 2 |
+
import comfy.ops
|
| 3 |
+
ops = comfy.ops.manual_cast
|
| 4 |
+
|
| 5 |
+
def _make_scratch(in_shape, out_shape, groups=1, expand=False):
|
| 6 |
+
scratch = nn.Module()
|
| 7 |
+
|
| 8 |
+
out_shape1 = out_shape
|
| 9 |
+
out_shape2 = out_shape
|
| 10 |
+
out_shape3 = out_shape
|
| 11 |
+
if len(in_shape) >= 4:
|
| 12 |
+
out_shape4 = out_shape
|
| 13 |
+
|
| 14 |
+
if expand:
|
| 15 |
+
out_shape1 = out_shape
|
| 16 |
+
out_shape2 = out_shape * 2
|
| 17 |
+
out_shape3 = out_shape * 4
|
| 18 |
+
if len(in_shape) >= 4:
|
| 19 |
+
out_shape4 = out_shape * 8
|
| 20 |
+
|
| 21 |
+
scratch.layer1_rn = ops.Conv2d(in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups)
|
| 22 |
+
scratch.layer2_rn = ops.Conv2d(in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups)
|
| 23 |
+
scratch.layer3_rn = ops.Conv2d(in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups)
|
| 24 |
+
if len(in_shape) >= 4:
|
| 25 |
+
scratch.layer4_rn = ops.Conv2d(in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups)
|
| 26 |
+
|
| 27 |
+
return scratch
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class ResidualConvUnit(nn.Module):
    """Residual convolution module.

    Applies activation -> 3x3 conv -> (batch norm) -> activation -> 3x3 conv
    -> (batch norm) and adds the untouched input back onto the result.
    """

    def __init__(self, features, activation, bn):
        """Init.

        Args:
            features (int): number of input and output channels
            activation (callable): activation applied before each convolution
            bn (bool): whether a batch-norm layer follows each convolution
        """
        super().__init__()

        self.bn = bn

        self.groups = 1

        self.conv1 = ops.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups)

        self.conv2 = ops.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups)

        if self.bn:
            self.bn1 = nn.BatchNorm2d(features)
            self.bn2 = nn.BatchNorm2d(features)

        self.activation = activation

        self.skip_add = nn.quantized.FloatFunctional()

    def forward(self, x):
        """Forward pass.

        Args:
            x (tensor): input

        Returns:
            tensor: output, same shape as ``x``
        """
        out = self.activation(x)
        out = self.conv1(out)
        if self.bn:
            out = self.bn1(out)

        out = self.activation(out)
        out = self.conv2(out)
        if self.bn:
            out = self.bn2(out)

        # The former `groups > 1` branch called `self.conv_merge`, which this
        # class never defines; it was unreachable (groups is fixed to 1) and
        # has been dropped.
        return self.skip_add.add(out, x)
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
class FeatureFusionBlock(nn.Module):
    """Feature fusion block.

    Optionally adds a refined skip input to the main input, refines the sum,
    resizes it bilinearly and applies a 1x1 output convolution.
    """

    def __init__(
        self,
        features,
        activation,
        deconv=False,
        bn=False,
        expand=False,
        align_corners=True,
        size=None
    ):
        """Init.

        Args:
            features (int): number of features
            activation (callable): activation handed to both residual units
            deconv (bool): stored on the instance
            bn (bool): batch-norm flag handed to both residual units
            expand (bool): when true the output convolution halves the channels
            align_corners (bool): forwarded to the bilinear interpolation
            size: default output size used when ``forward`` receives none
        """
        super(FeatureFusionBlock, self).__init__()

        self.deconv = deconv
        self.align_corners = align_corners
        self.groups = 1
        self.expand = expand

        out_features = features // 2 if self.expand == True else features

        self.out_conv = ops.Conv2d(
            features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)

        self.resConfUnit1 = ResidualConvUnit(features, activation, bn)
        self.resConfUnit2 = ResidualConvUnit(features, activation, bn)

        self.skip_add = nn.quantized.FloatFunctional()

        self.size = size

    def forward(self, *xs, size=None):
        """Forward pass.

        Args:
            *xs: one tensor, or two tensors where the second is refined by the
                first residual unit and added to the first.
            size: output size for this call; falls back to ``self.size`` and,
                if that is also ``None``, to upscaling by a factor of 2.

        Returns:
            tensor: output
        """
        fused = xs[0]

        if len(xs) == 2:
            skip = self.resConfUnit1(xs[1])
            fused = self.skip_add.add(fused, skip)

        fused = self.resConfUnit2(fused)

        # Per-call size wins over the constructor default; with neither, x2.
        target = self.size if size is None else size
        resize_kwargs = {"scale_factor": 2} if target is None else {"size": target}

        fused = nn.functional.interpolate(
            fused, **resize_kwargs, mode="bilinear", align_corners=self.align_corners)

        return self.out_conv(fused)
|
custom_nodes/ComfyUI-DepthAnythingV2/nodes.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
from torchvision import transforms
|
| 5 |
+
import os
|
| 6 |
+
from contextlib import nullcontext
|
| 7 |
+
|
| 8 |
+
import comfy.model_management as mm
|
| 9 |
+
from comfy.utils import ProgressBar, load_torch_file
|
| 10 |
+
import folder_paths
|
| 11 |
+
|
| 12 |
+
from .depth_anything_v2.dpt import DepthAnythingV2
|
| 13 |
+
|
| 14 |
+
from contextlib import nullcontext
|
| 15 |
+
# accelerate is optional: when it imports, the model is created under
# init_empty_weights() and tensors are placed with
# set_module_tensor_to_device(); otherwise a plain load_state_dict() is used.
try:
    from accelerate import init_empty_weights
    from accelerate.utils import set_module_tensor_to_device
    is_accelerate_available = True
except Exception:
    # Stay best-effort for any import-time failure, but let
    # KeyboardInterrupt / SystemExit propagate (a bare `except:` would not).
    is_accelerate_available = False
|
| 22 |
+
|
| 23 |
+
class DownloadAndLoadDepthAnythingV2Model:
    """Node that downloads (if missing) and loads a Depth Anything V2 checkpoint.

    The loaded model is cached on the node instance and reused while the
    requested file name stays the same.
    """

    @classmethod
    def INPUT_TYPES(s):
        """Describe the single required input: the checkpoint file name."""
        return {"required": {
            "model": (
                    [
                        'depth_anything_v2_vits_fp16.safetensors',
                        'depth_anything_v2_vits_fp32.safetensors',
                        'depth_anything_v2_vitb_fp16.safetensors',
                        'depth_anything_v2_vitb_fp32.safetensors',
                        'depth_anything_v2_vitl_fp16.safetensors',
                        'depth_anything_v2_vitl_fp32.safetensors',
                        'depth_anything_v2_metric_hypersim_vitl_fp32.safetensors',
                        'depth_anything_v2_metric_vkitti_vitl_fp32.safetensors'
                    ],
                    {
                    "default": 'depth_anything_v2_vitl_fp32.safetensors'
                    }),
            },
        }

    RETURN_TYPES = ("DAMODEL",)
    RETURN_NAMES = ("da_v2_model",)
    FUNCTION = "loadmodel"
    CATEGORY = "DepthAnythingV2"
    # Backslashes are escaped so the text is unchanged at runtime but no
    # longer relies on invalid escape sequences (`\m`, `\d`).
    DESCRIPTION = """
Models autodownload to `ComfyUI\\models\\depthanything` from
https://huggingface.co/Kijai/DepthAnythingV2-safetensors/tree/main

fp16 reduces quality by a LOT, not recommended.
"""

    def loadmodel(self, model):
        """Load ``model`` (a checkpoint file name) and return it wrapped in a dict.

        Args:
            model: Checkpoint file name. Substrings select the behaviour:
                ``fp16`` -> float16 (else float32), ``vitl``/``vitb``/``vits``
                -> encoder, ``metric`` -> metric head, ``hypersim`` ->
                ``max_depth`` 20.0 (else 80.0).

        Returns:
            One-element tuple holding a dict with keys ``model``, ``dtype``
            and ``is_metric``.

        Raises:
            ValueError: if no known encoder name occurs in ``model``.
        """
        device = mm.get_torch_device()
        dtype = torch.float16 if "fp16" in model else torch.float32
        model_configs = {
            'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
            'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
            'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
            #'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
        }
        custom_config = {
            'model_name': model,
        }

        cached_model = getattr(self, 'model', None)
        cached_config = getattr(self, 'current_config', None)
        if cached_model is None or custom_config != cached_config:
            download_path = os.path.join(folder_paths.models_dir, "depthanything")
            model_path = os.path.join(download_path, model)

            if not os.path.exists(model_path):
                print(f"Downloading model to: {model_path}")
                from huggingface_hub import snapshot_download
                snapshot_download(repo_id="Kijai/DepthAnythingV2-safetensors",
                                  allow_patterns=[f"*{model}*"],
                                  local_dir=download_path,
                                  local_dir_use_symlinks=False)

            print(f"Loading model from: {model_path}")

            # Same precedence as before: vitl, then vitb, then vits.
            encoder = next((name for name in ("vitl", "vitb", "vits") if name in model), None)
            if encoder is None:
                raise ValueError(f"Cannot determine encoder (vits/vitb/vitl) from model name: {model}")

            max_depth = 20.0 if "hypersim" in model else 80.0

            build_kwargs = dict(model_configs[encoder])
            if 'metric' in model:
                build_kwargs.update(is_metric=True, max_depth=max_depth)

            with (init_empty_weights() if is_accelerate_available else nullcontext()):
                loaded = DepthAnythingV2(**build_kwargs)

            state_dict = load_torch_file(model_path)
            if is_accelerate_available:
                for key in state_dict:
                    set_module_tensor_to_device(loaded, key, device=device, dtype=dtype, value=state_dict[key])
            else:
                loaded.load_state_dict(state_dict)

            loaded.eval()

            # Publish the cache only after a fully successful load, so a failed
            # download/load can never leave a stale or weightless model that a
            # retry with the same name would silently reuse.
            self.model = loaded
            self.current_config = custom_config

        # Built after the cache check so both the fresh-load and the
        # cache-hit path return a result.
        da_model = {
            "model": self.model,
            "dtype": dtype,
            "is_metric": self.model.is_metric
        }

        return (da_model,)
|
| 116 |
+
|
| 117 |
+
class DepthAnything_V2:
|
| 118 |
+
@classmethod
|
| 119 |
+
def INPUT_TYPES(s):
|
| 120 |
+
return {"required": {
|
| 121 |
+
"da_model": ("DAMODEL", ),
|
| 122 |
+
"images": ("IMAGE", ),
|
| 123 |
+
},
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
RETURN_TYPES = ("IMAGE",)
|
| 127 |
+
RETURN_NAMES =("image",)
|
| 128 |
+
FUNCTION = "process"
|
| 129 |
+
CATEGORY = "DepthAnythingV2"
|
| 130 |
+
DESCRIPTION = """
|
| 131 |
+
https://depth-anything-v2.github.io
|
| 132 |
+
"""
|
| 133 |
+
|
| 134 |
+
def process(self, da_model, images):
|
| 135 |
+
device = mm.get_torch_device()
|
| 136 |
+
offload_device = mm.unet_offload_device()
|
| 137 |
+
model = da_model['model']
|
| 138 |
+
dtype=da_model['dtype']
|
| 139 |
+
|
| 140 |
+
B, H, W, C = images.shape
|
| 141 |
+
|
| 142 |
+
#images = images.to(device)
|
| 143 |
+
images = images.permute(0, 3, 1, 2)
|
| 144 |
+
|
| 145 |
+
orig_H, orig_W = H, W
|
| 146 |
+
if W % 14 != 0:
|
| 147 |
+
W = W - (W % 14)
|
| 148 |
+
if H % 14 != 0:
|
| 149 |
+
H = H - (H % 14)
|
| 150 |
+
if orig_H % 14 != 0 or orig_W % 14 != 0:
|
| 151 |
+
images = F.interpolate(images, size=(H, W), mode="bilinear")
|
| 152 |
+
|
| 153 |
+
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
| 154 |
+
normalized_images = normalize(images)
|
| 155 |
+
pbar = ProgressBar(B)
|
| 156 |
+
out = []
|
| 157 |
+
model.to(device)
|
| 158 |
+
autocast_condition = (dtype != torch.float32) and not mm.is_device_mps(device)
|
| 159 |
+
with torch.autocast(mm.get_autocast_device(device), dtype=dtype) if autocast_condition else nullcontext():
|
| 160 |
+
for img in normalized_images:
|
| 161 |
+
depth = model(img.unsqueeze(0).to(device))
|
| 162 |
+
depth = (depth - depth.min()) / (depth.max() - depth.min())
|
| 163 |
+
out.append(depth.cpu())
|
| 164 |
+
pbar.update(1)
|
| 165 |
+
model.to(offload_device)
|
| 166 |
+
depth_out = torch.cat(out, dim=0)
|
| 167 |
+
depth_out = depth_out.unsqueeze(-1).repeat(1, 1, 1, 3).cpu().float()
|
| 168 |
+
|
| 169 |
+
final_H = (orig_H // 2) * 2
|
| 170 |
+
final_W = (orig_W // 2) * 2
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
if depth_out.shape[1] != final_H or depth_out.shape[2] != final_W:
|
| 175 |
+
depth_out = F.interpolate(depth_out.permute(0, 3, 1, 2), size=(final_H, final_W), mode="bilinear").permute(0, 2, 3, 1)
|
| 176 |
+
depth_out = (depth_out - depth_out.min()) / (depth_out.max() - depth_out.min())
|
| 177 |
+
depth_out = torch.clamp(depth_out, 0, 1)
|
| 178 |
+
if da_model['is_metric']:
|
| 179 |
+
depth_out = 1 - depth_out
|
| 180 |
+
return (depth_out,)
|
| 181 |
+
|
| 182 |
+
# Node identifier -> implementing class. Both identifiers are spelled exactly
# like the class names, so the keys are taken from the classes themselves.
NODE_CLASS_MAPPINGS = {
    node_class.__name__: node_class
    for node_class in (DepthAnything_V2, DownloadAndLoadDepthAnythingV2Model)
}
# Node identifier -> display label.
NODE_DISPLAY_NAME_MAPPINGS = dict([
    ("DepthAnything_V2", "Depth Anything V2"),
    ("DownloadAndLoadDepthAnythingV2Model", "DownloadAndLoadDepthAnythingV2Model"),
])
|
custom_nodes/ComfyUI-DepthAnythingV2/pyproject.toml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
name = "comfyui-depthanythingv2"
description = "ComfyUI nodes to use [a/DepthAnythingV2](https://depth-anything-v2.github.io/)\nNOTE:Models autodownload to ComfyUI/models/depthanything from [a/https://huggingface.co/Kijai/DepthAnythingV2-safetensors/tree/main](https://huggingface.co/Kijai/DepthAnythingV2-safetensors/tree/main)"
version = "1.0.1"
# Table form points at the license file; a bare string would be read as an
# SPDX expression by tooling that follows the current metadata spec.
license = { file = "LICENSE" }
dependencies = ["huggingface_hub", "accelerate"]

[project.urls]
Repository = "https://github.com/kijai/ComfyUI-DepthAnythingV2"

# Used by Comfy Registry https://comfyregistry.org
[tool.comfy]
PublisherId = "kijai"
DisplayName = "ComfyUI-DepthAnythingV2"
Icon = ""
|
custom_nodes/ComfyUI-DepthAnythingV2/requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
huggingface_hub
|
| 2 |
+
accelerate
|
custom_nodes/ComfyUI-F5-TTS/=1.31.14
ADDED
|
File without changes
|
custom_nodes/ComfyUI-Frame-Interpolation/ckpts/flavr/FLAVR_2x.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45509c279355866b46021b112301c4d1862e13569b4e0d58d826171d94695412
|
| 3 |
+
size 504778983
|
custom_nodes/ComfyUI-Frame-Interpolation/ckpts/rife/rife49.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e55fd00f3cc184e3c65961f4bb827a9da022e78eed36b055242c0ac30000d533
|
| 3 |
+
size 21345274
|
custom_nodes/ComfyUI-Frame-Interpolation/ckpts/rife/sudo_rife4_269.662_testV1_scale1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c9dff599a1c05c38cdc52930a739e7144ce8210fdd098a364e0155c1d23c27c
|
| 3 |
+
size 33719173
|