Eji-Sensei14 committed on 14 days ago

Commit

edd3cd4

verified ·

1 Parent(s): 5263104

Upload folder using huggingface_hub

Browse files

Files changed (19) hide show

.gitattributes +1 -0
ComfyUI-WanAnimatePreprocess/.gitattributes +2 -0
ComfyUI-WanAnimatePreprocess/.github/workflows/publish.yml +25 -0
ComfyUI-WanAnimatePreprocess/.gitignore +13 -0
ComfyUI-WanAnimatePreprocess/LICENSE +201 -0
ComfyUI-WanAnimatePreprocess/__init__.py +3 -0
ComfyUI-WanAnimatePreprocess/example.png +3 -0
ComfyUI-WanAnimatePreprocess/example_workflows/WanAnimate_native_example_01.json +3797 -0
ComfyUI-WanAnimatePreprocess/models/onnx_models.py +282 -0
ComfyUI-WanAnimatePreprocess/nodes.py +494 -0
ComfyUI-WanAnimatePreprocess/onetoall/infer_function.py +508 -0
ComfyUI-WanAnimatePreprocess/onetoall/utils.py +347 -0
ComfyUI-WanAnimatePreprocess/pose_utils/human_visualization.py +1272 -0
ComfyUI-WanAnimatePreprocess/pose_utils/pose2d_utils.py +1110 -0
ComfyUI-WanAnimatePreprocess/pyproject.toml +15 -0
ComfyUI-WanAnimatePreprocess/readme.md +29 -0
ComfyUI-WanAnimatePreprocess/requirements.txt +3 -0
ComfyUI-WanAnimatePreprocess/retarget_pose.py +843 -0
ComfyUI-WanAnimatePreprocess/utils.py +317 -0

.gitattributes CHANGED Viewed

@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 dev/ltx-2-3-22b-dev-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 dev/ltx-2-3-22b-dev-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+ComfyUI-WanAnimatePreprocess/example.png filter=lfs diff=lfs merge=lfs -text

ComfyUI-WanAnimatePreprocess/.gitattributes ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # Auto detect text files and perform LF normalization
2	+ * text=auto

ComfyUI-WanAnimatePreprocess/.github/workflows/publish.yml ADDED Viewed

	@@ -0,0 +1,25 @@

+name: Publish to Comfy registry
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+    paths:
+      - "pyproject.toml"
+permissions:
+  issues: write
+jobs:
+  publish-node:
+    name: Publish Custom Node to registry
+    runs-on: ubuntu-latest
+    if: ${{ github.repository_owner == 'kijai' }}
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+      - name: Publish Custom Node
+        uses: Comfy-Org/publish-node-action@v1
+        with:
+          ## Add your own personal access token to your GitHub repository secrets and reference it here.
+          personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }}

ComfyUI-WanAnimatePreprocess/.gitignore ADDED Viewed

	@@ -0,0 +1,13 @@

+output/
+*__pycache__/
+samples*/
+runs/
+checkpoints/
+master_ip
+logs/
+*.DS_Store
+.idea
+tools/
+.vscode/
+convert_*
+*.pt

ComfyUI-WanAnimatePreprocess/LICENSE ADDED Viewed

	@@ -0,0 +1,201 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

ComfyUI-WanAnimatePreprocess/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS
2	+
3	+ __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]

ComfyUI-WanAnimatePreprocess/example.png ADDED Viewed

Git LFS Details

SHA256: e89afdcc2a5c34b6a576ffe018720197914bb0c59cb3ba2ac7f33e2a56ec2396
Pointer size: 131 Bytes
Size of remote file: 569 kB

ComfyUI-WanAnimatePreprocess/example_workflows/WanAnimate_native_example_01.json ADDED Viewed

	@@ -0,0 +1,3797 @@

+{
+  "id": "8b7a9a57-2303-4ef5-9fc2-bf41713bd1fc",
+  "revision": 0,
+  "last_node_id": 207,
+  "last_link_id": 362,
+  "nodes": [
+    {
+      "id": 66,
+      "type": "ImageConcatMulti",
+      "pos": [
+        2660.1005859375,
+        -950.2750244140625
+      ],
+      "size": [
+        270,
+        150
+      ],
+      "flags": {},
+      "order": 64,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "image_1",
+          "type": "IMAGE",
+          "link": 87
+        },
+        {
+          "name": "image_2",
+          "shape": 7,
+          "type": "IMAGE",
+          "link": 107
+        }
+      ],
+      "outputs": [
+        {
+          "name": "images",
+          "type": "IMAGE",
+          "links": [
+            89
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfyui-kjnodes",
+        "ver": "468fcc86f0b29e79a8510e8239eb15714d6747a6"
+      },
+      "widgets_values": [
+        2,
+        "left",
+        true,
+        null
+      ]
+    },
+    {
+      "id": 137,
+      "type": "GetNode",
+      "pos": [
+        2441.813232421875,
+        -1173.92919921875
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 0,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            239
+          ]
+        }
+      ],
+      "title": "Get_face_images",
+      "properties": {},
+      "widgets_values": [
+        "face_images"
+      ],
+      "color": "#2a363b",
+      "bgcolor": "#3f5159"
+    },
+    {
+      "id": 140,
+      "type": "GetNode",
+      "pos": [
+        2441.813232421875,
+        -1124.576904296875
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 1,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            243
+          ]
+        }
+      ],
+      "title": "Get_pose_images",
+      "properties": {},
+      "widgets_values": [
+        "pose_images"
+      ],
+      "color": "#2a363b",
+      "bgcolor": "#3f5159"
+    },
+    {
+      "id": 134,
+      "type": "GetNode",
+      "pos": [
+        2443.561279296875,
+        -1227.2171630859375
+      ],
+      "size": [
+        210,
+        34
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 2,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            236
+          ]
+        }
+      ],
+      "title": "Get_reference_image",
+      "properties": {},
+      "widgets_values": [
+        "reference_image"
+      ],
+      "color": "#2a363b",
+      "bgcolor": "#3f5159"
+    },
+    {
+      "id": 148,
+      "type": "SetNode",
+      "pos": [
+        -548.7736206054688,
+        -2964.476318359375
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 40,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "AUDIO",
+          "type": "AUDIO",
+          "link": 255
+        }
+      ],
+      "outputs": [
+        {
+          "name": "*",
+          "type": "*",
+          "links": null
+        }
+      ],
+      "title": "Set_input_audio",
+      "properties": {
+        "previousName": "input_audio"
+      },
+      "widgets_values": [
+        "input_audio"
+      ]
+    },
+    {
+      "id": 149,
+      "type": "GetNode",
+      "pos": [
+        3043.859130859375,
+        -1156.882080078125
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 3,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "AUDIO",
+          "type": "AUDIO",
+          "links": [
+            256
+          ]
+        }
+      ],
+      "title": "Get_input_audio",
+      "properties": {},
+      "widgets_values": [
+        "input_audio"
+      ]
+    },
+    {
+      "id": 153,
+      "type": "SetNode",
+      "pos": [
+        -1605.1839599609375,
+        -2832.218994140625
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 33,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "INT",
+          "type": "INT",
+          "link": 263
+        }
+      ],
+      "outputs": [
+        {
+          "name": "*",
+          "type": "*",
+          "links": null
+        }
+      ],
+      "title": "Set_width",
+      "properties": {
+        "previousName": "width"
+      },
+      "widgets_values": [
+        "width"
+      ],
+      "color": "#1b4669",
+      "bgcolor": "#29699c"
+    },
+    {
+      "id": 154,
+      "type": "SetNode",
+      "pos": [
+        -1617.8741455078125,
+        -2533.993408203125
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 31,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "INT",
+          "type": "INT",
+          "link": 264
+        }
+      ],
+      "outputs": [
+        {
+          "name": "*",
+          "type": "*",
+          "links": null
+        }
+      ],
+      "title": "Set_height",
+      "properties": {
+        "previousName": "height"
+      },
+      "widgets_values": [
+        "height"
+      ],
+      "color": "#1b4669",
+      "bgcolor": "#29699c"
+    },
+    {
+      "id": 63,
+      "type": "VHS_LoadVideo",
+      "pos": [
+        -876.9246826171875,
+        -3084.905517578125
+      ],
+      "size": [
+        315.8014221191406,
+        491.6708679199219
+      ],
+      "flags": {},
+      "order": 32,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "meta_batch",
+          "shape": 7,
+          "type": "VHS_BatchManager",
+          "link": null
+        },
+        {
+          "name": "vae",
+          "shape": 7,
+          "type": "VAE",
+          "link": null
+        },
+        {
+          "name": "custom_width",
+          "type": "INT",
+          "widget": {
+            "name": "custom_width"
+          },
+          "link": 257
+        },
+        {
+          "name": "custom_height",
+          "type": "INT",
+          "widget": {
+            "name": "custom_height"
+          },
+          "link": 258
+        }
+      ],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            248
+          ]
+        },
+        {
+          "name": "frame_count",
+          "type": "INT",
+          "links": [
+            267
+          ]
+        },
+        {
+          "name": "audio",
+          "type": "AUDIO",
+          "links": [
+            255
+          ]
+        },
+        {
+          "name": "video_info",
+          "type": "VHS_VIDEOINFO",
+          "links": null
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfyui-videohelpersuite",
+        "ver": "8e4d79471bf1952154768e8435a9300077b534fa",
+        "Node name for S&R": "VHS_LoadVideo"
+      },
+      "widgets_values": {
+        "video": "raw.mp4",
+        "force_rate": 16,
+        "custom_width": 960,
+        "custom_height": 544,
+        "frame_load_cap": 0,
+        "skip_first_frames": 0,
+        "select_every_nth": 1,
+        "format": "AnimateDiff",
+        "choose video to upload": "image",
+        "videopreview": {
+          "hidden": false,
+          "paused": false,
+          "params": {
+            "filename": "raw.mp4",
+            "type": "input",
+            "format": "video/mp4",
+            "force_rate": 16,
+            "custom_width": 960,
+            "custom_height": 544,
+            "frame_load_cap": 0,
+            "skip_first_frames": 0,
+            "select_every_nth": 1
+          }
+        }
+      }
+    },
+    {
+      "id": 157,
+      "type": "SetNode",
+      "pos": [
+        -528.8223266601562,
+        -3030.21337890625
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 39,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "INT",
+          "type": "INT",
+          "link": 267
+        }
+      ],
+      "outputs": [
+        {
+          "name": "*",
+          "type": "*",
+          "links": null
+        }
+      ],
+      "title": "Set_frame_count",
+      "properties": {
+        "previousName": "frame_count"
+      },
+      "widgets_values": [
+        "frame_count"
+      ],
+      "color": "#1b4669",
+      "bgcolor": "#29699c"
+    },
+    {
+      "id": 144,
+      "type": "SetNode",
+      "pos": [
+        -522.720947265625,
+        -3107.148681640625
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 38,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "link": 248
+        }
+      ],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            306
+          ]
+        }
+      ],
+      "title": "Set_input_video",
+      "properties": {
+        "previousName": "input_video"
+      },
+      "widgets_values": [
+        "input_video"
+      ],
+      "color": "#2a363b",
+      "bgcolor": "#3f5159"
+    },
+    {
+      "id": 128,
+      "type": "SetNode",
+      "pos": [
+        -459.54620361328125,
+        -1650.783935546875
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 41,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "link": 231
+        }
+      ],
+      "outputs": [
+        {
+          "name": "*",
+          "type": "*",
+          "links": null
+        }
+      ],
+      "title": "Set_reference_image",
+      "properties": {
+        "previousName": "reference_image"
+      },
+      "widgets_values": [
+        "reference_image"
+      ],
+      "color": "#2a363b",
+      "bgcolor": "#3f5159"
+    },
+    {
+      "id": 64,
+      "type": "ImageResizeKJv2",
+      "pos": [
+        -772.3116455078125,
+        -1675.555419921875
+      ],
+      "size": [
+        270,
+        336
+      ],
+      "flags": {},
+      "order": 34,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "image",
+          "type": "IMAGE",
+          "link": 82
+        },
+        {
+          "name": "mask",
+          "shape": 7,
+          "type": "MASK",
+          "link": null
+        },
+        {
+          "name": "width",
+          "type": "INT",
+          "widget": {
+            "name": "width"
+          },
+          "link": 286
+        },
+        {
+          "name": "height",
+          "type": "INT",
+          "widget": {
+            "name": "height"
+          },
+          "link": 287
+        }
+      ],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            231
+          ]
+        },
+        {
+          "name": "width",
+          "type": "INT",
+          "links": []
+        },
+        {
+          "name": "height",
+          "type": "INT",
+          "links": []
+        },
+        {
+          "name": "mask",
+          "type": "MASK",
+          "links": null
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfyui-kjnodes",
+        "ver": "468fcc86f0b29e79a8510e8239eb15714d6747a6",
+        "Node name for S&R": "ImageResizeKJv2"
+      },
+      "widgets_values": [
+        832,
+        480,
+        "lanczos",
+        "pad_edge_pixel",
+        "0, 0, 0",
+        "top",
+        16,
+        "cpu",
+        "<tr><td>Output: </td><td><b>1</b> x <b>832</b> x <b>480 | 4.57MB</b></td></tr>"
+      ]
+    },
+    {
+      "id": 57,
+      "type": "LoadImage",
+      "pos": [
+        -1116.435791015625,
+        -1679.51318359375
+      ],
+      "size": [
+        274.080078125,
+        314
+      ],
+      "flags": {},
+      "order": 4,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            82
+          ]
+        },
+        {
+          "name": "MASK",
+          "type": "MASK",
+          "links": null
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfy-core",
+        "ver": "0.3.57",
+        "Node name for S&R": "LoadImage"
+      },
+      "widgets_values": [
+        "refer.jpeg",
+        "image"
+      ]
+    },
+    {
+      "id": 142,
+      "type": "SetNode",
+      "pos": [
+        1086.6927490234375,
+        -2124.5263671875
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 61,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "MASK",
+          "type": "MASK",
+          "link": 245
+        }
+      ],
+      "outputs": [
+        {
+          "name": "MASK",
+          "type": "MASK",
+          "links": [
+            246
+          ]
+        }
+      ],
+      "title": "Set_mask",
+      "properties": {
+        "previousName": "mask"
+      },
+      "widgets_values": [
+        "mask"
+      ],
+      "color": "#1c5715",
+      "bgcolor": "#1f401b"
+    },
+    {
+      "id": 99,
+      "type": "DrawMaskOnImage",
+      "pos": [
+        1222.5340576171875,
+        -2132.91455078125
+      ],
+      "size": [
+        270,
+        78
+      ],
+      "flags": {},
+      "order": 63,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "image",
+          "type": "IMAGE",
+          "link": 250
+        },
+        {
+          "name": "mask",
+          "type": "MASK",
+          "link": 246
+        }
+      ],
+      "outputs": [
+        {
+          "name": "images",
+          "type": "IMAGE",
+          "links": [
+            233
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfyui-kjnodes",
+        "ver": "623b5913dc7f240fd8b26422e99f8849a21c5473",
+        "Node name for S&R": "DrawMaskOnImage"
+      },
+      "widgets_values": [
+        "0, 0, 0"
+      ]
+    },
+    {
+      "id": 146,
+      "type": "GetNode",
+      "pos": [
+        1086.932861328125,
+        -2184.602783203125
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 5,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            250
+          ]
+        }
+      ],
+      "title": "Get_input_video",
+      "properties": {},
+      "widgets_values": [
+        "input_video"
+      ],
+      "color": "#2a363b",
+      "bgcolor": "#3f5159"
+    },
+    {
+      "id": 130,
+      "type": "SetNode",
+      "pos": [
+        1294.1051025390625,
+        -2211.976806640625
+      ],
+      "size": [
+        211.05747985839844,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 65,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "link": 233
+        }
+      ],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            251
+          ]
+        }
+      ],
+      "title": "Set_background_image",
+      "properties": {
+        "previousName": "background_image"
+      },
+      "widgets_values": [
+        "background_image"
+      ],
+      "color": "#2a363b",
+      "bgcolor": "#3f5159"
+    },
+    {
+      "id": 77,
+      "type": "ImageConcatMulti",
+      "pos": [
+        2653.37939453125,
+        -1220.90087890625
+      ],
+      "size": [
+        270,
+        190
+      ],
+      "flags": {},
+      "order": 30,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "image_1",
+          "type": "IMAGE",
+          "link": 236
+        },
+        {
+          "name": "image_2",
+          "shape": 7,
+          "type": "IMAGE",
+          "link": 239
+        },
+        {
+          "name": "image_3",
+          "shape": 7,
+          "type": "IMAGE",
+          "link": 243
+        },
+        {
+          "name": "image_4",
+          "shape": 7,
+          "type": "IMAGE",
+          "link": 249
+        }
+      ],
+      "outputs": [
+        {
+          "name": "images",
+          "type": "IMAGE",
+          "links": [
+            107
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfyui-kjnodes",
+        "ver": "468fcc86f0b29e79a8510e8239eb15714d6747a6"
+      },
+      "widgets_values": [
+        4,
+        "down",
+        true,
+        null
+      ]
+    },
+    {
+      "id": 145,
+      "type": "GetNode",
+      "pos": [
+        2463.91552734375,
+        -1073.7054443359375
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 6,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            249
+          ]
+        }
+      ],
+      "title": "Get_input_video",
+      "properties": {},
+      "widgets_values": [
+        "input_video"
+      ],
+      "color": "#2a363b",
+      "bgcolor": "#3f5159"
+    },
+    {
+      "id": 42,
+      "type": "GetImageSizeAndCount",
+      "pos": [
+        2555.719482421875,
+        -692.73095703125
+      ],
+      "size": [
+        277.20001220703125,
+        86
+      ],
+      "flags": {},
+      "order": 62,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "image",
+          "type": "IMAGE",
+          "link": 346
+        }
+      ],
+      "outputs": [
+        {
+          "name": "image",
+          "type": "IMAGE",
+          "slot_index": 0,
+          "links": [
+            87
+          ]
+        },
+        {
+          "label": "832 width",
+          "name": "width",
+          "type": "INT",
+          "links": null
+        },
+        {
+          "label": "480 height",
+          "name": "height",
+          "type": "INT",
+          "links": null
+        },
+        {
+          "label": "109 count",
+          "name": "count",
+          "type": "INT",
+          "links": null
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfyui-kjnodes",
+        "ver": "36f6fdd7d4c393675ac622bd300ef667ee65d8b8",
+        "Node name for S&R": "GetImageSizeAndCount"
+      },
+      "widgets_values": []
+    },
+    {
+      "id": 151,
+      "type": "INTConstant",
+      "pos": [
+        -1704.931640625,
+        -2640.54638671875
+      ],
+      "size": [
+        210,
+        58
+      ],
+      "flags": {
+        "collapsed": false
+      },
+      "order": 7,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "value",
+          "type": "INT",
+          "links": [
+            258,
+            264,
+            287
+          ]
+        }
+      ],
+      "title": "Height",
+      "properties": {
+        "cnr_id": "comfyui-kjnodes",
+        "ver": "37659859825cea55940a58110525795ce5deb8be",
+        "Node name for S&R": "INTConstant"
+      },
+      "widgets_values": [
+        480
+      ],
+      "color": "#1b4669",
+      "bgcolor": "#29699c"
+    },
+    {
+      "id": 178,
+      "type": "OnnxDetectionModelLoader",
+      "pos": [
+        -506.44635009765625,
+        -2398.935302734375
+      ],
+      "size": [
+        351.52410888671875,
+        106
+      ],
+      "flags": {},
+      "order": 8,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "model",
+          "type": "POSEMODEL",
+          "links": [
+            290
+          ]
+        }
+      ],
+      "properties": {
+        "aux_id": "kijai/ComfyUI-WanAnimatePreprocess",
+        "ver": "e63d6e71ae4c271f3f81211a7ca7f87607b7e50d",
+        "Node name for S&R": "OnnxDetectionModelLoader"
+      },
+      "widgets_values": [
+        "vitpose-l-wholebody.onnx",
+        "onnx\\yolov10m.onnx",
+        "CUDAExecutionProvider"
+      ]
+    },
+    {
+      "id": 182,
+      "type": "GrowMaskWithBlur",
+      "pos": [
+        431.7315368652344,
+        -2222.123779296875
+      ],
+      "size": [
+        292.748046875,
+        246
+      ],
+      "flags": {},
+      "order": 56,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "mask",
+          "type": "MASK",
+          "link": 314
+        }
+      ],
+      "outputs": [
+        {
+          "name": "mask",
+          "type": "MASK",
+          "links": [
+            315
+          ]
+        },
+        {
+          "name": "mask_inverted",
+          "type": "MASK",
+          "links": null
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfyui-kjnodes",
+        "ver": "bb205d809b467307b8ec3bb1a22680a4873187f8",
+        "Node name for S&R": "GrowMaskWithBlur"
+      },
+      "widgets_values": [
+        10,
+        0,
+        true,
+        false,
+        0,
+        1,
+        1,
+        false
+      ]
+    },
+    {
+      "id": 108,
+      "type": "BlockifyMask",
+      "pos": [
+        779.2421264648438,
+        -2222.2099609375
+      ],
+      "size": [
+        270,
+        58
+      ],
+      "flags": {},
+      "order": 59,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "masks",
+          "type": "MASK",
+          "link": 315
+        }
+      ],
+      "outputs": [
+        {
+          "name": "mask",
+          "type": "MASK",
+          "links": [
+            245
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfyui-kjnodes",
+        "ver": "00da1910634fbf314d407608efb281ae6f7f1ba2",
+        "Node name for S&R": "BlockifyMask"
+      },
+      "widgets_values": [
+        32
+      ]
+    },
+    {
+      "id": 127,
+      "type": "Note",
+      "pos": [
+        983.2726440429688,
+        -2067.319580078125
+      ],
+      "size": [
+        210,
+        88
+      ],
+      "flags": {},
+      "order": 9,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [],
+      "properties": {},
+      "widgets_values": [
+        "These are new nodes in KJNodes"
+      ],
+      "color": "#432",
+      "bgcolor": "#653"
+    },
+    {
+      "id": 104,
+      "type": "Sam2Segmentation",
+      "pos": [
+        83.82372283935547,
+        -2220.965576171875
+      ],
+      "size": [
+        272.087890625,
+        182
+      ],
+      "flags": {},
+      "order": 52,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "sam2_model",
+          "type": "SAM2MODEL",
+          "link": 185
+        },
+        {
+          "name": "image",
+          "type": "IMAGE",
+          "link": 316
+        },
+        {
+          "name": "coordinates_positive",
+          "shape": 7,
+          "type": "STRING",
+          "link": null
+        },
+        {
+          "name": "coordinates_negative",
+          "shape": 7,
+          "type": "STRING",
+          "link": null
+        },
+        {
+          "name": "bboxes",
+          "shape": 7,
+          "type": "BBOX",
+          "link": 321
+        },
+        {
+          "name": "mask",
+          "shape": 7,
+          "type": "MASK",
+          "link": null
+        }
+      ],
+      "outputs": [
+        {
+          "name": "mask",
+          "type": "MASK",
+          "links": [
+            314
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "ComfyUI-segment-anything-2",
+        "ver": "c59676b008a76237002926f684d0ca3a9b29ac54",
+        "Node name for S&R": "Sam2Segmentation"
+      },
+      "widgets_values": [
+        false,
+        false
+      ]
+    },
+    {
+      "id": 180,
+      "type": "GetImageSizeAndCount",
+      "pos": [
+        -246.24586486816406,
+        -3011.47705078125
+      ],
+      "size": [
+        190.86483764648438,
+        86
+      ],
+      "flags": {},
+      "order": 44,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "image",
+          "type": "IMAGE",
+          "link": 306
+        }
+      ],
+      "outputs": [
+        {
+          "name": "image",
+          "type": "IMAGE",
+          "links": [
+            309,
+            316
+          ]
+        },
+        {
+          "label": "832 width",
+          "name": "width",
+          "type": "INT",
+          "links": [
+            307,
+            310
+          ]
+        },
+        {
+          "label": "480 height",
+          "name": "height",
+          "type": "INT",
+          "links": [
+            308,
+            311
+          ]
+        },
+        {
+          "label": "109 count",
+          "name": "count",
+          "type": "INT",
+          "links": null
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfyui-kjnodes",
+        "ver": "bb205d809b467307b8ec3bb1a22680a4873187f8",
+        "Node name for S&R": "GetImageSizeAndCount"
+      },
+      "widgets_values": []
+    },
+    {
+      "id": 173,
+      "type": "DrawViTPose",
+      "pos": [
+        120.56317138671875,
+        -2811.226318359375
+      ],
+      "size": [
+        270,
+        178
+      ],
+      "flags": {},
+      "order": 50,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "pose_data",
+          "type": "POSEDATA",
+          "link": 294
+        },
+        {
+          "name": "width",
+          "type": "INT",
+          "widget": {
+            "name": "width"
+          },
+          "link": 307
+        },
+        {
+          "name": "height",
+          "type": "INT",
+          "widget": {
+            "name": "height"
+          },
+          "link": 308
+        }
+      ],
+      "outputs": [
+        {
+          "name": "pose_images",
+          "type": "IMAGE",
+          "links": [
+            319
+          ]
+        }
+      ],
+      "properties": {
+        "aux_id": "kijai/ComfyUI-WanAnimatePreprocess",
+        "ver": "e63d6e71ae4c271f3f81211a7ca7f87607b7e50d",
+        "Node name for S&R": "DrawViTPose"
+      },
+      "widgets_values": [
+        832,
+        480,
+        16,
+        -1,
+        -1,
+        "True"
+      ]
+    },
+    {
+      "id": 75,
+      "type": "VHS_VideoCombine",
+      "pos": [
+        2010.67431640625,
+        -3041.57568359375
+      ],
+      "size": [
+        743.6680297851562,
+        765.5007934570312
+      ],
+      "flags": {},
+      "order": 67,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "images",
+          "type": "IMAGE",
+          "link": 251
+        },
+        {
+          "name": "audio",
+          "shape": 7,
+          "type": "AUDIO",
+          "link": null
+        },
+        {
+          "name": "meta_batch",
+          "shape": 7,
+          "type": "VHS_BatchManager",
+          "link": null
+        },
+        {
+          "name": "vae",
+          "shape": 7,
+          "type": "VAE",
+          "link": null
+        }
+      ],
+      "outputs": [
+        {
+          "name": "Filenames",
+          "type": "VHS_FILENAMES",
+          "links": null
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfyui-videohelpersuite",
+        "ver": "8e4d79471bf1952154768e8435a9300077b534fa",
+        "Node name for S&R": "VHS_VideoCombine"
+      },
+      "widgets_values": {
+        "frame_rate": 16,
+        "loop_count": 0,
+        "filename_prefix": "WanVideo2_1_T2V",
+        "format": "video/h264-mp4",
+        "pix_fmt": "yuv420p",
+        "crf": 19,
+        "save_metadata": true,
+        "trim_to_audio": false,
+        "pingpong": false,
+        "save_output": false,
+        "videopreview": {
+          "hidden": false,
+          "paused": false,
+          "params": {
+            "filename": "WanVideo2_1_T2V_00010.mp4",
+            "subfolder": "",
+            "type": "temp",
+            "format": "video/h264-mp4",
+            "frame_rate": 16,
+            "workflow": "WanVideo2_1_T2V_00010.png",
+            "fullpath": "N:\\AI\\ComfyUI\\temp\\WanVideo2_1_T2V_00010.mp4"
+          }
+        }
+      }
+    },
+    {
+      "id": 181,
+      "type": "VHS_VideoCombine",
+      "pos": [
+        1137.4183349609375,
+        -3026.80517578125
+      ],
+      "size": [
+        743.6680297851562,
+        765.5007934570312
+      ],
+      "flags": {},
+      "order": 58,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "images",
+          "type": "IMAGE",
+          "link": 320
+        },
+        {
+          "name": "audio",
+          "shape": 7,
+          "type": "AUDIO",
+          "link": null
+        },
+        {
+          "name": "meta_batch",
+          "shape": 7,
+          "type": "VHS_BatchManager",
+          "link": null
+        },
+        {
+          "name": "vae",
+          "shape": 7,
+          "type": "VAE",
+          "link": null
+        }
+      ],
+      "outputs": [
+        {
+          "name": "Filenames",
+          "type": "VHS_FILENAMES",
+          "links": null
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfyui-videohelpersuite",
+        "ver": "8e4d79471bf1952154768e8435a9300077b534fa",
+        "Node name for S&R": "VHS_VideoCombine"
+      },
+      "widgets_values": {
+        "frame_rate": 16,
+        "loop_count": 0,
+        "filename_prefix": "WanVideo2_1_T2V",
+        "format": "video/h264-mp4",
+        "pix_fmt": "yuv420p",
+        "crf": 19,
+        "save_metadata": true,
+        "trim_to_audio": false,
+        "pingpong": false,
+        "save_output": false,
+        "videopreview": {
+          "hidden": false,
+          "paused": false,
+          "params": {
+            "filename": "WanVideo2_1_T2V_00009.mp4",
+            "subfolder": "",
+            "type": "temp",
+            "format": "video/h264-mp4",
+            "frame_rate": 16,
+            "workflow": "WanVideo2_1_T2V_00009.png",
+            "fullpath": "N:\\AI\\ComfyUI\\temp\\WanVideo2_1_T2V_00009.mp4"
+          }
+        }
+      }
+    },
+    {
+      "id": 174,
+      "type": "VHS_VideoCombine",
+      "pos": [
+        714.7760009765625,
+        -3057.50341796875
+      ],
+      "size": [
+        214.7587890625,
+        542.7587890625
+      ],
+      "flags": {},
+      "order": 55,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "images",
+          "type": "IMAGE",
+          "link": 318
+        },
+        {
+          "name": "audio",
+          "shape": 7,
+          "type": "AUDIO",
+          "link": null
+        },
+        {
+          "name": "meta_batch",
+          "shape": 7,
+          "type": "VHS_BatchManager",
+          "link": null
+        },
+        {
+          "name": "vae",
+          "shape": 7,
+          "type": "VAE",
+          "link": null
+        }
+      ],
+      "outputs": [
+        {
+          "name": "Filenames",
+          "type": "VHS_FILENAMES",
+          "links": null
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfyui-videohelpersuite",
+        "ver": "0edce8ef7ce173ac97a3ed3d6f4636029d1a4530",
+        "Node name for S&R": "VHS_VideoCombine"
+      },
+      "widgets_values": {
+        "frame_rate": 16,
+        "loop_count": 0,
+        "filename_prefix": "vitpose",
+        "format": "video/h264-mp4",
+        "pix_fmt": "yuv420p",
+        "crf": 19,
+        "save_metadata": true,
+        "trim_to_audio": false,
+        "pingpong": false,
+        "save_output": false,
+        "videopreview": {
+          "hidden": false,
+          "paused": false,
+          "params": {
+            "filename": "vitpose_00007.mp4",
+            "subfolder": "",
+            "type": "temp",
+            "format": "video/h264-mp4",
+            "frame_rate": 16,
+            "workflow": "vitpose_00007.png",
+            "fullpath": "N:\\AI\\ComfyUI\\temp\\vitpose_00007.mp4"
+          }
+        }
+      }
+    },
+    {
+      "id": 183,
+      "type": "SetNode",
+      "pos": [
+        464.6632080078125,
+        -3038.54296875
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 51,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "link": 317
+        }
+      ],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            318
+          ]
+        }
+      ],
+      "title": "Set_face_images",
+      "properties": {
+        "previousName": "face_images"
+      },
+      "widgets_values": [
+        "face_images"
+      ],
+      "color": "#2a363b",
+      "bgcolor": "#3f5159"
+    },
+    {
+      "id": 184,
+      "type": "SetNode",
+      "pos": [
+        442.45526123046875,
+        -2784.734375
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 54,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "link": 319
+        }
+      ],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            320
+          ]
+        }
+      ],
+      "title": "Set_pose_images",
+      "properties": {
+        "previousName": "pose_images"
+      },
+      "widgets_values": [
+        "pose_images"
+      ],
+      "color": "#2a363b",
+      "bgcolor": "#3f5159"
+    },
+    {
+      "id": 172,
+      "type": "PoseAndFaceDetection",
+      "pos": [
+        104.5530014038086,
+        -3028.416015625
+      ],
+      "size": [
+        313.125,
+        142
+      ],
+      "flags": {},
+      "order": 47,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "model",
+          "type": "POSEMODEL",
+          "link": 290
+        },
+        {
+          "name": "images",
+          "type": "IMAGE",
+          "link": 309
+        },
+        {
+          "name": "retarget_image",
+          "shape": 7,
+          "type": "IMAGE",
+          "link": null
+        },
+        {
+          "name": "width",
+          "type": "INT",
+          "widget": {
+            "name": "width"
+          },
+          "link": 310
+        },
+        {
+          "name": "height",
+          "type": "INT",
+          "widget": {
+            "name": "height"
+          },
+          "link": 311
+        }
+      ],
+      "outputs": [
+        {
+          "name": "pose_data",
+          "type": "POSEDATA",
+          "links": [
+            294
+          ]
+        },
+        {
+          "name": "face_images",
+          "type": "IMAGE",
+          "links": [
+            317
+          ]
+        },
+        {
+          "name": "key_frame_body_points",
+          "type": "STRING",
+          "links": null
+        },
+        {
+          "name": "bboxes",
+          "type": "BBOX",
+          "links": [
+            321
+          ]
+        }
+      ],
+      "properties": {
+        "aux_id": "kijai/ComfyUI-WanAnimatePreprocess",
+        "ver": "e63d6e71ae4c271f3f81211a7ca7f87607b7e50d",
+        "Node name for S&R": "PoseAndFaceDetection"
+      },
+      "widgets_values": [
+        832,
+        480
+      ]
+    },
+    {
+      "id": 102,
+      "type": "DownloadAndLoadSAM2Model",
+      "pos": [
+        -470.4329528808594,
+        -2221.738037109375
+      ],
+      "size": [
+        334.4137268066406,
+        130
+      ],
+      "flags": {},
+      "order": 10,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "sam2_model",
+          "type": "SAM2MODEL",
+          "links": [
+            185
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "ComfyUI-segment-anything-2",
+        "ver": "c59676b008a76237002926f684d0ca3a9b29ac54",
+        "Node name for S&R": "DownloadAndLoadSAM2Model"
+      },
+      "widgets_values": [
+        "sam2.1_hiera_base_plus.safetensors",
+        "video",
+        "cuda",
+        "fp16"
+      ]
+    },
+    {
+      "id": 185,
+      "type": "Note",
+      "pos": [
+        257.0601806640625,
+        -2465.42041015625
+      ],
+      "size": [
+        236.14007568359375,
+        88
+      ],
+      "flags": {},
+      "order": 11,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [],
+      "title": "Note: SAM2 detection",
+      "properties": {},
+      "widgets_values": [
+        "You can use either the detected bboxes or the key_frame_body_points (connected to coordinates_positive); if one fails to create a proper mask, try the other"
+      ],
+      "color": "#432",
+      "bgcolor": "#653"
+    },
+    {
+      "id": 150,
+      "type": "INTConstant",
+      "pos": [
+        -1695.39013671875,
+        -2773.4970703125
+      ],
+      "size": [
+        210,
+        58
+      ],
+      "flags": {},
+      "order": 12,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "value",
+          "type": "INT",
+          "links": [
+            257,
+            263,
+            286
+          ]
+        }
+      ],
+      "title": "Width",
+      "properties": {
+        "cnr_id": "comfyui-kjnodes",
+        "ver": "37659859825cea55940a58110525795ce5deb8be",
+        "Node name for S&R": "INTConstant"
+      },
+      "widgets_values": [
+        832
+      ],
+      "color": "#1b4669",
+      "bgcolor": "#29699c"
+    },
+    {
+      "id": 30,
+      "type": "VHS_VideoCombine",
+      "pos": [
+        3064.7763671875,
+        -1021.487548828125
+      ],
+      "size": [
+        1478.035400390625,
+        1042.3026123046875
+      ],
+      "flags": {},
+      "order": 66,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "images",
+          "type": "IMAGE",
+          "link": 89
+        },
+        {
+          "name": "audio",
+          "shape": 7,
+          "type": "AUDIO",
+          "link": 256
+        },
+        {
+          "name": "meta_batch",
+          "shape": 7,
+          "type": "VHS_BatchManager",
+          "link": null
+        },
+        {
+          "name": "vae",
+          "shape": 7,
+          "type": "VAE",
+          "link": null
+        }
+      ],
+      "outputs": [
+        {
+          "name": "Filenames",
+          "type": "VHS_FILENAMES",
+          "links": null
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfyui-videohelpersuite",
+        "ver": "8e4d79471bf1952154768e8435a9300077b534fa",
+        "Node name for S&R": "VHS_VideoCombine"
+      },
+      "widgets_values": {
+        "frame_rate": 16,
+        "loop_count": 0,
+        "filename_prefix": "Wanimate",
+        "format": "video/h264-mp4",
+        "pix_fmt": "yuv420p",
+        "crf": 19,
+        "save_metadata": true,
+        "trim_to_audio": true,
+        "pingpong": false,
+        "save_output": true,
+        "videopreview": {
+          "hidden": false,
+          "paused": false,
+          "params": {
+            "filename": "Wanimate_00008-audio.mp4",
+            "subfolder": "",
+            "type": "output",
+            "format": "video/h264-mp4",
+            "frame_rate": 16,
+            "workflow": "Wanimate_00008.png",
+            "fullpath": "N:\\AI\\ComfyUI\\output\\Wanimate_00008-audio.mp4"
+          }
+        }
+      }
+    },
+    {
+      "id": 177,
+      "type": "MarkdownNote",
+      "pos": [
+        -1088.8204345703125,
+        -2393.302978515625
+      ],
+      "size": [
+        536.27783203125,
+        330.03546142578125
+      ],
+      "flags": {},
+      "order": 13,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [],
+      "title": "Preprocessor links",
+      "properties": {},
+      "widgets_values": [
+        "Nodes:\n\n[https://github.com/kijai/ComfyUI-WanAnimatePreprocess](https://github.com/kijai/ComfyUI-WanAnimatePreprocess)\n\nModels:\n\nYOLO:\n\n[https://huggingface.co/Wan-AI/Wan2.2-Animate-14B/blob/main/process_checkpoint/det/yolov10m.onnx](https://huggingface.co/Wan-AI/Wan2.2-Animate-14B/blob/main/process_checkpoint/det/yolov10m.onnx)\n\nViTPose\n\nLarge:\n\n[https://huggingface.co/JunkyByte/easy_ViTPose/blob/main/onnx/wholebody/vitpose-l-wholebody.onnx](https://huggingface.co/JunkyByte/easy_ViTPose/blob/main/onnx/wholebody/vitpose-l-wholebody.onnx)\n\nHuge (needs both files):\n\n[https://huggingface.co/Kijai/vitpose_comfy/blob/main/onnx/vitpose_h_wholebody_model.onnx](https://huggingface.co/Kijai/vitpose_comfy/blob/main/onnx/vitpose_h_wholebody_model.onnx)\n\n[https://huggingface.co/Kijai/vitpose_comfy/blob/main/onnx/vitpose_h_wholebody_data.bin](https://huggingface.co/Kijai/vitpose_comfy/blob/main/onnx/vitpose_h_wholebody_data.bin)"
+      ],
+      "color": "#432",
+      "bgcolor": "#653"
+    },
+    {
+      "id": 191,
+      "type": "LoraLoaderModelOnly",
+      "pos": [
+        -155.92713928222656,
+        -787.0674438476562
+      ],
+      "size": [
+        632.4478759765625,
+        82
+      ],
+      "flags": {},
+      "order": 43,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "model",
+          "type": "MODEL",
+          "link": 323
+        }
+      ],
+      "outputs": [
+        {
+          "name": "MODEL",
+          "type": "MODEL",
+          "links": [
+            325
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfy-core",
+        "ver": "0.3.59",
+        "Node name for S&R": "LoraLoaderModelOnly"
+      },
+      "widgets_values": [
+        "WanVideo\\Lightx2v\\lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors",
+        1.2
+      ],
+      "color": "#223",
+      "bgcolor": "#335"
+    },
+    {
+      "id": 190,
+      "type": "LoraLoaderModelOnly",
+      "pos": [
+        -160.91363525390625,
+        -918.007568359375
+      ],
+      "size": [
+        487.27642822265625,
+        82
+      ],
+      "flags": {},
+      "order": 37,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "model",
+          "type": "MODEL",
+          "link": 324
+        }
+      ],
+      "outputs": [
+        {
+          "name": "MODEL",
+          "type": "MODEL",
+          "links": [
+            323
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfy-core",
+        "ver": "0.3.59",
+        "Node name for S&R": "LoraLoaderModelOnly"
+      },
+      "widgets_values": [
+        "WanVideo\\wan2.2_animate_14B_relight_lora_bf16.safetensors",
+        1
+      ],
+      "color": "#223",
+      "bgcolor": "#335"
+    },
+    {
+      "id": 204,
+      "type": "VAEDecode",
+      "pos": [
+        2335.7841796875,
+        -647.9304809570312
+      ],
+      "size": [
+        140,
+        46
+      ],
+      "flags": {},
+      "order": 60,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "samples",
+          "type": "LATENT",
+          "link": 362
+        },
+        {
+          "name": "vae",
+          "type": "VAE",
+          "link": 345
+        }
+      ],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            346
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfy-core",
+        "ver": "0.3.59",
+        "Node name for S&R": "VAEDecode"
+      },
+      "widgets_values": [],
+      "color": "#322",
+      "bgcolor": "#533"
+    },
+    {
+      "id": 162,
+      "type": "GetNode",
+      "pos": [
+        2193.002197265625,
+        -617.4148559570312
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 14,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "VAE",
+          "type": "VAE",
+          "links": [
+            345
+          ]
+        }
+      ],
+      "title": "Get_VAE",
+      "properties": {},
+      "widgets_values": [
+        "VAE"
+      ],
+      "color": "#322",
+      "bgcolor": "#533"
+    },
+    {
+      "id": 195,
+      "type": "RandomNoise",
+      "pos": [
+        1681.1666259765625,
+        -731.4764404296875
+      ],
+      "size": [
+        270,
+        82
+      ],
+      "flags": {},
+      "order": 15,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "NOISE",
+          "type": "NOISE",
+          "links": [
+            326
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfy-core",
+        "ver": "0.3.59",
+        "Node name for S&R": "RandomNoise"
+      },
+      "widgets_values": [
+        42,
+        "fixed"
+      ]
+    },
+    {
+      "id": 192,
+      "type": "TorchCompileModelWanVideoV2",
+      "pos": [
+        -158.32838439941406,
+        -606.775146484375
+      ],
+      "size": [
+        342.74609375,
+        178
+      ],
+      "flags": {},
+      "order": 46,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "model",
+          "type": "MODEL",
+          "link": 325
+        }
+      ],
+      "outputs": [
+        {
+          "name": "MODEL",
+          "type": "MODEL",
+          "links": [
+            332,
+            350
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfyui-kjnodes",
+        "ver": "bb205d809b467307b8ec3bb1a22680a4873187f8",
+        "Node name for S&R": "TorchCompileModelWanVideoV2"
+      },
+      "widgets_values": [
+        "inductor",
+        false,
+        "default",
+        false,
+        true,
+        64
+      ],
+      "color": "#223",
+      "bgcolor": "#335"
+    },
+    {
+      "id": 206,
+      "type": "BasicScheduler",
+      "pos": [
+        1683.2818603515625,
+        -903.85498046875
+      ],
+      "size": [
+        270,
+        106
+      ],
+      "flags": {},
+      "order": 49,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "model",
+          "type": "MODEL",
+          "link": 350
+        }
+      ],
+      "outputs": [
+        {
+          "name": "SIGMAS",
+          "type": "SIGMAS",
+          "links": [
+            349
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfy-core",
+        "ver": "0.3.59",
+        "Node name for S&R": "BasicScheduler"
+      },
+      "widgets_values": [
+        "simple",
+        4,
+        1
+      ]
+    },
+    {
+      "id": 138,
+      "type": "GetNode",
+      "pos": [
+        1322,
+        -333.85980224609375
+      ],
+      "size": [
+        210,
+        50
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 16,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            352
+          ]
+        }
+      ],
+      "title": "Get_face_images",
+      "properties": {},
+      "widgets_values": [
+        "face_images"
+      ],
+      "color": "#2a363b",
+      "bgcolor": "#3f5159"
+    },
+    {
+      "id": 141,
+      "type": "GetNode",
+      "pos": [
+        1333.7083740234375,
+        -296.18280029296875
+      ],
+      "size": [
+        210,
+        34
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 17,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            353
+          ]
+        }
+      ],
+      "title": "Get_pose_images",
+      "properties": {},
+      "widgets_values": [
+        "pose_images"
+      ],
+      "color": "#2a363b",
+      "bgcolor": "#3f5159"
+    },
+    {
+      "id": 131,
+      "type": "GetNode",
+      "pos": [
+        1313.332275390625,
+        -253.8428955078125
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 18,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            354
+          ]
+        }
+      ],
+      "title": "Get_background_image",
+      "properties": {},
+      "widgets_values": [
+        "background_image"
+      ],
+      "color": "#2a363b",
+      "bgcolor": "#3f5159"
+    },
+    {
+      "id": 158,
+      "type": "GetNode",
+      "pos": [
+        1379.7850341796875,
+        -51.34074783325195
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 19,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "INT",
+          "type": "INT",
+          "links": [
+            358
+          ]
+        }
+      ],
+      "title": "Get_frame_count",
+      "properties": {},
+      "widgets_values": [
+        "frame_count"
+      ],
+      "color": "#1b4669",
+      "bgcolor": "#29699c"
+    },
+    {
+      "id": 156,
+      "type": "GetNode",
+      "pos": [
+        1378.34033203125,
+        -103.323486328125
+      ],
+      "size": [
+        210,
+        50
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 20,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "INT",
+          "type": "INT",
+          "links": [
+            357
+          ]
+        }
+      ],
+      "title": "Get_height",
+      "properties": {},
+      "widgets_values": [
+        "height"
+      ],
+      "color": "#1b4669",
+      "bgcolor": "#29699c"
+    },
+    {
+      "id": 155,
+      "type": "GetNode",
+      "pos": [
+        1379.78515625,
+        -156.0265655517578
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 21,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "INT",
+          "type": "INT",
+          "links": [
+            356
+          ]
+        }
+      ],
+      "title": "Get_width",
+      "properties": {},
+      "widgets_values": [
+        "width"
+      ],
+      "color": "#1b4669",
+      "bgcolor": "#29699c"
+    },
+    {
+      "id": 143,
+      "type": "GetNode",
+      "pos": [
+        1379.7850341796875,
+        -204.57044982910156
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 22,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "MASK",
+          "type": "MASK",
+          "links": [
+            355
+          ]
+        }
+      ],
+      "title": "Get_mask",
+      "properties": {},
+      "widgets_values": [
+        "mask"
+      ],
+      "color": "#1c5715",
+      "bgcolor": "#1f401b"
+    },
+    {
+      "id": 202,
+      "type": "GetNode",
+      "pos": [
+        1368.9952392578125,
+        -417.7628173828125
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 23,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "VAE",
+          "type": "VAE",
+          "links": [
+            340
+          ]
+        }
+      ],
+      "title": "Get_VAE",
+      "properties": {},
+      "widgets_values": [
+        "VAE"
+      ],
+      "color": "#322",
+      "bgcolor": "#533"
+    },
+    {
+      "id": 201,
+      "type": "ConditioningZeroOut",
+      "pos": [
+        986.3633422851562,
+        -341.2440490722656
+      ],
+      "size": [
+        197.712890625,
+        26
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 42,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "conditioning",
+          "type": "CONDITIONING",
+          "link": 334
+        }
+      ],
+      "outputs": [
+        {
+          "name": "CONDITIONING",
+          "type": "CONDITIONING",
+          "links": [
+            359
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfy-core",
+        "ver": "0.3.59",
+        "Node name for S&R": "ConditioningZeroOut"
+      },
+      "widgets_values": [],
+      "color": "#432",
+      "bgcolor": "#653"
+    },
+    {
+      "id": 198,
+      "type": "CLIPTextEncode",
+      "pos": [
+        1035.0103759765625,
+        -664.8511352539062
+      ],
+      "size": [
+        400,
+        200
+      ],
+      "flags": {},
+      "order": 35,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "clip",
+          "type": "CLIP",
+          "link": 331
+        }
+      ],
+      "outputs": [
+        {
+          "name": "CONDITIONING",
+          "type": "CONDITIONING",
+          "links": [
+            334,
+            338
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfy-core",
+        "ver": "0.3.59",
+        "Node name for S&R": "CLIPTextEncode"
+      },
+      "widgets_values": [
+        "man is walking, style is soft 3D render style, night time, moonlight"
+      ],
+      "color": "#432",
+      "bgcolor": "#653"
+    },
+    {
+      "id": 193,
+      "type": "CLIPLoader",
+      "pos": [
+        225.7170867919922,
+        -533.3831787109375
+      ],
+      "size": [
+        270,
+        106
+      ],
+      "flags": {},
+      "order": 24,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "CLIP",
+          "type": "CLIP",
+          "links": [
+            331
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfy-core",
+        "ver": "0.3.59",
+        "Node name for S&R": "CLIPLoader"
+      },
+      "widgets_values": [
+        "umt5_xxl_fp8_e4m3fn_scaled.safetensors",
+        "wan",
+        "default"
+      ],
+      "color": "#432",
+      "bgcolor": "#653"
+    },
+    {
+      "id": 189,
+      "type": "SetNode",
+      "pos": [
+        86.28633117675781,
+        -338.70849609375
+      ],
+      "size": [
+        210,
+        60
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 36,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "VAE",
+          "type": "VAE",
+          "link": 322
+        }
+      ],
+      "outputs": [
+        {
+          "name": "*",
+          "type": "*",
+          "links": null
+        }
+      ],
+      "title": "Set_VAE",
+      "properties": {
+        "previousName": "VAE"
+      },
+      "widgets_values": [
+        "VAE"
+      ],
+      "color": "#322",
+      "bgcolor": "#533"
+    },
+    {
+      "id": 188,
+      "type": "VAELoader",
+      "pos": [
+        -232.2764892578125,
+        -368.0868225097656
+      ],
+      "size": [
+        270,
+        58
+      ],
+      "flags": {},
+      "order": 25,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "VAE",
+          "type": "VAE",
+          "links": [
+            322
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfy-core",
+        "ver": "0.3.59",
+        "Node name for S&R": "VAELoader"
+      },
+      "widgets_values": [
+        "wanvideo\\Wan2_1_VAE_bf16.safetensors"
+      ],
+      "color": "#322",
+      "bgcolor": "#533"
+    },
+    {
+      "id": 200,
+      "type": "CFGGuider",
+      "pos": [
+        1680.1092529296875,
+        -571.7877807617188
+      ],
+      "size": [
+        270,
+        98
+      ],
+      "flags": {},
+      "order": 48,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "model",
+          "type": "MODEL",
+          "link": 332
+        },
+        {
+          "name": "positive",
+          "type": "CONDITIONING",
+          "link": 336
+        },
+        {
+          "name": "negative",
+          "type": "CONDITIONING",
+          "link": 337
+        }
+      ],
+      "outputs": [
+        {
+          "name": "GUIDER",
+          "type": "GUIDER",
+          "links": [
+            347
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfy-core",
+        "ver": "0.3.59",
+        "Node name for S&R": "CFGGuider"
+      },
+      "widgets_values": [
+        1
+      ]
+    },
+    {
+      "id": 199,
+      "type": "WanAnimateToVideo",
+      "pos": [
+        1614.5421142578125,
+        -375.08544921875
+      ],
+      "size": [
+        324.751953125,
+        358
+      ],
+      "flags": {},
+      "order": 45,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "positive",
+          "type": "CONDITIONING",
+          "link": 338
+        },
+        {
+          "name": "negative",
+          "type": "CONDITIONING",
+          "link": 359
+        },
+        {
+          "name": "vae",
+          "type": "VAE",
+          "link": 340
+        },
+        {
+          "name": "clip_vision_output",
+          "shape": 7,
+          "type": "CLIP_VISION_OUTPUT",
+          "link": null
+        },
+        {
+          "name": "reference_image",
+          "shape": 7,
+          "type": "IMAGE",
+          "link": 351
+        },
+        {
+          "name": "face_video",
+          "shape": 7,
+          "type": "IMAGE",
+          "link": 352
+        },
+        {
+          "name": "pose_video",
+          "shape": 7,
+          "type": "IMAGE",
+          "link": 353
+        },
+        {
+          "name": "background_video",
+          "shape": 7,
+          "type": "IMAGE",
+          "link": 354
+        },
+        {
+          "name": "character_mask",
+          "shape": 7,
+          "type": "MASK",
+          "link": 355
+        },
+        {
+          "name": "continue_motion",
+          "shape": 7,
+          "type": "IMAGE",
+          "link": null
+        },
+        {
+          "name": "width",
+          "type": "INT",
+          "widget": {
+            "name": "width"
+          },
+          "link": 356
+        },
+        {
+          "name": "height",
+          "type": "INT",
+          "widget": {
+            "name": "height"
+          },
+          "link": 357
+        },
+        {
+          "name": "length",
+          "type": "INT",
+          "widget": {
+            "name": "length"
+          },
+          "link": 358
+        }
+      ],
+      "outputs": [
+        {
+          "name": "positive",
+          "type": "CONDITIONING",
+          "links": [
+            336
+          ]
+        },
+        {
+          "name": "negative",
+          "type": "CONDITIONING",
+          "links": [
+            337
+          ]
+        },
+        {
+          "name": "latent",
+          "type": "LATENT",
+          "links": [
+            339
+          ]
+        },
+        {
+          "name": "trim_latent",
+          "type": "INT",
+          "links": [
+            361
+          ]
+        },
+        {
+          "name": "trim_image",
+          "type": "INT",
+          "links": null
+        },
+        {
+          "name": "video_frame_offset",
+          "type": "INT",
+          "links": null
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfy-core",
+        "ver": "0.3.59",
+        "Node name for S&R": "WanAnimateToVideo"
+      },
+      "widgets_values": [
+        832,
+        480,
+        77,
+        1,
+        5,
+        0
+      ]
+    },
+    {
+      "id": 205,
+      "type": "KSamplerSelect",
+      "pos": [
+        2024.867919921875,
+        -748.396728515625
+      ],
+      "size": [
+        270,
+        58
+      ],
+      "flags": {},
+      "order": 26,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "SAMPLER",
+          "type": "SAMPLER",
+          "links": [
+            348
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfy-core",
+        "ver": "0.3.59",
+        "Node name for S&R": "KSamplerSelect"
+      },
+      "widgets_values": [
+        "lcm"
+      ]
+    },
+    {
+      "id": 187,
+      "type": "DiffusionModelLoaderKJ",
+      "pos": [
+        -918.0835571289062,
+        -920.0172119140625
+      ],
+      "size": [
+        589.9105834960938,
+        178
+      ],
+      "flags": {},
+      "order": 27,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "extra_state_dict",
+          "shape": 7,
+          "type": "STRING",
+          "link": null
+        }
+      ],
+      "outputs": [
+        {
+          "name": "MODEL",
+          "type": "MODEL",
+          "links": [
+            324
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfyui-kjnodes",
+        "ver": "bb205d809b467307b8ec3bb1a22680a4873187f8",
+        "Node name for S&R": "DiffusionModelLoaderKJ"
+      },
+      "widgets_values": [
+        "WanVideo\\2_2\\Wan2_2-Animate-14B_high_fp8_e4m3fn_native_KJ.safetensors",
+        "fp16",
+        "fp16",
+        false,
+        "auto",
+        true
+      ],
+      "color": "#223",
+      "bgcolor": "#335"
+    },
+    {
+      "id": 133,
+      "type": "GetNode",
+      "pos": [
+        991.7420654296875,
+        -281.65087890625
+      ],
+      "size": [
+        210,
+        50
+      ],
+      "flags": {
+        "collapsed": true
+      },
+      "order": 28,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            351
+          ]
+        }
+      ],
+      "title": "Get_reference_image",
+      "properties": {},
+      "widgets_values": [
+        "reference_image"
+      ],
+      "color": "#2a363b",
+      "bgcolor": "#3f5159"
+    },
+    {
+      "id": 194,
+      "type": "SamplerCustomAdvanced",
+      "pos": [
+        2041.89306640625,
+        -441.8531188964844
+      ],
+      "size": [
+        461.3559875488281,
+        382.6284484863281
+      ],
+      "flags": {},
+      "order": 53,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "noise",
+          "type": "NOISE",
+          "link": 326
+        },
+        {
+          "name": "guider",
+          "type": "GUIDER",
+          "link": 347
+        },
+        {
+          "name": "sampler",
+          "type": "SAMPLER",
+          "link": 348
+        },
+        {
+          "name": "sigmas",
+          "type": "SIGMAS",
+          "link": 349
+        },
+        {
+          "name": "latent_image",
+          "type": "LATENT",
+          "link": 339
+        }
+      ],
+      "outputs": [
+        {
+          "name": "output",
+          "type": "LATENT",
+          "links": [
+            360
+          ]
+        },
+        {
+          "name": "denoised_output",
+          "type": "LATENT",
+          "links": null
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfy-core",
+        "ver": "0.3.59",
+        "Node name for S&R": "SamplerCustomAdvanced"
+      },
+      "widgets_values": []
+    },
+    {
+      "id": 207,
+      "type": "TrimVideoLatent",
+      "pos": [
+        2556.41162109375,
+        -444.12176513671875
+      ],
+      "size": [
+        270,
+        58
+      ],
+      "flags": {},
+      "order": 57,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "samples",
+          "type": "LATENT",
+          "link": 360
+        },
+        {
+          "name": "trim_amount",
+          "type": "INT",
+          "widget": {
+            "name": "trim_amount"
+          },
+          "link": 361
+        }
+      ],
+      "outputs": [
+        {
+          "name": "LATENT",
+          "type": "LATENT",
+          "links": [
+            362
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "comfy-core",
+        "ver": "0.3.59",
+        "Node name for S&R": "TrimVideoLatent"
+      },
+      "widgets_values": [
+        0
+      ]
+    },
+    {
+      "id": 164,
+      "type": "MarkdownNote",
+      "pos": [
+        -1191.340087890625,
+        -515.903564453125
+      ],
+      "size": [
+        884.3952026367188,
+        400.1950378417969
+      ],
+      "flags": {},
+      "order": 29,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [],
+      "title": "Markdown Note: Model Links",
+      "properties": {},
+      "widgets_values": [
+        "Model links:\n\n!!IMPORTANT!!\nMy initial upload of the fp8_scaled model works poorly in native workflows due to some face layer quantization, I've uploaded fixed version \"v2\" which should be used if you want to use fp8_scaled:\n\n[https://huggingface.co/Kijai/WanVideo_comfy_fp8_scaled/blob/main/Wan22Animate/Wan2_2-Animate-14B_fp8_scaled_e4m3fn_KJ_v2.safetensors](https://huggingface.co/Kijai/WanVideo_comfy_fp8_scaled/blob/main/Wan22Animate/Wan2_2-Animate-14B_fp8_scaled_e4m3fn_KJ_v2.safetensors)\n\n\nbf16:\n\n[https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_animate_14B_bf16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_animate_14B_bf16.safetensors)\n\n\nLoRA:\n\n[https://huggingface.co/Kijai/WanVideo_comfy/tree/main/Lightx2v](https://huggingface.co/Kijai/WanVideo_comfy/tree/main/Lightx2v)\n\n[https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/loras/wan2.2_animate_14B_relight_lora_bf16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/loras/wan2.2_animate_14B_relight_lora_bf16.safetensors)\n\nText encoder:\n\n[https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors)\n\nVAE:\n\n[https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/vae/wan_2.1_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/vae/wan_2.1_vae.safetensors)"
+      ],
+      "color": "#432",
+      "bgcolor": "#653"
+    }
+  ],
+  "links": [
+    [
+      82,
+      57,
+      0,
+      64,
+      0,
+      "IMAGE"
+    ],
+    [
+      87,
+      42,
+      0,
+      66,
+      0,
+      "IMAGE"
+    ],
+    [
+      89,
+      66,
+      0,
+      30,
+      0,
+      "IMAGE"
+    ],
+    [
+      107,
+      77,
+      0,
+      66,
+      1,
+      "IMAGE"
+    ],
+    [
+      185,
+      102,
+      0,
+      104,
+      0,
+      "SAM2MODEL"
+    ],
+    [
+      231,
+      64,
+      0,
+      128,
+      0,
+      "*"
+    ],
+    [
+      233,
+      99,
+      0,
+      130,
+      0,
+      "*"
+    ],
+    [
+      236,
+      134,
+      0,
+      77,
+      0,
+      "IMAGE"
+    ],
+    [
+      239,
+      137,
+      0,
+      77,
+      1,
+      "IMAGE"
+    ],
+    [
+      243,
+      140,
+      0,
+      77,
+      2,
+      "IMAGE"
+    ],
+    [
+      245,
+      108,
+      0,
+      142,
+      0,
+      "*"
+    ],
+    [
+      246,
+      142,
+      0,
+      99,
+      1,
+      "MASK"
+    ],
+    [
+      248,
+      63,
+      0,
+      144,
+      0,
+      "*"
+    ],
+    [
+      249,
+      145,
+      0,
+      77,
+      3,
+      "IMAGE"
+    ],
+    [
+      250,
+      146,
+      0,
+      99,
+      0,
+      "IMAGE"
+    ],
+    [
+      251,
+      130,
+      0,
+      75,
+      0,
+      "IMAGE"
+    ],
+    [
+      255,
+      63,
+      2,
+      148,
+      0,
+      "*"
+    ],
+    [
+      256,
+      149,
+      0,
+      30,
+      1,
+      "AUDIO"
+    ],
+    [
+      257,
+      150,
+      0,
+      63,
+      2,
+      "INT"
+    ],
+    [
+      258,
+      151,
+      0,
+      63,
+      3,
+      "INT"
+    ],
+    [
+      263,
+      150,
+      0,
+      153,
+      0,
+      "*"
+    ],
+    [
+      264,
+      151,
+      0,
+      154,
+      0,
+      "*"
+    ],
+    [
+      267,
+      63,
+      1,
+      157,
+      0,
+      "*"
+    ],
+    [
+      286,
+      150,
+      0,
+      64,
+      2,
+      "INT"
+    ],
+    [
+      287,
+      151,
+      0,
+      64,
+      3,
+      "INT"
+    ],
+    [
+      290,
+      178,
+      0,
+      172,
+      0,
+      "POSEMODEL"
+    ],
+    [
+      294,
+      172,
+      0,
+      173,
+      0,
+      "POSEDATA"
+    ],
+    [
+      306,
+      144,
+      0,
+      180,
+      0,
+      "IMAGE"
+    ],
+    [
+      307,
+      180,
+      1,
+      173,
+      1,
+      "INT"
+    ],
+    [
+      308,
+      180,
+      2,
+      173,
+      2,
+      "INT"
+    ],
+    [
+      309,
+      180,
+      0,
+      172,
+      1,
+      "IMAGE"
+    ],
+    [
+      310,
+      180,
+      1,
+      172,
+      3,
+      "INT"
+    ],
+    [
+      311,
+      180,
+      2,
+      172,
+      4,
+      "INT"
+    ],
+    [
+      314,
+      104,
+      0,
+      182,
+      0,
+      "MASK"
+    ],
+    [
+      315,
+      182,
+      0,
+      108,
+      0,
+      "MASK"
+    ],
+    [
+      316,
+      180,
+      0,
+      104,
+      1,
+      "IMAGE"
+    ],
+    [
+      317,
+      172,
+      1,
+      183,
+      0,
+      "*"
+    ],
+    [
+      318,
+      183,
+      0,
+      174,
+      0,
+      "IMAGE"
+    ],
+    [
+      319,
+      173,
+      0,
+      184,
+      0,
+      "*"
+    ],
+    [
+      320,
+      184,
+      0,
+      181,
+      0,
+      "IMAGE"
+    ],
+    [
+      321,
+      172,
+      3,
+      104,
+      4,
+      "BBOX"
+    ],
+    [
+      322,
+      188,
+      0,
+      189,
+      0,
+      "*"
+    ],
+    [
+      323,
+      190,
+      0,
+      191,
+      0,
+      "MODEL"
+    ],
+    [
+      324,
+      187,
+      0,
+      190,
+      0,
+      "MODEL"
+    ],
+    [
+      325,
+      191,
+      0,
+      192,
+      0,
+      "MODEL"
+    ],
+    [
+      326,
+      195,
+      0,
+      194,
+      0,
+      "NOISE"
+    ],
+    [
+      331,
+      193,
+      0,
+      198,
+      0,
+      "CLIP"
+    ],
+    [
+      332,
+      192,
+      0,
+      200,
+      0,
+      "MODEL"
+    ],
+    [
+      334,
+      198,
+      0,
+      201,
+      0,
+      "CONDITIONING"
+    ],
+    [
+      336,
+      199,
+      0,
+      200,
+      1,
+      "CONDITIONING"
+    ],
+    [
+      337,
+      199,
+      1,
+      200,
+      2,
+      "CONDITIONING"
+    ],
+    [
+      338,
+      198,
+      0,
+      199,
+      0,
+      "CONDITIONING"
+    ],
+    [
+      339,
+      199,
+      2,
+      194,
+      4,
+      "LATENT"
+    ],
+    [
+      340,
+      202,
+      0,
+      199,
+      2,
+      "VAE"
+    ],
+    [
+      345,
+      162,
+      0,
+      204,
+      1,
+      "VAE"
+    ],
+    [
+      346,
+      204,
+      0,
+      42,
+      0,
+      "IMAGE"
+    ],
+    [
+      347,
+      200,
+      0,
+      194,
+      1,
+      "GUIDER"
+    ],
+    [
+      348,
+      205,
+      0,
+      194,
+      2,
+      "SAMPLER"
+    ],
+    [
+      349,
+      206,
+      0,
+      194,
+      3,
+      "SIGMAS"
+    ],
+    [
+      350,
+      192,
+      0,
+      206,
+      0,
+      "MODEL"
+    ],
+    [
+      351,
+      133,
+      0,
+      199,
+      4,
+      "IMAGE"
+    ],
+    [
+      352,
+      138,
+      0,
+      199,
+      5,
+      "IMAGE"
+    ],
+    [
+      353,
+      141,
+      0,
+      199,
+      6,
+      "IMAGE"
+    ],
+    [
+      354,
+      131,
+      0,
+      199,
+      7,
+      "IMAGE"
+    ],
+    [
+      355,
+      143,
+      0,
+      199,
+      8,
+      "MASK"
+    ],
+    [
+      356,
+      155,
+      0,
+      199,
+      10,
+      "INT"
+    ],
+    [
+      357,
+      156,
+      0,
+      199,
+      11,
+      "INT"
+    ],
+    [
+      358,
+      158,
+      0,
+      199,
+      12,
+      "INT"
+    ],
+    [
+      359,
+      201,
+      0,
+      199,
+      1,
+      "CONDITIONING"
+    ],
+    [
+      360,
+      194,
+      0,
+      207,
+      0,
+      "LATENT"
+    ],
+    [
+      361,
+      199,
+      3,
+      207,
+      1,
+      "INT"
+    ],
+    [
+      362,
+      207,
+      0,
+      204,
+      0,
+      "LATENT"
+    ]
+  ],
+  "groups": [
+    {
+      "id": 1,
+      "title": "Reference Image",
+      "bounding": [
+        -1209.306884765625,
+        -1833.3065185546875,
+        990.079833984375,
+        724.450439453125
+      ],
+      "color": "#3f789e",
+      "font_size": 24,
+      "flags": {}
+    },
+    {
+      "id": 3,
+      "title": "Preprocessing",
+      "bounding": [
+        -1227.062744140625,
+        -3202.685302734375,
+        4104.810546875,
+        1281.6610107421875
+      ],
+      "color": "#3f789e",
+      "font_size": 24,
+      "flags": {}
+    },
+    {
+      "id": 4,
+      "title": "Models",
+      "bounding": [
+        -1224.449951171875,
+        -1055.772705078125,
+        2156.392578125,
+        1012.5536499023438
+      ],
+      "color": "#88A",
+      "font_size": 24,
+      "flags": {}
+    },
+    {
+      "id": 5,
+      "title": "Result collage",
+      "bounding": [
+        2370.66357421875,
+        -1369.016845703125,
+        629.7467041015625,
+        605.8086547851562
+      ],
+      "color": "#3f789e",
+      "font_size": 24,
+      "flags": {}
+    }
+  ],
+  "config": {},
+  "extra": {
+    "ds": {
+      "scale": 0.39142513012204794,
+      "offset": [
+        2614.7614997967285,
+        1809.0478995517838
+      ]
+    },
+    "frontendVersion": "1.28.1",
+    "node_versions": {
+      "ComfyUI-WanVideoWrapper": "5a2383621a05825d0d0437781afcb8552d9590fd",
+      "ComfyUI-KJNodes": "a5bd3c86c8ed6b83c55c2d0e7a59515b15a0137f",
+      "ComfyUI-VideoHelperSuite": "0a75c7958fe320efcb052f1d9f8451fd20c730a8"
+    },
+    "VHS_latentpreview": true,
+    "VHS_latentpreviewrate": 0,
+    "VHS_MetadataImage": true,
+    "VHS_KeepIntermediate": true
+  },
+  "version": 0.4
+}

ComfyUI-WanAnimatePreprocess/models/onnx_models.py ADDED Viewed

	@@ -0,0 +1,282 @@

+# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
+import cv2
+import numpy as np
+import torch
+import onnxruntime
+from ..pose_utils.pose2d_utils import box_convert_simple, keypoints_from_heatmaps
+class SimpleOnnxInference(object):
+    def __init__(self, checkpoint, device='CUDAExecutionProvider', **kwargs):
+        # Store initialization parameters for potential reinit
+        self.checkpoint = checkpoint
+        self.init_kwargs = kwargs
+        provider = [device, 'CPUExecutionProvider'] if device == 'CUDAExecutionProvider' else [device]
+        self.provider = provider
+        self.session = onnxruntime.InferenceSession(checkpoint, providers=provider)
+        self.input_name = self.session.get_inputs()[0].name
+        self.output_name = self.session.get_outputs()[0].name
+        self.input_resolution = self.session.get_inputs()[0].shape[2:]
+        self.input_resolution = np.array(self.input_resolution)
+    def __call__(self, *args, **kwargs):
+        return self.forward(*args, **kwargs)
+    def get_output_names(self):
+        output_names = []
+        for node in self.session.get_outputs():
+            output_names.append(node.name)
+        return output_names
+    def cleanup(self):
+        if hasattr(self, 'session') and self.session is not None:
+            # Close the ONNX Runtime session
+            del self.session
+            self.session = None
+    def reinit(self, provider=None):
+        # Use provided provider or fall back to original provider
+        if provider is not None:
+            self.provider = provider
+        if self.session is None:
+            checkpoint = self.checkpoint
+            self.session = onnxruntime.InferenceSession(checkpoint, providers=self.provider)
+            self.input_name = self.session.get_inputs()[0].name
+            self.output_name = self.session.get_outputs()[0].name
+            self.input_resolution = self.session.get_inputs()[0].shape[2:]
+            self.input_resolution = np.array(self.input_resolution)
+class Yolo(SimpleOnnxInference):
+    def __init__(self, checkpoint, device='cuda', threshold_conf=0.05, threshold_multi_persons=0.1, input_resolution=(640, 640), threshold_iou=0.5, threshold_bbox_shape_ratio=0.4, cat_id=[1], select_type='max', strict=True, sorted_func=None, **kwargs):
+        super(Yolo, self).__init__(checkpoint, device=device, **kwargs)
+        model_inputs = self.session.get_inputs()
+        input_shape = model_inputs[0].shape
+        self.input_width = 640
+        self.input_height = 640
+        self.threshold_multi_persons = threshold_multi_persons
+        self.threshold_conf = threshold_conf
+        self.threshold_iou = threshold_iou
+        self.threshold_bbox_shape_ratio = threshold_bbox_shape_ratio
+        self.input_resolution = input_resolution
+        self.cat_id = cat_id
+        self.select_type = select_type
+        self.strict = strict
+        self.sorted_func = sorted_func
+    def postprocess(self, output, shape_raw, cat_id=[1]):
+        """
+        Performs post-processing on the model's output to extract bounding boxes, scores, and class IDs.
+        Args:
+            input_image (numpy.ndarray): The input image.
+            output (numpy.ndarray): The output of the model.
+        Returns:
+            numpy.ndarray: The input image with detections drawn on it.
+        """
+        # Transpose and squeeze the output to match the expected shape
+        outputs = np.squeeze(output)
+        if len(outputs.shape) == 1:
+            outputs = outputs[None]
+        if output.shape[-1] != 6 and output.shape[1] == 84:
+            outputs = np.transpose(outputs)
+        # Get the number of rows in the outputs array
+        rows = outputs.shape[0]
+        # Calculate the scaling factors for the bounding box coordinates
+        x_factor = shape_raw[1] / self.input_width
+        y_factor = shape_raw[0] / self.input_height
+        # Lists to store the bounding boxes, scores, and class IDs of the detections
+        boxes = []
+        scores = []
+        class_ids = []
+        if outputs.shape[-1] == 6:
+            max_scores = outputs[:, 4]
+            classid = outputs[:, -1]
+            threshold_conf_masks = max_scores >= self.threshold_conf
+            classid_masks = classid[threshold_conf_masks] != 3.14159
+            max_scores = max_scores[threshold_conf_masks][classid_masks]
+            classid = classid[threshold_conf_masks][classid_masks]
+            boxes = outputs[:, :4][threshold_conf_masks][classid_masks]
+            boxes[:, [0, 2]] *= x_factor
+            boxes[:, [1, 3]] *= y_factor
+            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
+            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
+            boxes = boxes.astype(np.int32)
+        else:
+            classes_scores = outputs[:, 4:]
+            max_scores = np.amax(classes_scores, -1)
+            threshold_conf_masks = max_scores >= self.threshold_conf
+            classid = np.argmax(classes_scores[threshold_conf_masks], -1)
+            classid_masks = classid!=3.14159
+            classes_scores = classes_scores[threshold_conf_masks][classid_masks]
+            max_scores = max_scores[threshold_conf_masks][classid_masks]
+            classid = classid[classid_masks]
+            xywh = outputs[:, :4][threshold_conf_masks][classid_masks]
+            x = xywh[:, 0:1]
+            y = xywh[:, 1:2]
+            w = xywh[:, 2:3]
+            h = xywh[:, 3:4]
+            left = ((x - w / 2) * x_factor)
+            top = ((y - h / 2) * y_factor)
+            width = (w * x_factor)
+            height = (h * y_factor)
+            boxes = np.concatenate([left, top, width, height], axis=-1).astype(np.int32)
+        boxes = boxes.tolist()
+        scores = max_scores.tolist()
+        class_ids = classid.tolist()
+        # Apply non-maximum suppression to filter out overlapping bounding boxes
+        indices = cv2.dnn.NMSBoxes(boxes, scores, self.threshold_conf, self.threshold_iou)
+        # Iterate over the selected indices after non-maximum suppression
+        results = []
+        for i in indices:
+            # Get the box, score, and class ID corresponding to the index
+            box = box_convert_simple(boxes[i], 'xywh2xyxy')
+            score = scores[i]
+            class_id = class_ids[i]
+            results.append(box + [score] + [class_id])
+            # # Draw the detection on the input image
+        # Return the modified input image
+        return np.array(results)
+    def process_results(self, results, shape_raw, cat_id=[1], single_person=True):
+        if isinstance(results, tuple):
+            det_results = results[0]
+        else:
+            det_results = results
+        person_results = []
+        person_count = 0
+        if len(results):
+            max_idx = -1
+            max_bbox_size = shape_raw[0] * shape_raw[1] * -10
+            max_bbox_shape = -1
+            bboxes = []
+            idx_list = []
+            for i in range(results.shape[0]):
+                bbox = results[i]
+                if (bbox[-1] + 1 in cat_id) and (bbox[-2] > self.threshold_conf):
+                    idx_list.append(i)
+                    bbox_shape = max((bbox[2] - bbox[0]), ((bbox[3] - bbox[1])))
+                    if bbox_shape > max_bbox_shape:
+                        max_bbox_shape = bbox_shape
+            results = results[idx_list]
+            for i in range(results.shape[0]):
+                bbox = results[i]
+                bboxes.append(bbox)
+                if self.select_type == 'max':
+                    bbox_size = (bbox[2] - bbox[0]) * ((bbox[3] - bbox[1]))
+                elif self.select_type == 'center':
+                    bbox_size = (abs((bbox[2] + bbox[0]) / 2 - shape_raw[1]/2)) * -1
+                bbox_shape = max((bbox[2] - bbox[0]), ((bbox[3] - bbox[1])))
+                if bbox_size > max_bbox_size:
+                    if (self.strict or max_idx != -1) and bbox_shape < max_bbox_shape * self.threshold_bbox_shape_ratio:
+                        continue
+                    max_bbox_size = bbox_size
+                    max_bbox_shape = bbox_shape
+                    max_idx = i
+            if self.sorted_func is not None and len(bboxes) > 0:
+                max_idx = self.sorted_func(bboxes, shape_raw)
+                bbox = bboxes[max_idx]
+                if self.select_type == 'max':
+                    max_bbox_size = (bbox[2] - bbox[0]) * ((bbox[3] - bbox[1]))
+                elif self.select_type == 'center':
+                    max_bbox_size = (abs((bbox[2] + bbox[0]) / 2 - shape_raw[1]/2)) * -1
+            if max_idx != -1:
+                person_count = 1
+            if max_idx != -1:
+                person = {}
+                person['bbox'] = results[max_idx, :5]
+                person['track_id'] = int(0)
+                person_results.append(person)
+            for i in range(results.shape[0]):
+                bbox = results[i]
+                if (bbox[-1] + 1 in cat_id) and (bbox[-2] > self.threshold_conf):
+                    if self.select_type == 'max':
+                        bbox_size = (bbox[2] - bbox[0]) * ((bbox[3] - bbox[1]))
+                    elif self.select_type == 'center':
+                        bbox_size = (abs((bbox[2] + bbox[0]) / 2 - shape_raw[1]/2)) * -1
+                    if i != max_idx and bbox_size > max_bbox_size * self.threshold_multi_persons and bbox_size < max_bbox_size:
+                        person_count += 1
+                        if not single_person:
+                            person = {}
+                            person['bbox'] = results[i, :5]
+                            person['track_id'] = int(person_count - 1)
+                            person_results.append(person)
+            return person_results
+        else:
+            return None
+    def postprocess_threading(self, outputs, shape_raw, person_results, i, single_person=True, **kwargs):
+        result = self.postprocess(outputs[i], shape_raw[i], cat_id=self.cat_id)
+        result = self.process_results(result, shape_raw[i], cat_id=self.cat_id, single_person=single_person)
+        if result is not None and len(result) != 0:
+            person_results[i] = result
+    def forward(self, img, shape_raw, **kwargs):
+        """
+        Performs inference using an ONNX model and returns the output image with drawn detections.
+        Returns:
+            output_img: The output image with drawn detections.
+        """
+        if isinstance(img, torch.Tensor):
+            img = img.cpu().numpy()
+            shape_raw = shape_raw.cpu().numpy()
+        outputs = self.session.run(None, {self.session.get_inputs()[0].name: img})[0]
+        person_results = [[{'bbox': np.array([0., 0., 1.*shape_raw[i][1], 1.*shape_raw[i][0], -1]), 'track_id': -1}] for i in range(len(outputs))]
+        for i in range(len(outputs)):
+            self.postprocess_threading(outputs, shape_raw, person_results, i, **kwargs)
+        return person_results
+class ViTPose(SimpleOnnxInference):
+    def __init__(self, checkpoint, device='cuda', **kwargs):
+        super(ViTPose, self).__init__(checkpoint, device=device)
+    def forward(self, img, center, scale, **kwargs):
+        heatmaps = self.session.run([], {self.session.get_inputs()[0].name: img})[0]
+        points, prob = keypoints_from_heatmaps(heatmaps=heatmaps,
+                                            center=center,
+                                            scale=scale*200,
+                                            unbiased=True,
+                                            use_udp=False)
+        return np.concatenate([points, prob], axis=2)

ComfyUI-WanAnimatePreprocess/nodes.py ADDED Viewed

	@@ -0,0 +1,494 @@

+import os
+import torch
+from tqdm import tqdm
+import numpy as np
+import folder_paths
+import cv2
+import json
+import logging
+script_directory = os.path.dirname(os.path.abspath(__file__))
+from comfy import model_management as mm
+from comfy.utils import ProgressBar
+device = mm.get_torch_device()
+offload_device = mm.unet_offload_device()
+folder_paths.add_model_folder_path("detection", os.path.join(folder_paths.models_dir, "detection"))
+from .models.onnx_models import ViTPose, Yolo
+from .pose_utils.pose2d_utils import load_pose_metas_from_kp2ds_seq, crop, bbox_from_detector
+from .utils import get_face_bboxes, padding_resize, resize_by_area, resize_to_bounds
+from .pose_utils.human_visualization import AAPoseMeta, draw_aapose_by_meta_new
+from .retarget_pose import get_retarget_pose
+class OnnxDetectionModelLoader:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "vitpose_model": (folder_paths.get_filename_list("detection"), {"tooltip": "These models are loaded from the 'ComfyUI/models/detection' -folder",}),
+                "yolo_model": (folder_paths.get_filename_list("detection"), {"tooltip": "These models are loaded from the 'ComfyUI/models/detection' -folder",}),
+                "onnx_device": (["CUDAExecutionProvider", "CPUExecutionProvider"], {"default": "CUDAExecutionProvider", "tooltip": "Device to run the ONNX models on"}),
+            },
+        }
+    RETURN_TYPES = ("POSEMODEL",)
+    RETURN_NAMES = ("model", )
+    FUNCTION = "loadmodel"
+    CATEGORY = "WanAnimatePreprocess"
+    DESCRIPTION = "Loads ONNX models for pose and face detection. ViTPose for pose estimation and YOLO for object detection."
+    def loadmodel(self, vitpose_model, yolo_model, onnx_device):
+        vitpose_model_path = folder_paths.get_full_path_or_raise("detection", vitpose_model)
+        yolo_model_path = folder_paths.get_full_path_or_raise("detection", yolo_model)
+        vitpose = ViTPose(vitpose_model_path, onnx_device)
+        yolo = Yolo(yolo_model_path, onnx_device)
+        model = {
+            "vitpose": vitpose,
+            "yolo": yolo,
+        }
+        return (model, )
+class PoseAndFaceDetection:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "model": ("POSEMODEL",),
+                "images": ("IMAGE",),
+                "width": ("INT", {"default": 832, "min": 64, "max": 2048, "step": 1, "tooltip": "Width of the generation"}),
+                "height": ("INT", {"default": 480, "min": 64, "max": 2048, "step": 1, "tooltip": "Height of the generation"}),
+            },
+            "optional": {
+                "retarget_image": ("IMAGE", {"default": None, "tooltip": "Optional reference image for pose retargeting"}),
+                "face_padding": ("INT", {"default": 0, "min": 0, "max": 512, "step": 1, "tooltip": "When > 0, the detected face images are padded and resized to 512x512"}),
+            },
+        }
+    RETURN_TYPES = ("POSEDATA", "IMAGE", "STRING", "BBOX", "BBOX,")
+    RETURN_NAMES = ("pose_data", "face_images", "key_frame_body_points", "bboxes", "face_bboxes")
+    FUNCTION = "process"
+    CATEGORY = "WanAnimatePreprocess"
+    DESCRIPTION = "Detects human poses and face images from input images. Optionally retargets poses based on a reference image."
+    def process(self, model, images, width, height, retarget_image=None, face_padding=0):
+        detector = model["yolo"]
+        pose_model = model["vitpose"]
+        B, H, W, C = images.shape
+        shape = np.array([H, W])[None]
+        images_np = images.numpy()
+        IMG_NORM_MEAN = np.array([0.485, 0.456, 0.406])
+        IMG_NORM_STD = np.array([0.229, 0.224, 0.225])
+        input_resolution=(256, 192)
+        rescale = 1.25
+        detector.reinit()
+        pose_model.reinit()
+        if retarget_image is not None:
+            refer_img = resize_by_area(retarget_image[0].numpy() * 255, width * height, divisor=16) / 255.0
+            ref_bbox = (detector(
+                cv2.resize(refer_img.astype(np.float32), (640, 640)).transpose(2, 0, 1)[None],
+                shape
+                )[0][0]["bbox"])
+            if ref_bbox is None or ref_bbox[-1] <= 0 or (ref_bbox[2] - ref_bbox[0]) < 10 or (ref_bbox[3] - ref_bbox[1]) < 10:
+                ref_bbox = np.array([0, 0, refer_img.shape[1], refer_img.shape[0]])
+            center, scale = bbox_from_detector(ref_bbox, input_resolution, rescale=rescale)
+            refer_img = crop(refer_img, center, scale, (input_resolution[0], input_resolution[1]))[0]
+            img_norm = (refer_img - IMG_NORM_MEAN) / IMG_NORM_STD
+            img_norm = img_norm.transpose(2, 0, 1).astype(np.float32)
+            ref_keypoints = pose_model(img_norm[None], np.array(center)[None], np.array(scale)[None])
+            refer_pose_meta = load_pose_metas_from_kp2ds_seq(ref_keypoints, width=retarget_image.shape[2], height=retarget_image.shape[1])[0]
+        comfy_pbar = ProgressBar(B*2)
+        progress = 0
+        bboxes = []
+        for img in tqdm(images_np, total=len(images_np), desc="Detecting bboxes"):
+            bboxes.append(detector(
+                cv2.resize(img, (640, 640)).transpose(2, 0, 1)[None],
+                shape
+                )[0][0]["bbox"])
+            progress += 1
+            if progress % 10 == 0:
+                comfy_pbar.update_absolute(progress)
+        detector.cleanup()
+        kp2ds = []
+        for img, bbox in tqdm(zip(images_np, bboxes), total=len(images_np), desc="Extracting keypoints"):
+            if bbox is None or bbox[-1] <= 0 or (bbox[2] - bbox[0]) < 10 or (bbox[3] - bbox[1]) < 10:
+                bbox = np.array([0, 0, img.shape[1], img.shape[0]])
+            bbox_xywh = bbox
+            center, scale = bbox_from_detector(bbox_xywh, input_resolution, rescale=rescale)
+            img = crop(img, center, scale, (input_resolution[0], input_resolution[1]))[0]
+            img_norm = (img - IMG_NORM_MEAN) / IMG_NORM_STD
+            img_norm = img_norm.transpose(2, 0, 1).astype(np.float32)
+            keypoints = pose_model(img_norm[None], np.array(center)[None], np.array(scale)[None])
+            kp2ds.append(keypoints)
+            progress += 1
+            if progress % 10 == 0:
+                comfy_pbar.update_absolute(progress)
+        pose_model.cleanup()
+        kp2ds = np.concatenate(kp2ds, 0)
+        pose_metas = load_pose_metas_from_kp2ds_seq(kp2ds, width=W, height=H)
+        face_images = []
+        face_bboxes = []
+        for idx, meta in enumerate(pose_metas):
+            face_bbox_for_image = get_face_bboxes(meta['keypoints_face'][:, :2], scale=1.3, image_shape=(H, W))
+            x1, x2, y1, y2 = face_bbox_for_image
+            if face_padding > 0:
+                x1 = max(0, x1 - face_padding)
+                y1 = max(0, y1 - face_padding)
+                x2 = min(W, x2 + face_padding)
+                y2 = min(H, y2 + face_padding)
+            face_bboxes.append((x1, y1, x2, y2))
+            face_image = images_np[idx][y1:y2, x1:x2]
+            # Check if face_image is valid before resizing
+            if face_image.size == 0 or face_image.shape[0] == 0 or face_image.shape[1] == 0:
+                logging.warning(f"Empty face crop on frame {idx}, creating fallback image.")
+                # Create a fallback image (black or use center crop)
+                fallback_size = int(min(H, W) * 0.3)
+                fallback_x1 = (W - fallback_size) // 2
+                fallback_x2 = fallback_x1 + fallback_size
+                fallback_y1 = int(H * 0.1)
+                fallback_y2 = fallback_y1 + fallback_size
+                face_image = images_np[idx][fallback_y1:fallback_y2, fallback_x1:fallback_x2]
+                # If still empty, create a black image
+                if face_image.size == 0:
+                    face_image = np.zeros((fallback_size, fallback_size, C), dtype=images_np.dtype)
+            face_image = cv2.resize(face_image, (512, 512))
+            face_images.append(face_image)
+        face_images_np = np.stack(face_images, 0)
+        face_images_tensor = torch.from_numpy(face_images_np)
+        if retarget_image is not None and refer_pose_meta is not None:
+            retarget_pose_metas = get_retarget_pose(pose_metas[0], refer_pose_meta, pose_metas, None, None)
+        else:
+            retarget_pose_metas = [AAPoseMeta.from_humanapi_meta(meta) for meta in pose_metas]
+        bbox = np.array(bboxes[0]).flatten()
+        if bbox.shape[0] >= 4:
+            bbox_ints = tuple(int(v) for v in bbox[:4])
+        else:
+            bbox_ints = (0, 0, 0, 0)
+        key_frame_num = 4 if B >= 4 else 1
+        key_frame_step = len(pose_metas) // key_frame_num
+        key_frame_index_list = list(range(0, len(pose_metas), key_frame_step))
+        key_points_index = [0, 1, 2, 5, 8, 11, 10, 13]
+        for key_frame_index in key_frame_index_list:
+            keypoints_body_list = []
+            body_key_points = pose_metas[key_frame_index]['keypoints_body']
+            for each_index in key_points_index:
+                each_keypoint = body_key_points[each_index]
+                if None is each_keypoint:
+                    continue
+                keypoints_body_list.append(each_keypoint)
+            keypoints_body = np.array(keypoints_body_list)[:, :2]
+            wh = np.array([[pose_metas[0]['width'], pose_metas[0]['height']]])
+            points = (keypoints_body * wh).astype(np.int32)
+            points_dict_list = []
+            for point in points:
+                points_dict_list.append({"x": int(point[0]), "y": int(point[1])})
+        pose_data = {
+            "retarget_image": refer_img if retarget_image is not None else None,
+            "pose_metas": retarget_pose_metas,
+            "refer_pose_meta": refer_pose_meta if retarget_image is not None else None,
+            "pose_metas_original": pose_metas,
+        }
+        return (pose_data, face_images_tensor, json.dumps(points_dict_list), [bbox_ints], face_bboxes)
+class DrawViTPose:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "pose_data": ("POSEDATA",),
+                "width": ("INT", {"default": 832, "min": 64, "max": 2048, "step": 1, "tooltip": "Width of the generation"}),
+                "height": ("INT", {"default": 480, "min": 64, "max": 2048, "step": 1, "tooltip": "Height of the generation"}),
+                "retarget_padding": ("INT", {"default": 16, "min": 0, "max": 512, "step": 1, "tooltip": "When > 0, the retargeted pose image is padded and resized to the target size"}),
+                "body_stick_width": ("INT", {"default": -1, "min": -1, "max": 20, "step": 1, "tooltip": "Width of the body sticks. Set to 0 to disable body drawing, -1 for auto"}),
+                "hand_stick_width": ("INT", {"default": -1, "min": -1, "max": 20, "step": 1, "tooltip": "Width of the hand sticks. Set to 0 to disable hand drawing, -1 for auto"}),
+                "draw_head": ("BOOLEAN", {"default": "True", "tooltip": "Whether to draw head keypoints"}),
+            },
+        }
+    RETURN_TYPES = ("IMAGE", )
+    RETURN_NAMES = ("pose_images", )
+    FUNCTION = "process"
+    CATEGORY = "WanAnimatePreprocess"
+    DESCRIPTION = "Draws pose images from pose data."
+    def process(self, pose_data, width, height, body_stick_width, hand_stick_width, draw_head, retarget_padding=64):
+        retarget_image = pose_data.get("retarget_image", None)
+        pose_metas = pose_data["pose_metas"]
+        draw_hand = hand_stick_width != 0
+        use_retarget_resize = retarget_padding > 0 and retarget_image is not None
+        comfy_pbar = ProgressBar(len(pose_metas))
+        progress = 0
+        crop_target_image = None
+        pose_images = []
+        for idx, meta in enumerate(tqdm(pose_metas, desc="Drawing pose images")):
+            canvas = np.zeros((height, width, 3), dtype=np.uint8)
+            pose_image = draw_aapose_by_meta_new(canvas, meta, draw_hand=draw_hand, draw_head=draw_head, body_stick_width=body_stick_width, hand_stick_width=hand_stick_width)
+            if crop_target_image is None:
+                crop_target_image = pose_image
+            if use_retarget_resize:
+                pose_image = resize_to_bounds(pose_image, height, width, crop_target_image=crop_target_image, extra_padding=retarget_padding)
+            else:
+                pose_image = padding_resize(pose_image, height, width)
+            pose_images.append(pose_image)
+            progress += 1
+            if progress % 10 == 0:
+                comfy_pbar.update_absolute(progress)
+        pose_images_np = np.stack(pose_images, 0)
+        pose_images_tensor = torch.from_numpy(pose_images_np).float() / 255.0
+        return (pose_images_tensor, )
+class PoseRetargetPromptHelper:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "pose_data": ("POSEDATA",),
+            },
+        }
+    RETURN_TYPES = ("STRING", "STRING", )
+    RETURN_NAMES = ("prompt", "retarget_prompt", )
+    FUNCTION = "process"
+    CATEGORY = "WanAnimatePreprocess"
+    DESCRIPTION = "Generates text prompts for pose retargeting based on visibility of arms and legs in the template pose. Originally used for Flux Kontext"
+    def process(self, pose_data):
+        refer_pose_meta = pose_data.get("refer_pose_meta", None)
+        if refer_pose_meta is None:
+            return ("Change the person to face forward.", "Change the person to face forward.", )
+        tpl_pose_metas = pose_data["pose_metas_original"]
+        arm_visible = False
+        leg_visible = False
+        for tpl_pose_meta in tpl_pose_metas:
+            tpl_keypoints = tpl_pose_meta['keypoints_body']
+            tpl_keypoints = np.array(tpl_keypoints)
+            if np.any(tpl_keypoints[3]) != 0 or np.any(tpl_keypoints[4]) != 0 or np.any(tpl_keypoints[6]) != 0 or np.any(tpl_keypoints[7]) != 0:
+                if (tpl_keypoints[3][0] <= 1 and tpl_keypoints[3][1] <= 1 and tpl_keypoints[3][2] >= 0.75) or (tpl_keypoints[4][0] <= 1 and tpl_keypoints[4][1] <= 1 and tpl_keypoints[4][2] >= 0.75) or \
+                    (tpl_keypoints[6][0] <= 1 and tpl_keypoints[6][1] <= 1 and tpl_keypoints[6][2] >= 0.75) or (tpl_keypoints[7][0] <= 1 and tpl_keypoints[7][1] <= 1 and tpl_keypoints[7][2] >= 0.75):
+                    arm_visible = True
+            if np.any(tpl_keypoints[9]) != 0 or np.any(tpl_keypoints[12]) != 0 or np.any(tpl_keypoints[10]) != 0 or np.any(tpl_keypoints[13]) != 0:
+                if (tpl_keypoints[9][0] <= 1 and tpl_keypoints[9][1] <= 1 and tpl_keypoints[9][2] >= 0.75) or (tpl_keypoints[12][0] <= 1 and tpl_keypoints[12][1] <= 1 and tpl_keypoints[12][2] >= 0.75) or \
+                    (tpl_keypoints[10][0] <= 1 and tpl_keypoints[10][1] <= 1 and tpl_keypoints[10][2] >= 0.75) or (tpl_keypoints[13][0] <= 1 and tpl_keypoints[13][1] <= 1 and tpl_keypoints[13][2] >= 0.75):
+                    leg_visible = True
+            if arm_visible and leg_visible:
+                break
+        if leg_visible:
+            if tpl_pose_meta['width'] > tpl_pose_meta['height']:
+                tpl_prompt = "Change the person to a standard T-pose (facing forward with arms extended). The person is standing. Feet and Hands are visible in the image."
+            else:
+                tpl_prompt = "Change the person to a standard pose with the face oriented forward and arms extending straight down by the sides. The person is standing. Feet and Hands are visible in the image."
+            if refer_pose_meta['width'] > refer_pose_meta['height']:
+                refer_prompt = "Change the person to a standard T-pose (facing forward with arms extended). The person is standing. Feet and Hands are visible in the image."
+            else:
+                refer_prompt = "Change the person to a standard pose with the face oriented forward and arms extending straight down by the sides. The person is standing. Feet and Hands are visible in the image."
+        elif arm_visible:
+            if tpl_pose_meta['width'] > tpl_pose_meta['height']:
+                tpl_prompt = "Change the person to a standard T-pose (facing forward with arms extended). Hands are visible in the image."
+            else:
+                tpl_prompt = "Change the person to a standard pose with the face oriented forward and arms extending straight down by the sides. Hands are visible in the image."
+            if refer_pose_meta['width'] > refer_pose_meta['height']:
+                refer_prompt = "Change the person to a standard T-pose (facing forward with arms extended). Hands are visible in the image."
+            else:
+                refer_prompt = "Change the person to a standard pose with the face oriented forward and arms extending straight down by the sides. Hands are visible in the image."
+        else:
+            tpl_prompt = "Change the person to face forward."
+            refer_prompt = "Change the person to face forward."
+        return (tpl_prompt, refer_prompt, )
+class PoseDetectionOneToAllAnimation:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "model": ("POSEMODEL",),
+                "images": ("IMAGE",),
+                "width": ("INT", {"default": 832, "min": 64, "max": 2048, "step": 2, "tooltip": "Width of the generation"}),
+                "height": ("INT", {"default": 480, "min": 64, "max": 2048, "step": 2, "tooltip": "Height of the generation"}),
+                "align_to": (["ref", "pose", "none"], {"default": "ref", "tooltip": "Alignment mode for poses"}),
+                "draw_face_points": (["full", "weak", "none"], {"default": "full", "tooltip": "Whether to draw face keypoints on the pose images"}),
+                "draw_head": (["full", "weak", "none"], {"default": "full", "tooltip": "Whether to draw head keypoints on the pose images"}),
+            },
+            "optional": {
+                "ref_image": ("IMAGE", {"default": None, "tooltip": "Optional reference image for pose retargeting"}),
+            },
+        }
+    RETURN_TYPES = ("IMAGE", "IMAGE", "IMAGE", "MASK",)
+    RETURN_NAMES = ("pose_images", "ref_pose_image", "ref_image", "ref_mask")
+    FUNCTION = "process"
+    CATEGORY = "WanAnimatePreprocess"
+    DESCRIPTION = "Specialized pose detection and alignment for OneToAllAnimation model https://github.com/ssj9596/One-to-All-Animation. Detects poses from input images and aligns them based on a reference image if provided."
+    def process(self, model, images, width, height, align_to, draw_face_points, draw_head, ref_image=None):
+        from .onetoall.infer_function import aaposemeta_to_dwpose, align_to_reference, align_to_pose
+        from .onetoall.utils import draw_pose_aligned, warp_ref_to_pose
+        detector = model["yolo"]
+        pose_model = model["vitpose"]
+        B, H, W, C = images.shape
+        shape = np.array([H, W])[None]
+        images_np = images.numpy()
+        IMG_NORM_MEAN = np.array([0.485, 0.456, 0.406])
+        IMG_NORM_STD = np.array([0.229, 0.224, 0.225])
+        input_resolution=(256, 192)
+        rescale = 1.25
+        detector.reinit()
+        pose_model.reinit()
+        if ref_image is not None:
+            refer_img_np = ref_image[0].numpy() * 255
+            refer_img = resize_by_area(refer_img_np, width * height, divisor=16) / 255.0
+            ref_bbox = (detector(
+                cv2.resize(refer_img.astype(np.float32), (640, 640)).transpose(2, 0, 1)[None],
+                shape
+                )[0][0]["bbox"])
+            if ref_bbox is None or ref_bbox[-1] <= 0 or (ref_bbox[2] - ref_bbox[0]) < 10 or (ref_bbox[3] - ref_bbox[1]) < 10:
+                ref_bbox = np.array([0, 0, refer_img.shape[1], refer_img.shape[0]])
+            center, scale = bbox_from_detector(ref_bbox, input_resolution, rescale=rescale)
+            refer_img = crop(refer_img, center, scale, (input_resolution[0], input_resolution[1]))[0]
+            img_norm = (refer_img - IMG_NORM_MEAN) / IMG_NORM_STD
+            img_norm = img_norm.transpose(2, 0, 1).astype(np.float32)
+            ref_keypoints = pose_model(img_norm[None], np.array(center)[None], np.array(scale)[None])
+            refer_pose_meta = load_pose_metas_from_kp2ds_seq(ref_keypoints, width=ref_image.shape[2], height=ref_image.shape[1])[0]
+            ref_dwpose = aaposemeta_to_dwpose(refer_pose_meta)
+        comfy_pbar = ProgressBar(B*2)
+        progress = 0
+        bboxes = []
+        for img in tqdm(images_np, total=len(images_np), desc="Detecting bboxes"):
+            bboxes.append(detector(
+                cv2.resize(img, (640, 640)).transpose(2, 0, 1)[None],
+                shape
+                )[0][0]["bbox"])
+            progress += 1
+            if progress % 10 == 0:
+                comfy_pbar.update_absolute(progress)
+        detector.cleanup()
+        kp2ds = []
+        for img, bbox in tqdm(zip(images_np, bboxes), total=len(images_np), desc="Extracting keypoints"):
+            if bbox is None or bbox[-1] <= 0 or (bbox[2] - bbox[0]) < 10 or (bbox[3] - bbox[1]) < 10:
+                bbox = np.array([0, 0, img.shape[1], img.shape[0]])
+            bbox_xywh = bbox
+            center, scale = bbox_from_detector(bbox_xywh, input_resolution, rescale=rescale)
+            img = crop(img, center, scale, (input_resolution[0], input_resolution[1]))[0]
+            img_norm = (img - IMG_NORM_MEAN) / IMG_NORM_STD
+            img_norm = img_norm.transpose(2, 0, 1).astype(np.float32)
+            keypoints = pose_model(img_norm[None], np.array(center)[None], np.array(scale)[None])
+            kp2ds.append(keypoints)
+            progress += 1
+            if progress % 10 == 0:
+                comfy_pbar.update_absolute(progress)
+        pose_model.cleanup()
+        kp2ds = np.concatenate(kp2ds, 0)
+        pose_metas = load_pose_metas_from_kp2ds_seq(kp2ds, width=W, height=H)
+        tpl_dwposes = [aaposemeta_to_dwpose(meta) for meta in pose_metas]
+        ref_pose_image_tensor = None
+        if ref_image is not None:
+            if align_to == "ref":
+                ref_pose_image =  draw_pose_aligned(ref_dwpose, height, width, without_face=True)
+                ref_pose_image_np = np.stack(ref_pose_image, 0)
+                ref_pose_image_tensor = torch.from_numpy(ref_pose_image_np).unsqueeze(0).float() / 255.0
+                tpl_dwposes = align_to_reference(refer_pose_meta, pose_metas, tpl_dwposes, anchor_idx=0)
+                image_input_tensor = ref_image
+                image_mask_tensor = torch.zeros(1, ref_image.shape[1], ref_image.shape[2], dtype=torch.float32, device="cpu")
+            elif align_to == "pose":
+                image_input, ref_pose_image_np, image_mask = warp_ref_to_pose(refer_img_np, tpl_dwposes[0], ref_dwpose)
+                ref_pose_image_np = np.stack(ref_pose_image_np, 0)
+                ref_pose_image_tensor = torch.from_numpy(ref_pose_image_np).unsqueeze(0).float() / 255.0
+                tpl_dwposes = align_to_pose(ref_dwpose, tpl_dwposes, anchor_idx=0)
+                image_input_tensor = torch.from_numpy(image_input).unsqueeze(0).float() / 255.0
+                image_mask_tensor = torch.from_numpy(image_mask).unsqueeze(0).float() / 255.0
+            elif align_to == "none":
+                ref_pose_image =  draw_pose_aligned(ref_dwpose, height, width, without_face=True)
+                ref_pose_image_np = np.stack(ref_pose_image, 0)
+                ref_pose_image_tensor = torch.from_numpy(ref_pose_image_np).unsqueeze(0).float() / 255.0
+                image_input_tensor = ref_image
+                image_mask_tensor = torch.zeros(1, ref_image.shape[1], ref_image.shape[2], dtype=torch.float32, device="cpu")
+        else:
+            ref_pose_image_tensor = torch.zeros((1, height, width, 3), dtype=torch.float32, device="cpu")
+            image_input_tensor = torch.zeros((1, height, width, 3), dtype=torch.float32, device="cpu")
+            image_mask_tensor = torch.zeros(1, height, width, dtype=torch.float32, device="cpu")
+        pose_imgs = []
+        for pose_np in tpl_dwposes:
+            pose_img = draw_pose_aligned(pose_np, height, width, without_face=(draw_face_points=="none"), face_change=(draw_face_points=="weak"), head_strength=draw_head)
+            pose_img = torch.from_numpy(np.array(pose_img))
+            pose_imgs.append(pose_img)
+        pose_tensor = torch.stack(pose_imgs).cpu().float() / 255.0
+        return (pose_tensor, ref_pose_image_tensor, image_input_tensor, image_mask_tensor)
+NODE_CLASS_MAPPINGS = {  # node identifier -> class implementing that node
+    "OnnxDetectionModelLoader": OnnxDetectionModelLoader,
+    "PoseAndFaceDetection": PoseAndFaceDetection,
+    "DrawViTPose": DrawViTPose,
+    "PoseRetargetPromptHelper": PoseRetargetPromptHelper,
+    "PoseDetectionOneToAllAnimation": PoseDetectionOneToAllAnimation,
+}
+NODE_DISPLAY_NAME_MAPPINGS = {  # same node identifiers -> human-readable display name
+    "OnnxDetectionModelLoader": "ONNX Detection Model Loader",
+    "PoseAndFaceDetection": "Pose and Face Detection",
+    "DrawViTPose": "Draw ViT Pose",
+    "PoseRetargetPromptHelper": "Pose Retarget Prompt Helper",
+    "PoseDetectionOneToAllAnimation": "Pose Detection OneToAll Animation",
+}

ComfyUI-WanAnimatePreprocess/onetoall/infer_function.py ADDED Viewed

	@@ -0,0 +1,508 @@

+# https://github.com/ssj9596/One-to-All-Animation
+import numpy as np
+import copy
+from ..retarget_pose import get_retarget_pose
+L_EYE_IDXS = list(range(36, 42))
+R_EYE_IDXS = list(range(42, 48))
+NOSE_TIP = 30
+MOUTH_L = 48
+MOUTH_R = 54
+JAW_LINE = list(range(0, 17))
+# ===========================Convert wanpose format into our dwpose-like format======================
+def aaposemeta_to_dwpose(meta):
+    candidate_body = meta['keypoints_body'][:-2][:, :2]
+    score_body = meta['keypoints_body'][:-2][:, 2]
+    subset_body = np.arange(len(candidate_body), dtype=float)
+    subset_body[score_body <= 0] = -1
+    bodies = {
+        "candidate": candidate_body,
+        "subset": np.expand_dims(subset_body, axis=0),   # shape (1, N)
+        "score": np.expand_dims(score_body, axis=0)      # shape (1, N)
+    }
+    hands_coords = np.stack([
+        meta['keypoints_right_hand'][:, :2],
+        meta['keypoints_left_hand'][:, :2]
+    ])
+    hands_score = np.stack([
+        meta['keypoints_right_hand'][:, 2],
+        meta['keypoints_left_hand'][:, 2]
+    ])
+    faces_coords = np.expand_dims(meta['keypoints_face'][1:][:, :2], axis=0)
+    faces_score = np.expand_dims(meta['keypoints_face'][1:][:, 2], axis=0)
+    dwpose_format = {
+        "bodies": bodies,
+        "hands": hands_coords,
+        "hands_score": hands_score,
+        "faces": faces_coords,
+        "faces_score": faces_score
+    }
+    return dwpose_format
+def aaposemeta_obj_to_dwpose(pose_meta):
+    """
+    Convert an AAPoseMeta object into a dwpose-like data structure
+    Restore coordinates to relative coordinates (divide by width, height)
+    Only handle None -> fill with zeros
+    """
+    w = pose_meta.width
+    h = pose_meta.height
+    # If None, fill with all zeros
+    def safe(arr, like_shape):
+        if arr is None:
+            return np.zeros(like_shape, dtype=np.float32)
+        arr_np = np.array(arr, dtype=np.float32)
+        arr_np = np.nan_to_num(arr_np, nan=0.0)
+        return arr_np
+    # body
+    kps_body = safe(pose_meta.kps_body, (pose_meta.kps_body_p.shape[0], 2))
+    candidate_body = kps_body / np.array([w, h])
+    score_body = safe(pose_meta.kps_body_p, (candidate_body.shape[0],))
+    subset_body = np.arange(len(candidate_body), dtype=float)
+    subset_body[score_body <= 0] = -1
+    bodies = {
+        "candidate": candidate_body,
+        "subset": np.expand_dims(subset_body, axis=0),
+        "score": np.expand_dims(score_body, axis=0)
+    }
+    # hands
+    kps_rhand = safe(pose_meta.kps_rhand, (pose_meta.kps_rhand_p.shape[0], 2))
+    kps_lhand = safe(pose_meta.kps_lhand, (pose_meta.kps_lhand_p.shape[0], 2))
+    hands_coords = np.stack([
+        kps_rhand / np.array([w, h]),
+        kps_lhand / np.array([w, h])
+    ])
+    hands_score = np.stack([
+        safe(pose_meta.kps_rhand_p, (kps_rhand.shape[0],)),
+        safe(pose_meta.kps_lhand_p, (kps_lhand.shape[0],))
+    ])
+    dwpose_format = {
+        "bodies": bodies,
+        "hands": hands_coords,
+        "hands_score": hands_score,
+        "faces": None,
+        "faces_score": None
+    }
+    return dwpose_format
+# ===============================Face Rough alignment======================
+def _to_68x2(arr):
+    if arr.shape == (1, 68, 2):
+        def to_orig(x):
+            x = np.asarray(x, dtype=np.float64)
+            if x.shape != (68, 2):
+                raise ValueError("to_orig expects (68,2)")
+            return x[np.newaxis, :, :]
+        return arr[0].astype(np.float64), to_orig
+    if arr.shape == (68, 2):
+        def to_orig(x):
+            x = np.asarray(x, dtype=np.float64)
+            if x.shape != (68, 2):
+                raise ValueError("to_orig expects (68,2)")
+            return x
+        return arr.astype(np.float64), to_orig
+    if arr.shape == (2, 68):
+        def to_orig(x):
+            x = np.asarray(x, dtype=np.float64)
+            if x.shape != (68, 2):
+                raise ValueError("to_orig expects (68,2)")
+            return x.T
+        return arr.T.astype(np.float64), to_orig
+    raise ValueError(f"faces shape {arr.shape} not supported; expected (1,68,2) or (68,2) or (2,68)")
+def _eye_center(face68, idxs):
+    return face68[idxs].mean(axis=0)
+def _anchors(face68):
+    le = _eye_center(face68, L_EYE_IDXS)
+    re = _eye_center(face68, R_EYE_IDXS)
+    nose = face68[NOSE_TIP]
+    lm = face68[MOUTH_L]
+    rm = face68[MOUTH_R]
+    if re[0] < le[0]:
+        le, re = re, le
+    return np.stack([le, re, nose, lm, rm], axis=0)
+def _face_scale_only(src68, ref68, target_nose_pos, alpha=1.0, anchor_pairs=[[36, 45], [27, 8]]):
+    """
+    Rough alignment - adjust the shape of the source face according to the proportions of the reference, and align the nose tip to target_nose_pos.
+    anchor_pairs:
+      - [36, 45] for x
+      - [27, 8] for y
+    """
+    src = np.asarray(src68, dtype=np.float64)
+    ref = np.asarray(ref68, dtype=np.float64)
+    center = _anchors(src).mean(axis=0)
+    src_centered = src - center
+    src_w = np.linalg.norm(src[anchor_pairs[0][0]] - src[anchor_pairs[0][1]])
+    ref_w = np.linalg.norm(ref[anchor_pairs[0][0]] - ref[anchor_pairs[0][1]])
+    src_h = np.linalg.norm(src[anchor_pairs[1][0]] - src[anchor_pairs[1][1]])
+    ref_h = np.linalg.norm(ref[anchor_pairs[1][0]] - ref[anchor_pairs[1][1]])
+    scale_x = ref_w / src_w if src_w > 1e-6 else 1.0
+    scale_y = ref_h / src_h if src_h > 1e-6 else 1.0
+    scaled_local = src_centered.copy()
+    scaled_local[:, 0] *= (1 - alpha) + scale_x * alpha
+    scaled_local[:, 1] *= (1 - alpha) + scale_y * alpha
+    scaled_global = scaled_local + center
+    nose_idx = NOSE_TIP
+    current_nose = scaled_global[nose_idx]
+    offset = target_nose_pos - current_nose
+    scaled_global += offset
+    return scaled_global
+# ===============================Reference Img Pre-Process======================
+def scale_and_translate_pose(tgt_pose, ref_pose, conf_th=0.9, return_ratio=False):
+    aligned_pose = copy.deepcopy(tgt_pose)
+    th = 1e-6
+    ref_kpt = ref_pose['bodies']['candidate'].astype(np.float32)
+    tgt_kpt = aligned_pose['bodies']['candidate'].astype(np.float32)
+    ref_sc = ref_pose['bodies'].get('score', np.ones(ref_kpt.shape[0])).astype(np.float32).reshape(-1)
+    tgt_sc = tgt_pose['bodies'].get('score', np.ones(tgt_kpt.shape[0])).astype(np.float32).reshape(-1)
+    ref_shoulder_valid = (ref_sc[2] >= conf_th) and (ref_sc[5] >= conf_th)
+    tgt_shoulder_valid = (tgt_sc[2] >= conf_th) and (tgt_sc[5] >= conf_th)
+    shoulder_ok = ref_shoulder_valid and tgt_shoulder_valid
+    ref_hip_valid = (ref_sc[8] >= conf_th) and (ref_sc[11] >= conf_th)
+    tgt_hip_valid = (tgt_sc[8] >= conf_th) and (tgt_sc[11] >= conf_th)
+    hip_ok = ref_hip_valid and tgt_hip_valid
+    if shoulder_ok and hip_ok:
+        ref_shoulder_w = abs(ref_kpt[5, 0] - ref_kpt[2, 0])
+        tgt_shoulder_w = abs(tgt_kpt[5, 0] - tgt_kpt[2, 0])
+        x_ratio = ref_shoulder_w / tgt_shoulder_w if tgt_shoulder_w > th else 1.0
+        ref_torso_h = abs(np.mean(ref_kpt[[8, 11], 1]) - np.mean(ref_kpt[[2, 5], 1]))
+        tgt_torso_h = abs(np.mean(tgt_kpt[[8, 11], 1]) - np.mean(tgt_kpt[[2, 5], 1]))
+        y_ratio = ref_torso_h / tgt_torso_h if tgt_torso_h > th else 1.0
+        scale_ratio = (x_ratio + y_ratio) / 2
+    elif shoulder_ok:
+        ref_sh_dist = np.linalg.norm(ref_kpt[2] - ref_kpt[5])
+        tgt_sh_dist = np.linalg.norm(tgt_kpt[2] - tgt_kpt[5])
+        scale_ratio = ref_sh_dist / tgt_sh_dist if tgt_sh_dist > th else 1.0
+    else:
+        ref_ear_dist = np.linalg.norm(ref_kpt[16] - ref_kpt[17])
+        tgt_ear_dist = np.linalg.norm(tgt_kpt[16] - tgt_kpt[17])
+        scale_ratio = ref_ear_dist / tgt_ear_dist if tgt_ear_dist > th else 1.0
+    if return_ratio:
+        return scale_ratio
+    # scale
+    anchor_idx = 1
+    anchor_pt_before_scale = tgt_kpt[anchor_idx].copy()
+    def scale(arr):
+        if arr is not None and arr.size > 0:
+            arr[..., 0] = anchor_pt_before_scale[0] + (arr[..., 0] - anchor_pt_before_scale[0]) * scale_ratio
+            arr[..., 1] = anchor_pt_before_scale[1] + (arr[..., 1] - anchor_pt_before_scale[1]) * scale_ratio
+    scale(tgt_kpt)
+    scale(aligned_pose.get('faces'))
+    scale(aligned_pose.get('hands'))
+    # offset
+    offset = ref_kpt[anchor_idx] - tgt_kpt[anchor_idx]
+    def translate(arr):
+        if arr is not None and arr.size > 0:
+            arr += offset
+    translate(tgt_kpt)
+    translate(aligned_pose.get('faces'))
+    translate(aligned_pose.get('hands'))
+    aligned_pose['bodies']['candidate'] = tgt_kpt
+    return aligned_pose, shoulder_ok, hip_ok
+# ===============================Align to Ref Driven Pose Retarget ======================
+def align_to_reference(ref_pose_meta, tpl_pose_metas, tpl_dwposes, anchor_idx=None):
+    # pose retarget + face rough align
+    ref_pose_dw = aaposemeta_to_dwpose(ref_pose_meta)
+    best_idx = anchor_idx
+    tpl_pose_meta_best = tpl_pose_metas[best_idx]
+    tpl_retarget_pose_metas = get_retarget_pose(
+        tpl_pose_meta_best,
+        ref_pose_meta,
+        tpl_pose_metas,
+        None, None
+    )
+    retarget_dwposes = [aaposemeta_obj_to_dwpose(pm) for pm in tpl_retarget_pose_metas]
+    if ref_pose_dw['faces'] is not None:
+        ref68, _ = _to_68x2(ref_pose_dw['faces'])
+        for frame_idx, (tpl_dw, rt_dw) in enumerate(zip(tpl_dwposes, retarget_dwposes)):
+            if tpl_dw['faces'] is None:
+                continue
+            src68, to_orig = _to_68x2(tpl_dw['faces'])
+            target_nose_pos = rt_dw['bodies']['candidate'][0]
+            scaled68 = _face_scale_only(src68, ref68, target_nose_pos, alpha=1.0)
+            rt_dw['faces'] = to_orig(scaled68)
+            rt_dw['faces_score'] = tpl_dw['faces_score']
+    return retarget_dwposes
+# ===============================Rescale-Ref && Change part of pose(Option)======================
+def compute_ratios_stepwise(ref_scores, source_scores, ref_pts, src_pts, conf_th=0.9, th=1e-6):
+    """
+    Compute per-limb length ratios (reference / source) for the joint pairs in ratio_pairs.
+    Every ratio starts at 1.0 and is replaced by a measured value only once all joints listed
+    for its group reach conf_th in both score arrays; each group's joint list extends the
+    previous one, so later groups require everything the earlier groups required.
+    ref_scores / source_scores are indexed as [0, joint]; ref_pts / src_pts as [joint].
+    th is the minimum source limb length; at or below it the ratio is 1.0.
+    Returns a dict keyed by (joint_a, joint_b) tuples.
+    """
+    def keypoint_valid(idx):
+        # A joint counts only when it is confident in the reference AND in the source.
+        return ref_scores[0, idx] >= conf_th and source_scores[0, idx] >= conf_th
+    def safe_ratio(p1, p2):
+        # Reference/source length ratio of limb (p1, p2), guarded against a near-zero source length.
+        len_ref = np.linalg.norm(ref_pts[p1] - ref_pts[p2])
+        len_src = np.linalg.norm(src_pts[p1] - src_pts[p2])
+        if len_src > th:
+            return len_ref / len_src
+        else:
+            return 1.0
+    ratio_pairs = [
+        (0,1),(1,2),(1,5),(2,3),(3,4),(5,6),(6,7),
+        (0,14),(0,15),(14,16),(15,17),
+        (8,9),(9,10),(11,12),(12,13),
+        (1,8),(1,11)
+    ]
+    ratios = {p: 1.0 for p in ratio_pairs}
+    # Distal limb -> proximal limb whose ratio it borrows until its own end joint is trusted.
+    parent_map = {
+        (3, 4): (2, 3),
+        (6, 7): (5, 6),
+        (9, 10): (8, 9),
+        (12, 13): (11, 12)
+    }
+    # Group 1 — head only
+    if all(keypoint_valid(i) for i in [0,1,14,15,16,17]):
+        ratios[(0,1)]  = safe_ratio(0,1)
+        ratios[(0,14)] = safe_ratio(0,14)
+        ratios[(0,15)] = safe_ratio(0,15)
+        ratios[(14,16)]= safe_ratio(14,16)
+        ratios[(15,17)]= safe_ratio(15,17)
+    # Group 2 — +shoulder
+    if all(keypoint_valid(i) for i in [0,1,2,5,14,15,16,17]):
+        ratios[(1,2)] = safe_ratio(1,2)
+        ratios[(1,5)] = safe_ratio(1,5)
+    # Group 3 — +upper arm
+    if all(keypoint_valid(i) for i in [0,1,2,5,14,15,16,17,3,6]):
+        ratios[(2,3)] = safe_ratio(2,3)
+        ratios[(5,6)] = safe_ratio(5,6)
+        # Forearms borrow the upper-arm ratio here; Group 5 overwrites them when joints 4/7 are valid.
+        ratios[(3,4)] = ratios[parent_map[(3,4)]]
+        ratios[(6,7)] = ratios[parent_map[(6,7)]]
+    # Group 4 — +hips
+    if all(keypoint_valid(i) for i in [0,1,2,5,14,15,16,17,3,6,8,11]):
+        ratios[(1,8)] = safe_ratio(1,8)
+        ratios[(1,11)] = safe_ratio(1,11)
+    # Group 5 — forearm own
+    if all(keypoint_valid(i) for i in [0,1,2,5,14,15,16,17,3,6,8,11,4,7]):
+        ratios[(3,4)] = safe_ratio(3,4)
+        ratios[(6,7)] = safe_ratio(6,7)
+    # Group 6 — knees
+    if all(keypoint_valid(i) for i in [0,1,2,5,14,15,16,17,3,6,8,11,4,7,9,12]):
+        ratios[(8,9)] = safe_ratio(8,9)
+        ratios[(11,12)] = safe_ratio(11,12)
+        # Lower legs borrow the thigh ratio; only the full-body branch below measures them directly.
+        ratios[(9,10)] = ratios[parent_map[(9,10)]]
+        ratios[(12,13)]= ratios[parent_map[(12,13)]]
+    # Full body — all ratios
+    if all(keypoint_valid(i) for i in range(18)):
+        for p in ratio_pairs:
+            ratios[p] = safe_ratio(*p)
+    # Average each mirrored limb pair so both sides end up with the same ratio.
+    symmetric_pairs = [
+        ((1, 2), (1, 5)),    # shoulders
+        ((2, 3), (5, 6)),    # upper arms
+        ((3, 4), (6, 7)),    # forearms
+        ((8, 9), (11, 12)),  # thighs
+        ((9, 10), (12, 13))  # lower legs
+    ]
+    for left_key, right_key in symmetric_pairs:
+        left_val = ratios.get(left_key)
+        right_val = ratios.get(right_key)
+        if left_val is not None and right_val is not None:
+            avg_val = (left_val + right_val) / 2.0
+            ratios[left_key] = avg_val
+            ratios[right_key] = avg_val
+    # NOTE(review): (13, 15) is not a key of ratios, so ratios.get returns None and this loop
+    # currently changes nothing — the intended head pairs should be confirmed.
+    eye_pairs = [
+        ((13, 15), (14, 16))
+    ]
+    for left_key, right_key in eye_pairs:
+        left_val = ratios.get(left_key)
+        right_val = ratios.get(right_key)
+        if left_val is not None and right_val is not None:
+            avg_val = (left_val + right_val) / 2.0
+            ratios[left_key] = avg_val
+            ratios[right_key] = avg_val
+    return ratios
+def align_to_pose(ref_dwpose, tpl_dwposes,anchor_idx=None,conf_th=0.9,):
+    """
+    Rescale the limbs of every template pose by the ratios measured between the reference pose
+    and the anchor template frame.
+    ref_dwpose is first scaled/translated onto tpl_dwposes[anchor_idx]; per-limb ratios are then
+    computed with compute_ratios_stepwise and applied to a deep copy of tpl_dwposes by shifting
+    each joint chain in place. When a reference face is available, each pose's face is also
+    roughly aligned with _face_scale_only.
+    Returns the modified deep copy; tpl_dwposes itself is left untouched.
+    """
+    detected_poses = copy.deepcopy(tpl_dwposes)
+    # NOTE(review): with the default anchor_idx=None this indexing raises for a list —
+    # callers presumably always pass an index; confirm.
+    best_pose = tpl_dwposes[anchor_idx]
+    ref_pose_scaled, _, _ = scale_and_translate_pose(ref_dwpose, best_pose, conf_th=conf_th)
+    ref_candidate = ref_pose_scaled['bodies']['candidate'].astype(np.float32)
+    ref_scores    = ref_pose_scaled['bodies']['score'].astype(np.float32)
+    source_candidate = best_pose['bodies']['candidate'].astype(np.float32)
+    source_scores = best_pose['bodies']['score'].astype(np.float32)
+    has_ref_face = 'faces' in ref_pose_scaled and ref_pose_scaled['faces'] is not None and ref_pose_scaled['faces'].size > 0
+    if has_ref_face:
+        try:
+            ref68, _ = _to_68x2(ref_pose_scaled['faces'])
+        except Exception as e:
+            # Best effort: an unusable reference face only disables the face alignment step.
+            print("Reference face conversion failed:", e)
+            has_ref_face = False
+    ratios = compute_ratios_stepwise(ref_scores, source_scores, ref_candidate, source_candidate, conf_th=conf_th, th=1e-6)
+    for pose in detected_poses:
+        candidate = pose['bodies']['candidate']
+        hands = pose['hands']
+        # Each section below moves the child end of one limb (plus everything listed with it)
+        # so that the limb vector becomes `ratio` times its current value. Later sections read
+        # the already-shifted coordinates, so the statement order matters.
+        # ===== Neck =====
+        ratio = ratios[(0, 1)]
+        x_offset = (candidate[1][0] - candidate[0][0]) * (1. - ratio)
+        y_offset = (candidate[1][1] - candidate[0][1]) * (1. - ratio)
+        candidate[[0, 14, 15, 16, 17], 0] += x_offset
+        candidate[[0, 14, 15, 16, 17], 1] += y_offset
+        # ===== Shoulder Right =====
+        ratio = ratios[(1, 2)]
+        x_offset = (candidate[1][0] - candidate[2][0]) * (1. - ratio)
+        y_offset = (candidate[1][1] - candidate[2][1]) * (1. - ratio)
+        candidate[[2, 3, 4], 0] += x_offset
+        candidate[[2, 3, 4], 1] += y_offset
+        # hands[1] travels with the 2-3-4 joint chain
+        hands[1, :, 0] += x_offset
+        hands[1, :, 1] += y_offset
+        # ===== Shoulder Left =====
+        ratio = ratios[(1, 5)]
+        x_offset = (candidate[1][0] - candidate[5][0]) * (1. - ratio)
+        y_offset = (candidate[1][1] - candidate[5][1]) * (1. - ratio)
+        candidate[[5, 6, 7], 0] += x_offset
+        candidate[[5, 6, 7], 1] += y_offset
+        # hands[0] travels with the 5-6-7 joint chain
+        hands[0, :, 0] += x_offset
+        hands[0, :, 1] += y_offset
+        # ===== Upper Arm Right =====
+        ratio = ratios[(2, 3)]
+        x_offset = (candidate[2][0] - candidate[3][0]) * (1. - ratio)
+        y_offset = (candidate[2][1] - candidate[3][1]) * (1. - ratio)
+        candidate[[3, 4], 0] += x_offset
+        candidate[[3, 4], 1] += y_offset
+        hands[1, :, 0] += x_offset
+        hands[1, :, 1] += y_offset
+        # ===== Forearm Right =====
+        ratio = ratios[(3, 4)]
+        x_offset = (candidate[3][0] - candidate[4][0]) * (1. - ratio)
+        y_offset = (candidate[3][1] - candidate[4][1]) * (1. - ratio)
+        candidate[4, 0] += x_offset
+        candidate[4, 1] += y_offset
+        hands[1, :, 0] += x_offset
+        hands[1, :, 1] += y_offset
+        # ===== Upper Arm Left =====
+        ratio = ratios[(5, 6)]
+        x_offset = (candidate[5][0] - candidate[6][0]) * (1. - ratio)
+        y_offset = (candidate[5][1] - candidate[6][1]) * (1. - ratio)
+        candidate[[6, 7], 0] += x_offset
+        candidate[[6, 7], 1] += y_offset
+        hands[0, :, 0] += x_offset
+        hands[0, :, 1] += y_offset
+        # ===== Forearm Left =====
+        ratio = ratios[(6, 7)]
+        x_offset = (candidate[6][0] - candidate[7][0]) * (1. - ratio)
+        y_offset = (candidate[6][1] - candidate[7][1]) * (1. - ratio)
+        candidate[7, 0] += x_offset
+        candidate[7, 1] += y_offset
+        hands[0, :, 0] += x_offset
+        hands[0, :, 1] += y_offset
+        # ===== Head parts =====
+        for (p1, p2) in [(0,14),(0,15),(14,16),(15,17)]:
+            ratio = ratios[(p1,p2)]
+            x_offset = (candidate[p1][0] - candidate[p2][0]) * (1. - ratio)
+            y_offset = (candidate[p1][1] - candidate[p2][1]) * (1. - ratio)
+            # Only p2 itself moves; joints further along (16 after 14, 17 after 15) are not carried with it.
+            candidate[p2, 0] += x_offset
+            candidate[p2, 1] += y_offset
+        # ===== Hips (added) =====
+        # Only joints 8 and 11 themselves are shifted here; joints 9/10 and 12/13 are not carried along.
+        ratio = ratios[(1, 8)]
+        x_offset = (candidate[1][0] - candidate[8][0]) * (1. - ratio)
+        y_offset = (candidate[1][1] - candidate[8][1]) * (1. - ratio)
+        candidate[8, 0] += x_offset
+        candidate[8, 1] += y_offset
+        ratio = ratios[(1, 11)]
+        x_offset = (candidate[1][0] - candidate[11][0]) * (1. - ratio)
+        y_offset = (candidate[1][1] - candidate[11][1]) * (1. - ratio)
+        candidate[11, 0] += x_offset
+        candidate[11, 1] += y_offset
+        # ===== Legs =====
+        # Same shift as above, written as (child - parent) * (ratio - 1).
+        ratio = ratios[(8, 9)]
+        x_offset = (candidate[9][0] - candidate[8][0]) * (ratio - 1.)
+        y_offset = (candidate[9][1] - candidate[8][1]) * (ratio - 1.)
+        candidate[[9, 10], 0] += x_offset
+        candidate[[9, 10], 1] += y_offset
+        ratio = ratios[(9, 10)]
+        x_offset = (candidate[10][0] - candidate[9][0]) * (ratio - 1.)
+        y_offset = (candidate[10][1] - candidate[9][1]) * (ratio - 1.)
+        candidate[10, 0] += x_offset
+        candidate[10, 1] += y_offset
+        ratio = ratios[(11, 12)]
+        x_offset = (candidate[12][0] - candidate[11][0]) * (ratio - 1.)
+        y_offset = (candidate[12][1] - candidate[11][1]) * (ratio - 1.)
+        candidate[[12, 13], 0] += x_offset
+        candidate[[12, 13], 1] += y_offset
+        ratio = ratios[(12, 13)]
+        x_offset = (candidate[13][0] - candidate[12][0]) * (ratio - 1.)
+        y_offset = (candidate[13][1] - candidate[12][1]) * (ratio - 1.)
+        candidate[13, 0] += x_offset
+        candidate[13, 1] += y_offset
+        # rough align
+        if has_ref_face and 'faces' in pose and pose['faces'] is not None and pose['faces'].size > 0:
+            try:
+                src68, to_orig = _to_68x2(pose['faces'])
+                # The face is rescaled to the reference proportions and pinned to the shifted joint 0.
+                scaled68 = _face_scale_only(src68, ref68, candidate[0], alpha=1.0)
+                pose['faces'] = to_orig(scaled68)
+            except Exception as e:
+                # Best effort: a frame whose face cannot be converted keeps its original face.
+                print("Reference face conversion failed:", e)
+                continue
+    return detected_poses

ComfyUI-WanAnimatePreprocess/onetoall/utils.py ADDED Viewed

	@@ -0,0 +1,347 @@

+# https://github.com/ssj9596/One-to-All-Animation
+import cv2
+import numpy as np
+import math
+import copy
+eps = 0.01
+DROP_FACE_POINTS = {0, 14, 15, 16, 17}
+DROP_UPPER_POINTS = {0, 14, 15, 16, 17, 2, 1, 5, 3, 6}
+DROP_LOWER_POINTS = {8, 9, 10, 11, 12, 13}
+def scale_and_translate_pose(tgt_pose, ref_pose, conf_th=0.9, return_ratio=False):
+    aligned_pose = copy.deepcopy(tgt_pose)
+    th = 1e-6
+    ref_kpt = ref_pose['bodies']['candidate'].astype(np.float32)
+    tgt_kpt = aligned_pose['bodies']['candidate'].astype(np.float32)
+    ref_sc = ref_pose['bodies'].get('score', np.ones(ref_kpt.shape[0])).astype(np.float32).reshape(-1)
+    tgt_sc = tgt_pose['bodies'].get('score', np.ones(tgt_kpt.shape[0])).astype(np.float32).reshape(-1)
+    ref_shoulder_valid = (ref_sc[2] >= conf_th) and (ref_sc[5] >= conf_th)
+    tgt_shoulder_valid = (tgt_sc[2] >= conf_th) and (tgt_sc[5] >= conf_th)
+    shoulder_ok = ref_shoulder_valid and tgt_shoulder_valid
+    ref_hip_valid = (ref_sc[8] >= conf_th) and (ref_sc[11] >= conf_th)
+    tgt_hip_valid = (tgt_sc[8] >= conf_th) and (tgt_sc[11] >= conf_th)
+    hip_ok = ref_hip_valid and tgt_hip_valid
+    if shoulder_ok and hip_ok:
+        ref_shoulder_w = abs(ref_kpt[5, 0] - ref_kpt[2, 0])
+        tgt_shoulder_w = abs(tgt_kpt[5, 0] - tgt_kpt[2, 0])
+        x_ratio = ref_shoulder_w / tgt_shoulder_w if tgt_shoulder_w > th else 1.0
+        ref_torso_h = abs(np.mean(ref_kpt[[8, 11], 1]) - np.mean(ref_kpt[[2, 5], 1]))
+        tgt_torso_h = abs(np.mean(tgt_kpt[[8, 11], 1]) - np.mean(tgt_kpt[[2, 5], 1]))
+        y_ratio = ref_torso_h / tgt_torso_h if tgt_torso_h > th else 1.0
+        scale_ratio = (x_ratio + y_ratio) / 2
+    elif shoulder_ok:
+        ref_sh_dist = np.linalg.norm(ref_kpt[2] - ref_kpt[5])
+        tgt_sh_dist = np.linalg.norm(tgt_kpt[2] - tgt_kpt[5])
+        scale_ratio = ref_sh_dist / tgt_sh_dist if tgt_sh_dist > th else 1.0
+    else:
+        ref_ear_dist = np.linalg.norm(ref_kpt[16] - ref_kpt[17])
+        tgt_ear_dist = np.linalg.norm(tgt_kpt[16] - tgt_kpt[17])
+        scale_ratio = ref_ear_dist / tgt_ear_dist if tgt_ear_dist > th else 1.0
+    if return_ratio:
+        return scale_ratio
+    # scale
+    anchor_idx = 1
+    anchor_pt_before_scale = tgt_kpt[anchor_idx].copy()
+    def scale(arr):
+        if arr is not None and arr.size > 0:
+            arr[..., 0] = anchor_pt_before_scale[0] + (arr[..., 0] - anchor_pt_before_scale[0]) * scale_ratio
+            arr[..., 1] = anchor_pt_before_scale[1] + (arr[..., 1] - anchor_pt_before_scale[1]) * scale_ratio
+    scale(tgt_kpt)
+    scale(aligned_pose.get('faces'))
+    scale(aligned_pose.get('hands'))
+    # offset
+    offset = ref_kpt[anchor_idx] - tgt_kpt[anchor_idx]
+    def translate(arr):
+        if arr is not None and arr.size > 0:
+            arr += offset
+    translate(tgt_kpt)
+    translate(aligned_pose.get('faces'))
+    translate(aligned_pose.get('hands'))
+    aligned_pose['bodies']['candidate'] = tgt_kpt
+    return aligned_pose, shoulder_ok, hip_ok
+def warp_ref_to_pose(tgt_img,
+                     ref_pose: dict, #driven pose
+                     tgt_pose: dict,
+                     bg_val=(0, 0, 0),
+                     conf_th=0.9,
+                     align_center=False):
+    H, W = tgt_img.shape[:2]
+    img_tgt_pose = draw_pose_aligned(tgt_pose, H, W, without_face=True)
+    tgt_kpt = tgt_pose['bodies']['candidate'].astype(np.float32)
+    ref_kpt = ref_pose['bodies']['candidate'].astype(np.float32)
+    scale_ratio = scale_and_translate_pose(tgt_pose, ref_pose, conf_th=conf_th, return_ratio=True)
+    anchor_idx = 1
+    x0 = tgt_kpt[anchor_idx][0] * W
+    y0 = tgt_kpt[anchor_idx][1] * H
+    ref_x = ref_kpt[anchor_idx][0] * W if not align_center else W/2
+    ref_y = ref_kpt[anchor_idx][1] * H
+    dx = ref_x - x0
+    dy = ref_y - y0
+    # Affine transformation matrix
+    M = np.array([[scale_ratio, 0, (1-scale_ratio)*x0 + dx],
+                  [0, scale_ratio, (1-scale_ratio)*y0 + dy]],
+                 dtype=np.float32)
+    img_warp = cv2.warpAffine(tgt_img, M, (W, H),
+                              flags=cv2.INTER_LINEAR,
+                              borderValue=bg_val)
+    img_tgt_pose_warp = cv2.warpAffine(img_tgt_pose, M, (W, H),
+                                       flags=cv2.INTER_LINEAR,
+                                       borderValue=bg_val)
+    zeros = np.zeros((H, W), dtype=np.uint8)
+    mask_warp = cv2.warpAffine(zeros, M, (W, H),
+                               flags=cv2.INTER_NEAREST,
+                               borderValue=255)
+    return img_warp, img_tgt_pose_warp, mask_warp
+def hsv_to_rgb(hsv):
+    hsv = np.asarray(hsv, dtype=np.float32)
+    in_shape = hsv.shape
+    hsv = hsv.reshape(-1, 3)
+    h, s, v = hsv[:, 0], hsv[:, 1], hsv[:, 2]
+    i = (h * 6.0).astype(int)
+    f = (h * 6.0) - i
+    i = i % 6
+    p = v * (1.0 - s)
+    q = v * (1.0 - s * f)
+    t = v * (1.0 - s * (1.0 - f))
+    rgb = np.zeros_like(hsv)
+    rgb[i == 0] = np.stack([v[i == 0], t[i == 0], p[i == 0]], axis=1)
+    rgb[i == 1] = np.stack([q[i == 1], v[i == 1], p[i == 1]], axis=1)
+    rgb[i == 2] = np.stack([p[i == 2], v[i == 2], t[i == 2]], axis=1)
+    rgb[i == 3] = np.stack([p[i == 3], q[i == 3], v[i == 3]], axis=1)
+    rgb[i == 4] = np.stack([t[i == 4], p[i == 4], v[i == 4]], axis=1)
+    rgb[i == 5] = np.stack([v[i == 5], p[i == 5], q[i == 5]], axis=1)
+    gray_mask = s == 0
+    rgb[gray_mask] = np.stack([v[gray_mask]] * 3, axis=1)
+    return (rgb.reshape(in_shape) * 255)
+def get_stickwidth(W, H, stickwidth=4):
+    if max(W, H) < 512:
+        ratio = 1.0
+    elif max(W, H) < 1080:
+        ratio = 1.5
+    elif max(W, H) < 2160:
+        ratio = 2.0
+    elif max(W, H) < 3240:
+        ratio = 2.5
+    elif max(W, H) < 4320:
+        ratio = 3.5
+    elif max(W, H) < 5400:
+        ratio = 4.5
+    else:
+        ratio = 4.0
+    return int(stickwidth * ratio)
+def alpha_blend_color(color, alpha):
+    return [int(c * alpha) for c in color]
+def draw_bodypose_aligned(canvas, candidate, subset, score, plan=None):
+    """
+    Draw body limbs (filled ellipses) and joints (filled circles) onto canvas.
+    candidate holds joint coordinates that are multiplied by W / H before drawing; subset[n]
+    maps each joint of person n to a row of candidate (-1 = missing); score[n] holds the
+    per-joint confidence used to dim the colours.
+    plan is an optional dict read via plan["mode"]:
+      - "drop_point": hide plan["point_idx"]
+      - "drop_region": hide every joint in plan["points"]
+      - "stretch_limb": multiply the length of limb plan["limb_idx"] by plan["stretch_scale"]
+    Returns the canvas; note it is rebound to a darkened copy before the joints are drawn.
+    """
+    H, W, C = canvas.shape
+    candidate = np.array(candidate)
+    subset = np.array(subset)
+    stickwidth = get_stickwidth(W, H, stickwidth=3)
+    # Limbs as pairs of 1-based joint numbers (1 is subtracted when they are used as indices).
+    limbSeq = [
+        [2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8],
+        [2, 9], [9, 10], [10, 11], [2, 12], [12, 13], [13, 14],
+        [2, 1], [1, 15], [15, 17], [1, 16], [16, 18], [3, 17], [6, 18]]
+    colors = [
+        [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0],
+        [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255],
+        [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255],
+        [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
+    HIDE_JOINTS = set()
+    stretch_limb_idx = None
+    stretch_scale = None
+    if plan:
+        if plan["mode"] == "drop_point":
+            HIDE_JOINTS.add(plan["point_idx"])
+        elif plan["mode"] == "drop_region":
+            HIDE_JOINTS |= set(plan["points"])
+        elif plan["mode"] == "stretch_limb":
+            stretch_limb_idx = plan["limb_idx"]
+            stretch_scale = plan["stretch_scale"]
+    # Per-person flags for joints whose circle is suppressed because their limb was stretched.
+    hide_joint = np.zeros_like(subset, dtype=bool)
+    # Only the first 17 entries of limbSeq are drawn; the last two ([3, 17], [6, 18]) are never used.
+    for i in range(17):
+        for n in range(len(subset)):
+            idx_pair = limbSeq[i]
+            # NOTE(review): idx_pair holds 1-based joint numbers, while the joint loop below tests
+            # 0-based indices against the same HIDE_JOINTS set — confirm which base is intended.
+            if any(j in HIDE_JOINTS for j in idx_pair):
+                continue
+            index = subset[n][np.array(idx_pair) - 1]
+            conf = score[n][np.array(idx_pair) - 1]
+            if -1 in index:
+                continue
+            # color lighten
+            # Both scores positive: product of the two; otherwise 0.35, or 0 if either score is exactly 0.
+            alpha = max(conf[0] * conf[1], 0) if conf[0]>0 and conf[1]>0 else 0.35
+            if conf[0] == 0 or conf[1] == 0:
+                alpha = 0
+            # Naming caveat: Y holds column 0 scaled by W and X holds column 1 scaled by H.
+            Y = candidate[index.astype(int), 0] * float(W)
+            X = candidate[index.astype(int), 1] * float(H)
+            if stretch_limb_idx == i:
+                # Move the limb's second end point along the limb direction and hide its joint circle.
+                vec_x = X[1] - X[0]
+                vec_y = Y[1] - Y[0]
+                X[1] = X[0] + vec_x * stretch_scale
+                Y[1] = Y[0] + vec_y * stretch_scale
+                hide_joint[n, idx_pair[1]-1] = True
+            mX = np.mean(X)
+            mY = np.mean(Y)
+            length = ((X[0]-X[1])**2 + (Y[0]-Y[1])**2) ** 0.5
+            angle = math.degrees(math.atan2(X[0]-X[1], Y[0]-Y[1]))
+            # The limb is an ellipse centred on the midpoint, half the limb length long and stickwidth wide.
+            polygon = cv2.ellipse2Poly((int(mY), int(mX)),
+                                       (int(length/2), stickwidth), int(angle), 0, 360, 1)
+            cv2.fillConvexPoly(canvas, polygon, alpha_blend_color(colors[i], alpha))
+    # Darken the limbs so the joint circles drawn next stand out.
+    canvas = (canvas * 0.6).astype(np.uint8)
+    for i in range(18):
+        if i in HIDE_JOINTS:
+            continue
+        for n in range(len(subset)):
+            if hide_joint[n, i]:
+                continue
+            index = int(subset[n][i])
+            if index == -1:
+                continue
+            x, y = candidate[index][0:2]
+            conf = score[n][i]
+            # A score of exactly -2 draws the joint in black; other negative scores clamp to 0 as well.
+            alpha = 0 if conf==-2 else max(conf, 0)
+            x = int(x * W)
+            y = int(y * H)
+            cv2.circle(canvas, (x, y), stickwidth, alpha_blend_color(colors[i], alpha), thickness=-1)
+    return canvas
+def draw_handpose_aligned(canvas, all_hand_peaks, all_hand_scores, draw_th=0.3):
+    H, W, C = canvas.shape
+    stickwidth = get_stickwidth(W, H, stickwidth=2)
+    line_thickness = get_stickwidth(W, H, stickwidth=2)
+    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], \
+             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
+    for peaks, scores in zip(all_hand_peaks, all_hand_scores):
+        for ie, e in enumerate(edges):
+            if scores[e[0]] < draw_th or scores[e[1]] < draw_th:
+                    continue
+            x1, y1 = peaks[e[0]]
+            x2, y2 = peaks[e[1]]
+            x1 = int(x1 * W)
+            y1 = int(y1 * H)
+            x2 = int(x2 * W)
+            y2 = int(y2 * H)
+            score = int(scores[e[0]] * scores[e[1]] * 255)
+            if x1 > eps and y1 > eps and x2 > eps and y2 > eps:
+                color = hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]).flatten()
+                color = tuple(int(c * score / 255) for c in color)
+                cv2.line(canvas, (x1, y1), (x2, y2), color, thickness=line_thickness)
+        for i, keyponit in enumerate(peaks):
+            if scores[i] < draw_th:
+                continue
+            x, y = keyponit
+            x = int(x * W)
+            y = int(y * H)
+            score = int(scores[i] * 255)
+            if x > eps and y > eps:
+                cv2.circle(canvas, (x, y), stickwidth, (0, 0, score), thickness=-1)
+    return canvas
+def draw_facepose_aligned(canvas, all_lmks, all_scores, draw_th=0.3,face_change=False):
+    H, W, C = canvas.shape
+    stickwidth = get_stickwidth(W, H, stickwidth=2)
+    SKIP_IDX = set(range(0, 17))
+    SKIP_IDX |= set(range(27, 36))
+    for lmks, scores in zip(all_lmks, all_scores):
+        for idx, (lmk, score) in enumerate(zip(lmks, scores)):
+            # skip chin
+            if idx in SKIP_IDX:
+                continue
+            if score < draw_th:
+                continue
+            x, y = lmk
+            x = int(x * W)
+            y = int(y * H)
+            conf = int(score * 255)
+            # color lighten
+            if face_change:
+                conf = int(conf * 0.35)
+            if x > eps and y > eps:
+                cv2.circle(canvas, (x, y), stickwidth, (conf, conf, conf), thickness=-1)
+    return canvas
+def draw_pose_aligned(pose, H, W, ref_w=2160, without_face=False, pose_plan=None, head_strength="full", face_change=False):
+    bodies = pose['bodies']
+    faces = pose['faces']
+    hands = pose['hands']
+    candidate = bodies['candidate']
+    subset = bodies['subset']
+    body_score = bodies['score'].copy()
+    # control color
+    if head_strength == "weak":
+        target_joints = [0, 14, 15, 16, 17]
+        body_score[:, target_joints] = -2
+    elif head_strength == "none":
+        target_joints = [0, 14, 15, 16, 17]
+        body_score[:, target_joints] = 0
+    sz = min(H, W)
+    sr = (ref_w / sz) if sz != ref_w else 1
+    canvas = np.zeros(shape=(int(H*sr), int(W*sr), 3), dtype=np.uint8)
+    canvas = draw_bodypose_aligned(canvas, candidate, subset,
+                                   score=body_score,
+                                   plan=pose_plan,)
+    canvas = draw_handpose_aligned(canvas, hands, pose['hands_score'])
+    if not without_face:
+        canvas = draw_facepose_aligned(canvas, faces, pose['faces_score'],face_change=face_change)
+    return cv2.resize(canvas, (W, H))

ComfyUI-WanAnimatePreprocess/pose_utils/human_visualization.py ADDED Viewed

	@@ -0,0 +1,1272 @@

+# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
+import os
+import cv2
+import time
+import math
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+from typing import Dict, List
+import random
+from .pose2d_utils import AAPoseMeta
+def draw_handpose(canvas, keypoints, hand_score_th=0.6):
+    """
+    Draw keypoints and connections representing hand pose on a given canvas.
+    Args:
+        canvas (np.ndarray): A 3D numpy array representing the canvas (image) on which to draw the hand pose.
+        keypoints (List[Keypoint]| None): A list of Keypoint objects representing the hand keypoints to be drawn
+                                          or None if no keypoints are present.
+    Returns:
+        np.ndarray: A 3D numpy array representing the modified canvas with the drawn hand pose.
+    Note:
+        The function expects the x and y coordinates of the keypoints to be normalized between 0 and 1.
+    """
+    eps = 0.01
+    H, W, C = canvas.shape
+    stickwidth = max(int(min(H, W) / 200), 1)
+    edges = [
+        [0, 1],
+        [1, 2],
+        [2, 3],
+        [3, 4],
+        [0, 5],
+        [5, 6],
+        [6, 7],
+        [7, 8],
+        [0, 9],
+        [9, 10],
+        [10, 11],
+        [11, 12],
+        [0, 13],
+        [13, 14],
+        [14, 15],
+        [15, 16],
+        [0, 17],
+        [17, 18],
+        [18, 19],
+        [19, 20],
+    ]
+    for ie, (e1, e2) in enumerate(edges):
+        k1 = keypoints[e1]
+        k2 = keypoints[e2]
+        if k1 is None or k2 is None:
+            continue
+        if k1[2] < hand_score_th or k2[2] < hand_score_th:
+            continue
+        x1 = int(k1[0])
+        y1 = int(k1[1])
+        x2 = int(k2[0])
+        y2 = int(k2[1])
+        if x1 > eps and y1 > eps and x2 > eps and y2 > eps:
+            cv2.line(
+                canvas,
+                (x1, y1),
+                (x2, y2),
+                matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255,
+                thickness=stickwidth,
+            )
+    for keypoint in keypoints:
+        if keypoint is None:
+            continue
+        if keypoint[2] < hand_score_th:
+            continue
+        x, y = keypoint[0], keypoint[1]
+        x = int(x)
+        y = int(y)
+        if x > eps and y > eps:
+            cv2.circle(canvas, (x, y), stickwidth, (0, 0, 255), thickness=-1)
+    return canvas
+def draw_handpose_new(canvas, keypoints, stickwidth_type='v2', hand_score_th=0.6, hand_stick_width=4):
+    """
+    Draw keypoints and connections representing hand pose on a given canvas.
+    Args:
+        canvas (np.ndarray): A 3D numpy array representing the canvas (image) on which to draw the hand pose.
+        keypoints (List[Keypoint]| None): A list of Keypoint objects representing the hand keypoints to be drawn
+                                          or None if no keypoints are present.
+    Returns:
+        np.ndarray: A 3D numpy array representing the modified canvas with the drawn hand pose.
+    Note:
+        The function expects the x and y coordinates of the keypoints to be normalized between 0 and 1.
+    """
+    eps = 0.01
+    H, W, C = canvas.shape
+    # if stickwidth_type == 'v1':
+    #     stickwidth = max(int(min(H, W) / 200), 1)
+    # elif stickwidth_type == 'v2':
+    #     stickwidth = max(max(int(min(H, W) / 200) - 1, 1) // 2, 1)
+    if hand_stick_width == -1:
+        stickwidth = max(max(int(min(H, W) / 200) - 1, 1) // 2, 1)
+    else:
+        stickwidth = hand_stick_width
+    edges = [
+        [0, 1],
+        [1, 2],
+        [2, 3],
+        [3, 4],
+        [0, 5],
+        [5, 6],
+        [6, 7],
+        [7, 8],
+        [0, 9],
+        [9, 10],
+        [10, 11],
+        [11, 12],
+        [0, 13],
+        [13, 14],
+        [14, 15],
+        [15, 16],
+        [0, 17],
+        [17, 18],
+        [18, 19],
+        [19, 20],
+    ]
+    for ie, (e1, e2) in enumerate(edges):
+        k1 = keypoints[e1]
+        k2 = keypoints[e2]
+        if k1 is None or k2 is None:
+            continue
+        if k1[2] < hand_score_th or k2[2] < hand_score_th:
+            continue
+        x1 = int(k1[0])
+        y1 = int(k1[1])
+        x2 = int(k2[0])
+        y2 = int(k2[1])
+        if x1 > eps and y1 > eps and x2 > eps and y2 > eps:
+            cv2.line(
+                canvas,
+                (x1, y1),
+                (x2, y2),
+                matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255,
+                thickness=stickwidth,
+            )
+    for keypoint in keypoints:
+        if keypoint is None:
+            continue
+        if keypoint[2] < hand_score_th:
+            continue
+        x, y = keypoint[0], keypoint[1]
+        x = int(x)
+        y = int(y)
+        if x > eps and y > eps:
+            cv2.circle(canvas, (x, y), stickwidth, (0, 0, 255), thickness=-1)
+    return canvas
+def draw_ellipse_by_2kp(img, keypoint1, keypoint2, color, threshold=0.6):
+    H, W, C = img.shape
+    stickwidth = max(int(min(H, W) / 200), 1)
+    if keypoint1[-1] < threshold or keypoint2[-1] < threshold:
+        return img
+    Y = np.array([keypoint1[0], keypoint2[0]])
+    X = np.array([keypoint1[1], keypoint2[1]])
+    mX = np.mean(X)
+    mY = np.mean(Y)
+    length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
+    angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
+    polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
+    cv2.fillConvexPoly(img, polygon, [int(float(c) * 0.6) for c in color])
+    return img
+def split_pose2d_kps_to_aa(kp2ds: np.ndarray) -> List[np.ndarray]:
+    """Convert the 133 keypoints from pose2d to body and hands keypoints.
+    Args:
+        kp2ds (np.ndarray): [133, 2]
+    Returns:
+        List[np.ndarray]: _description_
+    """
+    kp2ds_body = (
+        kp2ds[[0, 6, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3, 17, 20]]
+        + kp2ds[[0, 5, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3, 18, 21]]
+    ) / 2
+    kp2ds_lhand = kp2ds[91:112]
+    kp2ds_rhand = kp2ds[112:133]
+    return kp2ds_body.copy(), kp2ds_lhand.copy(), kp2ds_rhand.copy()
+def draw_aapose_by_meta(img, meta: AAPoseMeta, threshold=0.5, stick_width_norm=200, draw_hand=True, draw_head=True):
+    kp2ds = np.concatenate([meta.kps_body, meta.kps_body_p[:, None]], axis=1)
+    kp2ds_lhand = np.concatenate([meta.kps_lhand, meta.kps_lhand_p[:, None]], axis=1)
+    kp2ds_rhand = np.concatenate([meta.kps_rhand, meta.kps_rhand_p[:, None]], axis=1)
+    pose_img = draw_aapose(img, kp2ds, threshold, kp2ds_lhand=kp2ds_lhand, kp2ds_rhand=kp2ds_rhand, stick_width_norm=stick_width_norm, draw_hand=draw_hand, draw_head=draw_head)
+    return pose_img
+def draw_aapose_by_meta_new(img, meta: AAPoseMeta, threshold=0.5, stickwidth_type='v2', body_stick_width=-1, draw_hand=True, draw_head=True, hand_stick_width=4):
+    kp2ds = np.concatenate([meta.kps_body, meta.kps_body_p[:, None]], axis=1)
+    kp2ds_lhand = np.concatenate([meta.kps_lhand, meta.kps_lhand_p[:, None]], axis=1)
+    kp2ds_rhand = np.concatenate([meta.kps_rhand, meta.kps_rhand_p[:, None]], axis=1)
+    pose_img = draw_aapose_new(img, kp2ds, threshold, kp2ds_lhand=kp2ds_lhand, kp2ds_rhand=kp2ds_rhand, body_stick_width=body_stick_width,
+                               stickwidth_type=stickwidth_type, draw_hand=draw_hand, draw_head=draw_head, hand_stick_width=hand_stick_width)
+    return pose_img
+def draw_hand_by_meta(img, meta: AAPoseMeta, threshold=0.5, stick_width_norm=200):
+    kp2ds = np.concatenate([meta.kps_body, meta.kps_body_p[:, None] * 0], axis=1)
+    kp2ds_lhand = np.concatenate([meta.kps_lhand, meta.kps_lhand_p[:, None]], axis=1)
+    kp2ds_rhand = np.concatenate([meta.kps_rhand, meta.kps_rhand_p[:, None]], axis=1)
+    pose_img = draw_aapose(img, kp2ds, threshold, kp2ds_lhand=kp2ds_lhand, kp2ds_rhand=kp2ds_rhand, stick_width_norm=stick_width_norm, draw_hand=True, draw_head=False)
+    return pose_img
+def draw_aaface_by_meta(img, meta: AAPoseMeta, threshold=0.5, stick_width_norm=200, draw_hand=False, draw_head=True):
+    kp2ds = np.concatenate([meta.kps_body, meta.kps_body_p[:, None]], axis=1)
+    # kp2ds_lhand = np.concatenate([meta.kps_lhand, meta.kps_lhand_p[:, None]], axis=1)
+    # kp2ds_rhand = np.concatenate([meta.kps_rhand, meta.kps_rhand_p[:, None]], axis=1)
+    pose_img = draw_M(img, kp2ds, threshold, kp2ds_lhand=None, kp2ds_rhand=None, stick_width_norm=stick_width_norm, draw_hand=draw_hand, draw_head=draw_head)
+    return pose_img
+def draw_aanose_by_meta(img, meta: AAPoseMeta, threshold=0.5, stick_width_norm=100, draw_hand=False):
+    kp2ds = np.concatenate([meta.kps_body, meta.kps_body_p[:, None]], axis=1)
+    # kp2ds_lhand = np.concatenate([meta.kps_lhand, meta.kps_lhand_p[:, None]], axis=1)
+    # kp2ds_rhand = np.concatenate([meta.kps_rhand, meta.kps_rhand_p[:, None]], axis=1)
+    pose_img = draw_nose(img, kp2ds, threshold, kp2ds_lhand=None, kp2ds_rhand=None, stick_width_norm=stick_width_norm, draw_hand=draw_hand)
+    return pose_img
+def gen_face_motion_seq(img, metas: List[AAPoseMeta], threshold=0.5, stick_width_norm=200):
+    return
+def draw_M(
+    img,
+    kp2ds,
+    threshold=0.6,
+    data_to_json=None,
+    idx=-1,
+    kp2ds_lhand=None,
+    kp2ds_rhand=None,
+    draw_hand=False,
+    stick_width_norm=200,
+    draw_head=True
+):
+    """
+    Draw keypoints and connections representing hand pose on a given canvas.
+    Args:
+        canvas (np.ndarray): A 3D numpy array representing the canvas (image) on which to draw the hand pose.
+        keypoints (List[Keypoint]| None): A list of Keypoint objects representing the hand keypoints to be drawn
+                                          or None if no keypoints are present.
+    Returns:
+        np.ndarray: A 3D numpy array representing the modified canvas with the drawn hand pose.
+    Note:
+        The function expects the x and y coordinates of the keypoints to be normalized between 0 and 1.
+    """
+    new_kep_list = [
+        "Nose",
+        "Neck",
+        "RShoulder",
+        "RElbow",
+        "RWrist",  # No.4
+        "LShoulder",
+        "LElbow",
+        "LWrist",  # No.7
+        "RHip",
+        "RKnee",
+        "RAnkle",  # No.10
+        "LHip",
+        "LKnee",
+        "LAnkle",  # No.13
+        "REye",
+        "LEye",
+        "REar",
+        "LEar",
+        "LToe",
+        "RToe",
+    ]
+    # kp2ds_body = (kp2ds.copy()[[0, 6, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3, 17, 20]] + \
+    #              kp2ds.copy()[[0, 5, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3, 18, 21]]) / 2
+    kp2ds = kp2ds.copy()
+    # import ipdb; ipdb.set_trace()
+    kp2ds[[1,2,3,4,5,6,7,8,9,10,11,12,13,18,19], 2] = 0
+    if not draw_head:
+        kp2ds[[0,14,15,16,17], 2] = 0
+    kp2ds_body = kp2ds
+    # kp2ds_body = kp2ds_body[:18]
+    # kp2ds_lhand = kp2ds.copy()[91:112]
+    # kp2ds_rhand = kp2ds.copy()[112:133]
+    limbSeq = [
+        # [2, 3],
+        # [2, 6],  # shoulders
+        # [3, 4],
+        # [4, 5],  # left arm
+        # [6, 7],
+        # [7, 8],  # right arm
+        # [2, 9],
+        # [9, 10],
+        # [10, 11],  # right leg
+        # [2, 12],
+        # [12, 13],
+        # [13, 14],  # left leg
+        # [2, 1],
+        [1, 15],
+        [15, 17],
+        [1, 16],
+        [16, 18],  # face (nose, eyes, ears)
+        # [14, 19],
+        # [11, 20],  # foot
+    ]
+    colors = [
+        # [255, 0, 0],
+        # [255, 85, 0],
+        # [255, 170, 0],
+        # [255, 255, 0],
+        # [170, 255, 0],
+        # [85, 255, 0],
+        # [0, 255, 0],
+        # [0, 255, 85],
+        # [0, 255, 170],
+        # [0, 255, 255],
+        # [0, 170, 255],
+        # [0, 85, 255],
+        # [0, 0, 255],
+        # [85, 0, 255],
+        [170, 0, 255],
+        [255, 0, 255],
+        [255, 0, 170],
+        [255, 0, 85],
+        # foot
+        # [200, 200, 0],
+        # [100, 100, 0],
+    ]
+    H, W, C = img.shape
+    stickwidth = max(int(min(H, W) / stick_width_norm), 1)
+    for _idx, ((k1_index, k2_index), color) in enumerate(zip(limbSeq, colors)):
+        keypoint1 = kp2ds_body[k1_index - 1]
+        keypoint2 = kp2ds_body[k2_index - 1]
+        if keypoint1[-1] < threshold or keypoint2[-1] < threshold:
+            continue
+        Y = np.array([keypoint1[0], keypoint2[0]])
+        X = np.array([keypoint1[1], keypoint2[1]])
+        mX = np.mean(X)
+        mY = np.mean(Y)
+        length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
+        angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
+        polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
+        cv2.fillConvexPoly(img, polygon, [int(float(c) * 0.6) for c in color])
+    for _idx, (keypoint, color) in enumerate(zip(kp2ds_body, colors)):
+        if keypoint[-1] < threshold:
+            continue
+        x, y = keypoint[0], keypoint[1]
+        # cv2.circle(canvas, (int(x), int(y)), 4, color, thickness=-1)
+        cv2.circle(img, (int(x), int(y)), stickwidth, color, thickness=-1)
+    if draw_hand:
+        img = draw_handpose(img, kp2ds_lhand, hand_score_th=threshold)
+        img = draw_handpose(img, kp2ds_rhand, hand_score_th=threshold)
+    kp2ds_body[:, 0] /= W
+    kp2ds_body[:, 1] /= H
+    if data_to_json is not None:
+        if idx == -1:
+            data_to_json.append(
+                {
+                    "image_id": "frame_{:05d}.jpg".format(len(data_to_json) + 1),
+                    "height": H,
+                    "width": W,
+                    "category_id": 1,
+                    "keypoints_body": kp2ds_body.tolist(),
+                    "keypoints_left_hand": kp2ds_lhand.tolist(),
+                    "keypoints_right_hand": kp2ds_rhand.tolist(),
+                }
+            )
+        else:
+            data_to_json[idx] = {
+                "image_id": "frame_{:05d}.jpg".format(idx + 1),
+                "height": H,
+                "width": W,
+                "category_id": 1,
+                "keypoints_body": kp2ds_body.tolist(),
+                "keypoints_left_hand": kp2ds_lhand.tolist(),
+                "keypoints_right_hand": kp2ds_rhand.tolist(),
+            }
+    return img
+def draw_nose(
+    img,
+    kp2ds,
+    threshold=0.6,
+    data_to_json=None,
+    idx=-1,
+    kp2ds_lhand=None,
+    kp2ds_rhand=None,
+    draw_hand=False,
+    stick_width_norm=200,
+):
+    """
+    Draw keypoints and connections representing hand pose on a given canvas.
+    Args:
+        canvas (np.ndarray): A 3D numpy array representing the canvas (image) on which to draw the hand pose.
+        keypoints (List[Keypoint]| None): A list of Keypoint objects representing the hand keypoints to be drawn
+                                          or None if no keypoints are present.
+    Returns:
+        np.ndarray: A 3D numpy array representing the modified canvas with the drawn hand pose.
+    Note:
+        The function expects the x and y coordinates of the keypoints to be normalized between 0 and 1.
+    """
+    new_kep_list = [
+        "Nose",
+        "Neck",
+        "RShoulder",
+        "RElbow",
+        "RWrist",  # No.4
+        "LShoulder",
+        "LElbow",
+        "LWrist",  # No.7
+        "RHip",
+        "RKnee",
+        "RAnkle",  # No.10
+        "LHip",
+        "LKnee",
+        "LAnkle",  # No.13
+        "REye",
+        "LEye",
+        "REar",
+        "LEar",
+        "LToe",
+        "RToe",
+    ]
+    # kp2ds_body = (kp2ds.copy()[[0, 6, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3, 17, 20]] + \
+    #              kp2ds.copy()[[0, 5, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3, 18, 21]]) / 2
+    kp2ds = kp2ds.copy()
+    kp2ds[1:, 2] = 0
+    # kp2ds[0, 2] = 1
+    kp2ds_body = kp2ds
+    # kp2ds_body = kp2ds_body[:18]
+    # kp2ds_lhand = kp2ds.copy()[91:112]
+    # kp2ds_rhand = kp2ds.copy()[112:133]
+    limbSeq = [
+        # [2, 3],
+        # [2, 6],  # shoulders
+        # [3, 4],
+        # [4, 5],  # left arm
+        # [6, 7],
+        # [7, 8],  # right arm
+        # [2, 9],
+        # [9, 10],
+        # [10, 11],  # right leg
+        # [2, 12],
+        # [12, 13],
+        # [13, 14],  # left leg
+        # [2, 1],
+        [1, 15],
+        [15, 17],
+        [1, 16],
+        [16, 18],  # face (nose, eyes, ears)
+        # [14, 19],
+        # [11, 20],  # foot
+    ]
+    colors = [
+        # [255, 0, 0],
+        # [255, 85, 0],
+        # [255, 170, 0],
+        # [255, 255, 0],
+        # [170, 255, 0],
+        # [85, 255, 0],
+        # [0, 255, 0],
+        # [0, 255, 85],
+        # [0, 255, 170],
+        # [0, 255, 255],
+        # [0, 170, 255],
+        # [0, 85, 255],
+        # [0, 0, 255],
+        # [85, 0, 255],
+        [170, 0, 255],
+        # [255, 0, 255],
+        # [255, 0, 170],
+        # [255, 0, 85],
+        # foot
+        # [200, 200, 0],
+        # [100, 100, 0],
+    ]
+    H, W, C = img.shape
+    stickwidth = max(int(min(H, W) / stick_width_norm), 1)
+    # for _idx, ((k1_index, k2_index), color) in enumerate(zip(limbSeq, colors)):
+    #     keypoint1 = kp2ds_body[k1_index - 1]
+    #     keypoint2 = kp2ds_body[k2_index - 1]
+    #     if keypoint1[-1] < threshold or keypoint2[-1] < threshold:
+    #         continue
+    #     Y = np.array([keypoint1[0], keypoint2[0]])
+    #     X = np.array([keypoint1[1], keypoint2[1]])
+    #     mX = np.mean(X)
+    #     mY = np.mean(Y)
+    #     length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
+    #     angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
+    #     polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
+    #     cv2.fillConvexPoly(img, polygon, [int(float(c) * 0.6) for c in color])
+    for _idx, (keypoint, color) in enumerate(zip(kp2ds_body, colors)):
+        if keypoint[-1] < threshold:
+            continue
+        x, y = keypoint[0], keypoint[1]
+        # cv2.circle(canvas, (int(x), int(y)), 4, color, thickness=-1)
+        cv2.circle(img, (int(x), int(y)), stickwidth, color, thickness=-1)
+    if draw_hand:
+        img = draw_handpose(img, kp2ds_lhand, hand_score_th=threshold)
+        img = draw_handpose(img, kp2ds_rhand, hand_score_th=threshold)
+    kp2ds_body[:, 0] /= W
+    kp2ds_body[:, 1] /= H
+    if data_to_json is not None:
+        if idx == -1:
+            data_to_json.append(
+                {
+                    "image_id": "frame_{:05d}.jpg".format(len(data_to_json) + 1),
+                    "height": H,
+                    "width": W,
+                    "category_id": 1,
+                    "keypoints_body": kp2ds_body.tolist(),
+                    "keypoints_left_hand": kp2ds_lhand.tolist(),
+                    "keypoints_right_hand": kp2ds_rhand.tolist(),
+                }
+            )
+        else:
+            data_to_json[idx] = {
+                "image_id": "frame_{:05d}.jpg".format(idx + 1),
+                "height": H,
+                "width": W,
+                "category_id": 1,
+                "keypoints_body": kp2ds_body.tolist(),
+                "keypoints_left_hand": kp2ds_lhand.tolist(),
+                "keypoints_right_hand": kp2ds_rhand.tolist(),
+            }
+    return img
+def draw_aapose(
+    img,
+    kp2ds,
+    threshold=0.6,
+    data_to_json=None,
+    idx=-1,
+    kp2ds_lhand=None,
+    kp2ds_rhand=None,
+    draw_hand=False,
+    stick_width_norm=200,
+    draw_head=True
+):
+    """
+    Draw keypoints and connections representing hand pose on a given canvas.
+    Args:
+        canvas (np.ndarray): A 3D numpy array representing the canvas (image) on which to draw the hand pose.
+        keypoints (List[Keypoint]| None): A list of Keypoint objects representing the hand keypoints to be drawn
+                                          or None if no keypoints are present.
+    Returns:
+        np.ndarray: A 3D numpy array representing the modified canvas with the drawn hand pose.
+    Note:
+        The function expects the x and y coordinates of the keypoints to be normalized between 0 and 1.
+    """
+    new_kep_list = [
+        "Nose",
+        "Neck",
+        "RShoulder",
+        "RElbow",
+        "RWrist",  # No.4
+        "LShoulder",
+        "LElbow",
+        "LWrist",  # No.7
+        "RHip",
+        "RKnee",
+        "RAnkle",  # No.10
+        "LHip",
+        "LKnee",
+        "LAnkle",  # No.13
+        "REye",
+        "LEye",
+        "REar",
+        "LEar",
+        "LToe",
+        "RToe",
+    ]
+    # kp2ds_body = (kp2ds.copy()[[0, 6, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3, 17, 20]] + \
+    #              kp2ds.copy()[[0, 5, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3, 18, 21]]) / 2
+    kp2ds = kp2ds.copy()
+    if not draw_head:
+        kp2ds[[0,14,15,16,17], 2] = 0
+    kp2ds_body = kp2ds
+    # kp2ds_lhand = kp2ds.copy()[91:112]
+    # kp2ds_rhand = kp2ds.copy()[112:133]
+    limbSeq = [
+        [2, 3],
+        [2, 6],  # shoulders
+        [3, 4],
+        [4, 5],  # left arm
+        [6, 7],
+        [7, 8],  # right arm
+        [2, 9],
+        [9, 10],
+        [10, 11],  # right leg
+        [2, 12],
+        [12, 13],
+        [13, 14],  # left leg
+        [2, 1],
+        [1, 15],
+        [15, 17],
+        [1, 16],
+        [16, 18],  # face (nose, eyes, ears)
+        [14, 19],
+        [11, 20],  # foot
+    ]
+    colors = [
+        [255, 0, 0],
+        [255, 85, 0],
+        [255, 170, 0],
+        [255, 255, 0],
+        [170, 255, 0],
+        [85, 255, 0],
+        [0, 255, 0],
+        [0, 255, 85],
+        [0, 255, 170],
+        [0, 255, 255],
+        [0, 170, 255],
+        [0, 85, 255],
+        [0, 0, 255],
+        [85, 0, 255],
+        [170, 0, 255],
+        [255, 0, 255],
+        [255, 0, 170],
+        [255, 0, 85],
+        # foot
+        [200, 200, 0],
+        [100, 100, 0],
+    ]
+    H, W, C = img.shape
+    stickwidth = max(int(min(H, W) / stick_width_norm), 1)
+    for _idx, ((k1_index, k2_index), color) in enumerate(zip(limbSeq, colors)):
+        keypoint1 = kp2ds_body[k1_index - 1]
+        keypoint2 = kp2ds_body[k2_index - 1]
+        if keypoint1[-1] < threshold or keypoint2[-1] < threshold:
+            continue
+        Y = np.array([keypoint1[0], keypoint2[0]])
+        X = np.array([keypoint1[1], keypoint2[1]])
+        mX = np.mean(X)
+        mY = np.mean(Y)
+        length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
+        angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
+        polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
+        cv2.fillConvexPoly(img, polygon, [int(float(c) * 0.6) for c in color])
+    for _idx, (keypoint, color) in enumerate(zip(kp2ds_body, colors)):
+        if keypoint[-1] < threshold:
+            continue
+        x, y = keypoint[0], keypoint[1]
+        # cv2.circle(canvas, (int(x), int(y)), 4, color, thickness=-1)
+        cv2.circle(img, (int(x), int(y)), stickwidth, color, thickness=-1)
+    if draw_hand:
+        img = draw_handpose(img, kp2ds_lhand, hand_score_th=threshold)
+        img = draw_handpose(img, kp2ds_rhand, hand_score_th=threshold)
+    kp2ds_body[:, 0] /= W
+    kp2ds_body[:, 1] /= H
+    if data_to_json is not None:
+        if idx == -1:
+            data_to_json.append(
+                {
+                    "image_id": "frame_{:05d}.jpg".format(len(data_to_json) + 1),
+                    "height": H,
+                    "width": W,
+                    "category_id": 1,
+                    "keypoints_body": kp2ds_body.tolist(),
+                    "keypoints_left_hand": kp2ds_lhand.tolist(),
+                    "keypoints_right_hand": kp2ds_rhand.tolist(),
+                }
+            )
+        else:
+            data_to_json[idx] = {
+                "image_id": "frame_{:05d}.jpg".format(idx + 1),
+                "height": H,
+                "width": W,
+                "category_id": 1,
+                "keypoints_body": kp2ds_body.tolist(),
+                "keypoints_left_hand": kp2ds_lhand.tolist(),
+                "keypoints_right_hand": kp2ds_rhand.tolist(),
+            }
+    return img
+def draw_aapose_new(
+    img,
+    kp2ds,
+    threshold=0.6,
+    data_to_json=None,
+    idx=-1,
+    kp2ds_lhand=None,
+    kp2ds_rhand=None,
+    draw_hand=False,
+    stickwidth_type='v2',
+    body_stick_width=-1,
+    hand_stick_width=-1,
+    draw_head=True
+):
+    """
+    Draw keypoints and connections representing hand pose on a given canvas.
+    Args:
+        canvas (np.ndarray): A 3D numpy array representing the canvas (image) on which to draw the hand pose.
+        keypoints (List[Keypoint]| None): A list of Keypoint objects representing the hand keypoints to be drawn
+                                          or None if no keypoints are present.
+    Returns:
+        np.ndarray: A 3D numpy array representing the modified canvas with the drawn hand pose.
+    Note:
+        The function expects the x and y coordinates of the keypoints to be normalized between 0 and 1.
+    """
+    new_kep_list = [
+        "Nose",
+        "Neck",
+        "RShoulder",
+        "RElbow",
+        "RWrist",  # No.4
+        "LShoulder",
+        "LElbow",
+        "LWrist",  # No.7
+        "RHip",
+        "RKnee",
+        "RAnkle",  # No.10
+        "LHip",
+        "LKnee",
+        "LAnkle",  # No.13
+        "REye",
+        "LEye",
+        "REar",
+        "LEar",
+        "LToe",
+        "RToe",
+    ]
+    # kp2ds_body = (kp2ds.copy()[[0, 6, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3, 17, 20]] + \
+    #              kp2ds.copy()[[0, 5, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3, 18, 21]]) / 2
+    kp2ds = kp2ds.copy()
+    if not draw_head:
+        kp2ds[[0,14,15,16,17], 2] = 0
+    kp2ds_body = kp2ds
+    # kp2ds_lhand = kp2ds.copy()[91:112]
+    # kp2ds_rhand = kp2ds.copy()[112:133]
+    limbSeq = [
+        [2, 3],
+        [2, 6],  # shoulders
+        [3, 4],
+        [4, 5],  # left arm
+        [6, 7],
+        [7, 8],  # right arm
+        [2, 9],
+        [9, 10],
+        [10, 11],  # right leg
+        [2, 12],
+        [12, 13],
+        [13, 14],  # left leg
+        [2, 1],
+        [1, 15],
+        [15, 17],
+        [1, 16],
+        [16, 18],  # face (nose, eyes, ears)
+        [14, 19],
+        [11, 20],  # foot
+    ]
+    colors = [
+        [255, 0, 0],
+        [255, 85, 0],
+        [255, 170, 0],
+        [255, 255, 0],
+        [170, 255, 0],
+        [85, 255, 0],
+        [0, 255, 0],
+        [0, 255, 85],
+        [0, 255, 170],
+        [0, 255, 255],
+        [0, 170, 255],
+        [0, 85, 255],
+        [0, 0, 255],
+        [85, 0, 255],
+        [170, 0, 255],
+        [255, 0, 255],
+        [255, 0, 170],
+        [255, 0, 85],
+        # foot
+        [200, 200, 0],
+        [100, 100, 0],
+    ]
+    H, W, C = img.shape
+    H, W, C = img.shape
+    #if stickwidth_type == 'v1':
+    #    stickwidth = max(int(min(H, W) / 200), 1)
+    #elif stickwidth_type == 'v2':
+    if body_stick_width == -1:
+        stickwidth = max(int(min(H, W) / 200) - 1, 1)
+    else:
+        stickwidth = body_stick_width
+    for _idx, ((k1_index, k2_index), color) in enumerate(zip(limbSeq, colors)):
+        keypoint1 = kp2ds_body[k1_index - 1]
+        keypoint2 = kp2ds_body[k2_index - 1]
+        if keypoint1[-1] < threshold or keypoint2[-1] < threshold:
+            continue
+        Y = np.array([keypoint1[0], keypoint2[0]])
+        X = np.array([keypoint1[1], keypoint2[1]])
+        mX = np.mean(X)
+        mY = np.mean(Y)
+        length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
+        angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
+        polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
+        cv2.fillConvexPoly(img, polygon, [int(float(c) * 0.6) for c in color])
+    for _idx, (keypoint, color) in enumerate(zip(kp2ds_body, colors)):
+        if keypoint[-1] < threshold:
+            continue
+        x, y = keypoint[0], keypoint[1]
+        # cv2.circle(canvas, (int(x), int(y)), 4, color, thickness=-1)
+        cv2.circle(img, (int(x), int(y)), stickwidth, color, thickness=-1)
+    if draw_hand:
+        img = draw_handpose_new(img, kp2ds_lhand, stickwidth_type=stickwidth_type, hand_score_th=threshold, hand_stick_width=hand_stick_width)
+        img = draw_handpose_new(img, kp2ds_rhand, stickwidth_type=stickwidth_type, hand_score_th=threshold, hand_stick_width=hand_stick_width)
+    kp2ds_body[:, 0] /= W
+    kp2ds_body[:, 1] /= H
+    if data_to_json is not None:
+        if idx == -1:
+            data_to_json.append(
+                {
+                    "image_id": "frame_{:05d}.jpg".format(len(data_to_json) + 1),
+                    "height": H,
+                    "width": W,
+                    "category_id": 1,
+                    "keypoints_body": kp2ds_body.tolist(),
+                    "keypoints_left_hand": kp2ds_lhand.tolist(),
+                    "keypoints_right_hand": kp2ds_rhand.tolist(),
+                }
+            )
+        else:
+            data_to_json[idx] = {
+                "image_id": "frame_{:05d}.jpg".format(idx + 1),
+                "height": H,
+                "width": W,
+                "category_id": 1,
+                "keypoints_body": kp2ds_body.tolist(),
+                "keypoints_left_hand": kp2ds_lhand.tolist(),
+                "keypoints_right_hand": kp2ds_rhand.tolist(),
+            }
+    return img
+def draw_bbox(img, bbox, color=(255, 0, 0)):
+    img = load_image(img)
+    bbox = [int(bbox_tmp) for bbox_tmp in bbox]
+    cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
+    return img
+def draw_kp2ds(img, kp2ds, threshold=0, color=(255, 0, 0), skeleton=None, reverse=False):
+    img = load_image(img, reverse)
+    if skeleton is not None:
+        if skeleton == "coco17":
+            skeleton_list = [
+                [6, 8],
+                [8, 10],
+                [5, 7],
+                [7, 9],
+                [11, 13],
+                [13, 15],
+                [12, 14],
+                [14, 16],
+                [5, 6],
+                [6, 12],
+                [12, 11],
+                [11, 5],
+            ]
+            color_list = [
+                (255, 0, 0),
+                (0, 255, 0),
+                (0, 0, 255),
+                (255, 255, 0),
+                (255, 0, 255),
+                (0, 255, 255),
+            ]
+        elif skeleton == "cocowholebody":
+            skeleton_list = [
+                [6, 8],
+                [8, 10],
+                [5, 7],
+                [7, 9],
+                [11, 13],
+                [13, 15],
+                [12, 14],
+                [14, 16],
+                [5, 6],
+                [6, 12],
+                [12, 11],
+                [11, 5],
+                [15, 17],
+                [15, 18],
+                [15, 19],
+                [16, 20],
+                [16, 21],
+                [16, 22],
+                [91, 92, 93, 94, 95],
+                [91, 96, 97, 98, 99],
+                [91, 100, 101, 102, 103],
+                [91, 104, 105, 106, 107],
+                [91, 108, 109, 110, 111],
+                [112, 113, 114, 115, 116],
+                [112, 117, 118, 119, 120],
+                [112, 121, 122, 123, 124],
+                [112, 125, 126, 127, 128],
+                [112, 129, 130, 131, 132],
+            ]
+            color_list = [
+                (255, 0, 0),
+                (0, 255, 0),
+                (0, 0, 255),
+                (255, 255, 0),
+                (255, 0, 255),
+                (0, 255, 255),
+            ]
+        else:
+            color_list = [color]
+        for _idx, _skeleton in enumerate(skeleton_list):
+            for i in range(len(_skeleton) - 1):
+                cv2.line(
+                    img,
+                    (int(kp2ds[_skeleton[i], 0]), int(kp2ds[_skeleton[i], 1])),
+                    (int(kp2ds[_skeleton[i + 1], 0]), int(kp2ds[_skeleton[i + 1], 1])),
+                    color_list[_idx % len(color_list)],
+                    3,
+                )
+    for _idx, kp2d in enumerate(kp2ds):
+        if kp2d[2] > threshold:
+            cv2.circle(img, (int(kp2d[0]), int(kp2d[1])), 3, color, -1)
+            # cv2.putText(img,
+            #         str(_idx),
+            #         (int(kp2d[0, i, 0])*1,
+            #             int(kp2d[0, i, 1])*1),
+            #         cv2.FONT_HERSHEY_SIMPLEX,
+            #         0.75,
+            #         color,
+            #         2
+            #         )
+    return img
+def draw_pcd(pcd_list, save_path=None):
+    fig = plt.figure()
+    ax = fig.add_subplot(111, projection="3d")
+    color_list = ["r", "g", "b", "y", "p"]
+    for _idx, _pcd in enumerate(pcd_list):
+        ax.scatter(_pcd[:, 0], _pcd[:, 1], _pcd[:, 2], c=color_list[_idx], marker="o")
+    ax.set_xlabel("X")
+    ax.set_ylabel("Y")
+    ax.set_zlabel("Z")
+    if save_path is not None:
+        plt.savefig(save_path)
+    else:
+        plt.savefig("tmp.png")
+def load_image(img, reverse=False):
+    if type(img) == str:
+        img = cv2.imread(img)
+    if reverse:
+        img = img.astype(np.float32)
+        img = img[:, :, ::-1]
+        img = img.astype(np.uint8)
+    return img
+def draw_skeleten(meta):
+    kps = []
+    for i, kp in enumerate(meta["keypoints_body"]):
+        if kp is None:
+            # if kp is None:
+            kps.append([0, 0, 0])
+        else:
+            kps.append([*kp, 1])
+    kps = np.array(kps)
+    kps[:, 0] *= meta["width"]
+    kps[:, 1] *= meta["height"]
+    pose_img = np.zeros([meta["height"], meta["width"], 3], dtype=np.uint8)
+    pose_img = draw_aapose(
+        pose_img,
+        kps,
+        draw_hand=True,
+        kp2ds_lhand=meta["keypoints_left_hand"],
+        kp2ds_rhand=meta["keypoints_right_hand"],
+    )
+    return pose_img
+def draw_skeleten_with_pncc(pncc: np.ndarray, meta: Dict) -> np.ndarray:
+    """
+    Args:
+        pncc: [H,W,3]
+        meta: required keys: keypoints_body: [N, 3] keypoints_left_hand, keypoints_right_hand
+    Return:
+        np.ndarray [H, W, 3]
+    """
+    # preprocess keypoints
+    kps = []
+    for i, kp in enumerate(meta["keypoints_body"]):
+        if kp is None:
+            # if kp is None:
+            kps.append([0, 0, 0])
+        elif i in [14, 15, 16, 17]:
+            kps.append([0, 0, 0])
+        else:
+            kps.append([*kp])
+    kps = np.stack(kps)
+    kps[:, 0] *= pncc.shape[1]
+    kps[:, 1] *= pncc.shape[0]
+    # draw neck
+    canvas = np.zeros_like(pncc)
+    if kps[0][2] > 0.6 and kps[1][2] > 0.6:
+        canvas = draw_ellipse_by_2kp(canvas, kps[0], kps[1], [0, 0, 255])
+    # draw pncc
+    mask = (pncc > 0).max(axis=2)
+    canvas[mask] = pncc[mask]
+    pncc = canvas
+    # draw other skeleten
+    kps[0] = 0
+    meta["keypoints_left_hand"][:, 0] *= meta["width"]
+    meta["keypoints_left_hand"][:, 1] *= meta["height"]
+    meta["keypoints_right_hand"][:, 0] *= meta["width"]
+    meta["keypoints_right_hand"][:, 1] *= meta["height"]
+    pose_img = draw_aapose(
+        pncc,
+        kps,
+        draw_hand=True,
+        kp2ds_lhand=meta["keypoints_left_hand"],
+        kp2ds_rhand=meta["keypoints_right_hand"],
+    )
+    return pose_img
+# Default drawing style consumed by draw_face_kp. Each entry describes one
+# facial part: "indexs" lists the keypoint indices, "color" is the draw colour,
+# "close": True requests a closed polyline, and "connect": False draws the
+# points as separate filled dots instead of a polyline.
+FACE_CUSTOM_STYLE = {
+    "eyeball": {"indexs": [68, 69], "color": [255, 255, 255], "connect": False},
+    "left_eyebrow": {"indexs": [17, 18, 19, 20, 21], "color": [0, 255, 0]},
+    "right_eyebrow": {"indexs": [22, 23, 24, 25, 26], "color": [0, 0, 255]},
+    "left_eye": {"indexs": [36, 37, 38, 39, 40, 41], "color": [255, 255, 0], "close": True},
+    "right_eye": {"indexs": [42, 43, 44, 45, 46, 47], "color": [255, 0, 255], "close": True},
+    "mouth_outside": {"indexs": list(range(48, 60)), "color": [100, 255, 50], "close": True},
+    "mouth_inside": {"indexs": [60, 61, 62, 63, 64, 65, 66, 67], "color": [255, 100, 50], "close": True},
+}
+def draw_face_kp(img, kps, thickness=2, style=FACE_CUSTOM_STYLE):
+    """
+    Args:
+        img: [H, W, 3]
+        kps: [70, 2]
+    """
+    img = img.copy()
+    for key, item in style.items():
+        pts = np.array(kps[item["indexs"]]).astype(np.int32)
+        connect = item.get("connect", True)
+        color = item["color"]
+        close = item.get("close", False)
+        if connect:
+            cv2.polylines(img, [pts], close, color, thickness=thickness)
+        else:
+            for kp in pts:
+                kp = np.array(kp).astype(np.int32)
+                cv2.circle(img, kp, thickness * 2, color=color, thickness=-1)
+    return img
+def draw_traj(metas: List[AAPoseMeta], threshold=0.6):
+    colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
+                [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
+                [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85], [100, 255, 50], [255, 100, 50],
+                # foot
+                [200, 200, 0],
+                [100, 100, 0]
+                ]
+    limbSeq = [
+                    [1, 2], [1, 5],     # shoulders
+                    [2, 3], [3, 4],     # left arm
+                    [5, 6], [6, 7],     # right arm
+                    [1, 8], [8, 9], [9, 10],    # right leg
+                    [1, 11], [11, 12], [12, 13],  # left leg
+                     # face (nose, eyes, ears)
+                    [13, 18], [10, 19] # foot
+                ]
+    face_seq = [[1, 0], [0, 14], [14, 16], [0, 15], [15, 17]]
+    kp_body = np.array([meta.kps_body for meta in metas])
+    kp_body_p = np.array([meta.kps_body_p for meta in metas])
+    face_seq = random.sample(face_seq, 2)
+    kp_lh = np.array([meta.kps_lhand for meta in metas])
+    kp_rh = np.array([meta.kps_rhand for meta in metas])
+    kp_lh_p = np.array([meta.kps_lhand_p for meta in metas])
+    kp_rh_p = np.array([meta.kps_rhand_p for meta in metas])
+    # kp_lh = np.concatenate([kp_lh, kp_lh_p], axis=-1)
+    # kp_rh = np.concatenate([kp_rh, kp_rh_p], axis=-1)
+    new_limbSeq = []
+    key_point_list = []
+    for _idx, ((k1_index, k2_index)) in enumerate(limbSeq):
+        vis = (kp_body_p[:, k1_index] > threshold) * (kp_body_p[:, k2_index] > threshold) * 1
+        if vis.sum() * 1.0 / vis.shape[0] > 0.4:
+            new_limbSeq.append([k1_index, k2_index])
+    for _idx, ((k1_index, k2_index)) in enumerate(limbSeq):
+        keypoint1 = kp_body[:, k1_index - 1]
+        keypoint2 = kp_body[:, k2_index - 1]
+        interleave = random.randint(4, 7)
+        randind = random.randint(0, interleave - 1)
+        # randind = random.rand(range(interleave), sampling_num)
+        Y = np.array([keypoint1[:, 0], keypoint2[:, 0]])
+        X = np.array([keypoint1[:, 1], keypoint2[:, 1]])
+        vis = (keypoint1[:, -1] > threshold) * (keypoint2[:, -1] > threshold) * 1
+        # for randidx in randind:
+        t = randind / interleave
+        x = (1-t)*Y[0, :] + t*Y[1, :]
+        y = (1-t)*X[0, :] + t*X[1, :]
+        # np.array([1])
+        x = x.astype(int)
+        y = y.astype(int)
+        new_array = np.array([x, y, vis]).T
+        key_point_list.append(new_array)
+    indx_lh = random.randint(0, kp_lh.shape[1] - 1)
+    lh = kp_lh[:, indx_lh, :]
+    lh_p = kp_lh_p[:, indx_lh:indx_lh+1]
+    lh = np.concatenate([lh, lh_p], axis=-1)
+    indx_rh = random.randint(0, kp_rh.shape[1] - 1)
+    rh = kp_rh[:, random.randint(0, kp_rh.shape[1] - 1), :]
+    rh_p = kp_rh_p[:, indx_rh:indx_rh+1]
+    rh = np.concatenate([rh, rh_p], axis=-1)
+    lh[-1, :] = (lh[-1, :] > threshold) * 1
+    rh[-1, :] = (rh[-1, :] > threshold) * 1
+    # print(rh.shape, new_array.shape)
+    # exit()
+    key_point_list.append(lh.astype(int))
+    key_point_list.append(rh.astype(int))
+    key_points_list = np.stack(key_point_list)
+    num_points = len(key_points_list)
+    sample_colors = random.sample(colors, num_points)
+    stickwidth = max(int(min(metas[0].width, metas[0].height) / 150), 2)
+    image_list_ori = []
+    for i in range(key_points_list.shape[-2]):
+        _image_vis = np.zeros((metas[0].width, metas[0].height, 3))
+        points = key_points_list[:, i, :]
+        for idx, point in enumerate(points):
+            x, y, vis = point
+            if vis == 1:
+                cv2.circle(_image_vis, (x, y), stickwidth, sample_colors[idx], thickness=-1)
+        image_list_ori.append(_image_vis)
+    return image_list_ori

ComfyUI-WanAnimatePreprocess/pose_utils/pose2d_utils.py ADDED Viewed

	@@ -0,0 +1,1110 @@

+# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
+import warnings
+import cv2
+import numpy as np
+from typing import List
+def box_convert_simple(box, convert_type='xyxy2xywh'):
+    if convert_type == 'xyxy2xywh':
+        return [box[0], box[1], box[2] - box[0], box[3] - box[1]]
+    elif convert_type == 'xywh2xyxy':
+        return [box[0], box[1], box[2] + box[0], box[3] + box[1]]
+    elif convert_type == 'xyxy2ctwh':
+        return [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2, box[2] - box[0], box[3] - box[1]]
+    elif convert_type == 'ctwh2xyxy':
+        return [box[0] - box[2] // 2, box[1] - box[3] // 2, box[0] + (box[2] - box[2] // 2), box[1] + (box[3] - box[3] // 2)]
+class AAPoseMeta:
+    def __init__(self, meta=None, kp2ds=None):
+        self.image_id = ""
+        self.height = 0
+        self.width = 0
+        self.kps_body: np.ndarray = None
+        self.kps_lhand: np.ndarray = None
+        self.kps_rhand: np.ndarray = None
+        self.kps_face: np.ndarray = None
+        self.kps_body_p: np.ndarray = None
+        self.kps_lhand_p: np.ndarray = None
+        self.kps_rhand_p: np.ndarray = None
+        self.kps_face_p: np.ndarray = None
+        if meta is not None:
+            self.load_from_meta(meta)
+        elif kp2ds is not None:
+            self.load_from_kp2ds(kp2ds)
+    def is_valid(self, kp, p, threshold):
+        x, y = kp
+        if x < 0 or y < 0 or x > self.width or y > self.height or p < threshold:
+            return False
+        else:
+            return True
+    def get_bbox(self, kp, kp_p, threshold=0.5):
+        kps = kp[kp_p > threshold]
+        if kps.size == 0:
+            return 0, 0, 0, 0
+        x0, y0 = kps.min(axis=0)
+        x1, y1 = kps.max(axis=0)
+        return x0, y0, x1, y1
+    def crop(self, x0, y0, x1, y1):
+        all_kps = [self.kps_body, self.kps_lhand, self.kps_rhand, self.kps_face]
+        for kps in all_kps:
+            if kps is not None:
+                kps[:, 0] -= x0
+                kps[:, 1] -= y0
+        self.width = x1 - x0
+        self.height = y1 - y0
+        return self
+    def resize(self, width, height):
+        scale_x = width / self.width
+        scale_y = height / self.height
+        all_kps = [self.kps_body, self.kps_lhand, self.kps_rhand, self.kps_face]
+        for kps in all_kps:
+            if kps is not None:
+                kps[:, 0] *= scale_x
+                kps[:, 1] *= scale_y
+        self.width = width
+        self.height = height
+        return self
+    def get_kps_body_with_p(self, normalize=False):
+        kps_body = self.kps_body.copy()
+        if normalize:
+            kps_body = kps_body / np.array([self.width, self.height])
+        return np.concatenate([kps_body, self.kps_body_p[:, None]])
+    @staticmethod
+    def from_kps_face(kps_face: np.ndarray, height: int, width: int):
+        pose_meta = AAPoseMeta()
+        pose_meta.kps_face = kps_face[:, :2]
+        if kps_face.shape[1] == 3:
+            pose_meta.kps_face_p = kps_face[:, 2]
+        else:
+            pose_meta.kps_face_p = kps_face[:, 0] * 0 + 1
+        pose_meta.height = height
+        pose_meta.width = width
+        return pose_meta
+    @staticmethod
+    def from_kps_body(kps_body: np.ndarray, height: int, width: int):
+        pose_meta = AAPoseMeta()
+        pose_meta.kps_body = kps_body[:, :2]
+        pose_meta.kps_body_p = kps_body[:, 2]
+        pose_meta.height = height
+        pose_meta.width = width
+        return pose_meta
+    @staticmethod
+    def from_humanapi_meta(meta):
+        pose_meta = AAPoseMeta()
+        width, height = meta["width"], meta["height"]
+        pose_meta.width = width
+        pose_meta.height = height
+        pose_meta.kps_body = meta["keypoints_body"][:, :2] * (width, height)
+        pose_meta.kps_body_p = meta["keypoints_body"][:, 2]
+        pose_meta.kps_lhand = meta["keypoints_left_hand"][:, :2] * (width, height)
+        pose_meta.kps_lhand_p = meta["keypoints_left_hand"][:, 2]
+        pose_meta.kps_rhand = meta["keypoints_right_hand"][:, :2] * (width, height)
+        pose_meta.kps_rhand_p = meta["keypoints_right_hand"][:, 2]
+        if 'keypoints_face' in meta:
+            pose_meta.kps_face = meta["keypoints_face"][:, :2] * (width, height)
+            pose_meta.kps_face_p = meta["keypoints_face"][:, 2]
+        return pose_meta
+    def load_from_meta(self, meta, norm_body=True, norm_hand=False):
+        self.image_id = meta.get("image_id", "00000.png")
+        self.height = meta["height"]
+        self.width = meta["width"]
+        kps_body_p = []
+        kps_body = []
+        for kp in meta["keypoints_body"]:
+            if kp is None:
+                kps_body.append([0, 0])
+                kps_body_p.append(0)
+            else:
+                kps_body.append(kp)
+                kps_body_p.append(1)
+        self.kps_body = np.array(kps_body)
+        self.kps_body[:, 0] *= self.width
+        self.kps_body[:, 1] *= self.height
+        self.kps_body_p = np.array(kps_body_p)
+        self.kps_lhand = np.array(meta["keypoints_left_hand"])[:, :2]
+        self.kps_lhand_p = np.array(meta["keypoints_left_hand"])[:, 2]
+        self.kps_rhand = np.array(meta["keypoints_right_hand"])[:, :2]
+        self.kps_rhand_p = np.array(meta["keypoints_right_hand"])[:, 2]
+    @staticmethod
+    def load_from_kp2ds(kp2ds: List[np.ndarray], width: int, height: int):
+        """input 133x3 numpy keypoints and output AAPoseMeta
+        Args:
+            kp2ds (List[np.ndarray]): _description_
+            width (int): _description_
+            height (int): _description_
+        Returns:
+            _type_: _description_
+        """
+        pose_meta = AAPoseMeta()
+        pose_meta.width = width
+        pose_meta.height = height
+        kps_body = (kp2ds[[0, 6, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3, 17, 20]] + kp2ds[[0, 5, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3, 18, 21]]) / 2
+        kps_lhand = kp2ds[91:112]
+        kps_rhand = kp2ds[112:133]
+        kps_face = np.concatenate([kp2ds[23:23+68], kp2ds[1:3]], axis=0)
+        pose_meta.kps_body = kps_body[:, :2]
+        pose_meta.kps_body_p = kps_body[:, 2]
+        pose_meta.kps_lhand = kps_lhand[:, :2]
+        pose_meta.kps_lhand_p = kps_lhand[:, 2]
+        pose_meta.kps_rhand = kps_rhand[:, :2]
+        pose_meta.kps_rhand_p = kps_rhand[:, 2]
+        pose_meta.kps_face = kps_face[:, :2]
+        pose_meta.kps_face_p = kps_face[:, 2]
+        return pose_meta
+    @staticmethod
+    def from_dwpose(dwpose_det_res, height, width):
+        pose_meta = AAPoseMeta()
+        pose_meta.kps_body = dwpose_det_res["bodies"]["candidate"]
+        pose_meta.kps_body_p = dwpose_det_res["bodies"]["score"]
+        pose_meta.kps_body[:, 0] *= width
+        pose_meta.kps_body[:, 1] *= height
+        pose_meta.kps_lhand, pose_meta.kps_rhand = dwpose_det_res["hands"]
+        pose_meta.kps_lhand[:, 0] *= width
+        pose_meta.kps_lhand[:, 1] *= height
+        pose_meta.kps_rhand[:, 0] *= width
+        pose_meta.kps_rhand[:, 1] *= height
+        pose_meta.kps_lhand_p, pose_meta.kps_rhand_p = dwpose_det_res["hands_score"]
+        pose_meta.kps_face = dwpose_det_res["faces"][0]
+        pose_meta.kps_face[:, 0] *= width
+        pose_meta.kps_face[:, 1] *= height
+        pose_meta.kps_face_p = dwpose_det_res["faces_score"][0]
+        return pose_meta
+    def save_json(self):
+        pass
+    def draw_aapose(self, img, threshold=0.5, stick_width_norm=200, draw_hand=True, draw_head=True):
+        from .human_visualization import draw_aapose_by_meta
+        return draw_aapose_by_meta(img, self, threshold, stick_width_norm, draw_hand, draw_head)
+    def translate(self, x0, y0):
+        all_kps = [self.kps_body, self.kps_lhand, self.kps_rhand, self.kps_face]
+        for kps in all_kps:
+            if kps is not None:
+                kps[:, 0] -= x0
+                kps[:, 1] -= y0
+    def scale(self, sx, sy):
+        all_kps = [self.kps_body, self.kps_lhand, self.kps_rhand, self.kps_face]
+        for kps in all_kps:
+            if kps is not None:
+                kps[:, 0] *= sx
+                kps[:, 1] *= sy
+    def padding_resize2(self, height=512, width=512):
+        """kps will be changed inplace
+        """
+        all_kps = [self.kps_body, self.kps_lhand, self.kps_rhand, self.kps_face]
+        ori_height, ori_width = self.height, self.width
+        if (ori_height / ori_width) > (height / width):
+            new_width = int(height / ori_height * ori_width)
+            padding = int((width - new_width) / 2)
+            padding_width = padding
+            padding_height = 0
+            scale = height / ori_height
+            for kps in all_kps:
+                if kps is not None:
+                    kps[:, 0] = kps[:, 0] * scale + padding
+                    kps[:, 1] = kps[:, 1] * scale
+        else:
+            new_height = int(width / ori_width * ori_height)
+            padding = int((height - new_height) / 2)
+            padding_width = 0
+            padding_height = padding
+            scale = width / ori_width
+            for kps in all_kps:
+                if kps is not None:
+                    kps[:, 1] = kps[:, 1] * scale + padding
+                    kps[:, 0] = kps[:, 0] * scale
+        self.width = width
+        self.height = height
+        return self
+def transform_preds(coords, center, scale, output_size, use_udp=False):
+    """Get final keypoint predictions from heatmaps and apply scaling and
+    translation to map them back to the image.
+    Note:
+        num_keypoints: K
+    Args:
+        coords (np.ndarray[K, ndims]):
+            * If ndims=2, corrds are predicted keypoint location.
+            * If ndims=4, corrds are composed of (x, y, scores, tags)
+            * If ndims=5, corrds are composed of (x, y, scores, tags,
+              flipped_tags)
+        center (np.ndarray[2, ]): Center of the bounding box (x, y).
+        scale (np.ndarray[2, ]): Scale of the bounding box
+            wrt [width, height].
+        output_size (np.ndarray[2, ] | list(2,)): Size of the
+            destination heatmaps.
+        use_udp (bool): Use unbiased data processing
+    Returns:
+        np.ndarray: Predicted coordinates in the images.
+    """
+    assert coords.shape[1] in (2, 4, 5)
+    assert len(center) == 2
+    assert len(scale) == 2
+    assert len(output_size) == 2
+    # Recover the scale which is normalized by a factor of 200.
+    # scale = scale * 200.0
+    if use_udp:
+        scale_x = scale[0] / (output_size[0] - 1.0)
+        scale_y = scale[1] / (output_size[1] - 1.0)
+    else:
+        scale_x = scale[0] / output_size[0]
+        scale_y = scale[1] / output_size[1]
+    target_coords = np.ones_like(coords)
+    target_coords[:, 0] = coords[:, 0] * scale_x + center[0] - scale[0] * 0.5
+    target_coords[:, 1] = coords[:, 1] * scale_y + center[1] - scale[1] * 0.5
+    return target_coords
+def _calc_distances(preds, targets, mask, normalize):
+    """Calculate the normalized distances between preds and target.
+    Note:
+        batch_size: N
+        num_keypoints: K
+        dimension of keypoints: D (normally, D=2 or D=3)
+    Args:
+        preds (np.ndarray[N, K, D]): Predicted keypoint location.
+        targets (np.ndarray[N, K, D]): Groundtruth keypoint location.
+        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+            joints, and True for visible. Invisible joints will be ignored for
+            accuracy calculation.
+        normalize (np.ndarray[N, D]): Typical value is heatmap_size
+    Returns:
+        np.ndarray[K, N]: The normalized distances. \
+            If target keypoints are missing, the distance is -1.
+    """
+    N, K, _ = preds.shape
+    # set mask=0 when normalize==0
+    _mask = mask.copy()
+    _mask[np.where((normalize == 0).sum(1))[0], :] = False
+    distances = np.full((N, K), -1, dtype=np.float32)
+    # handle invalid values
+    normalize[np.where(normalize <= 0)] = 1e6
+    distances[_mask] = np.linalg.norm(
+        ((preds - targets) / normalize[:, None, :])[_mask], axis=-1)
+    return distances.T
+def _distance_acc(distances, thr=0.5):
+    """Return the percentage below the distance threshold, while ignoring
+    distances values with -1.
+    Note:
+        batch_size: N
+    Args:
+        distances (np.ndarray[N, ]): The normalized distances.
+        thr (float): Threshold of the distances.
+    Returns:
+        float: Percentage of distances below the threshold. \
+            If all target keypoints are missing, return -1.
+    """
+    distance_valid = distances != -1
+    num_distance_valid = distance_valid.sum()
+    if num_distance_valid > 0:
+        return (distances[distance_valid] < thr).sum() / num_distance_valid
+    return -1
+def _get_max_preds(heatmaps):
+    """Get keypoint predictions from score maps.
+    Note:
+        batch_size: N
+        num_keypoints: K
+        heatmap height: H
+        heatmap width: W
+    Args:
+        heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.
+    Returns:
+        tuple: A tuple containing aggregated results.
+        - preds (np.ndarray[N, K, 2]): Predicted keypoint location.
+        - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
+    """
+    assert isinstance(heatmaps,
+                      np.ndarray), ('heatmaps should be numpy.ndarray')
+    assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'
+    N, K, _, W = heatmaps.shape
+    heatmaps_reshaped = heatmaps.reshape((N, K, -1))
+    idx = np.argmax(heatmaps_reshaped, 2).reshape((N, K, 1))
+    maxvals = np.amax(heatmaps_reshaped, 2).reshape((N, K, 1))
+    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
+    preds[:, :, 0] = preds[:, :, 0] % W
+    preds[:, :, 1] = preds[:, :, 1] // W
+    preds = np.where(np.tile(maxvals, (1, 1, 2)) > 0.0, preds, -1)
+    return preds, maxvals
+def _get_max_preds_3d(heatmaps):
+    """Get keypoint predictions from 3D score maps.
+    Note:
+        batch size: N
+        num keypoints: K
+        heatmap depth size: D
+        heatmap height: H
+        heatmap width: W
+    Args:
+        heatmaps (np.ndarray[N, K, D, H, W]): model predicted heatmaps.
+    Returns:
+        tuple: A tuple containing aggregated results.
+        - preds (np.ndarray[N, K, 3]): Predicted keypoint location.
+        - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
+    """
+    assert isinstance(heatmaps, np.ndarray), \
+        ('heatmaps should be numpy.ndarray')
+    assert heatmaps.ndim == 5, 'heatmaps should be 5-ndim'
+    N, K, D, H, W = heatmaps.shape
+    heatmaps_reshaped = heatmaps.reshape((N, K, -1))
+    idx = np.argmax(heatmaps_reshaped, 2).reshape((N, K, 1))
+    maxvals = np.amax(heatmaps_reshaped, 2).reshape((N, K, 1))
+    preds = np.zeros((N, K, 3), dtype=np.float32)
+    _idx = idx[..., 0]
+    preds[..., 2] = _idx // (H * W)
+    preds[..., 1] = (_idx // W) % H
+    preds[..., 0] = _idx % W
+    preds = np.where(maxvals > 0.0, preds, -1)
+    return preds, maxvals
+def pose_pck_accuracy(output, target, mask, thr=0.05, normalize=None):
+    """Calculate the pose accuracy of PCK for each individual keypoint and the
+    averaged accuracy across all keypoints from heatmaps.
+    Note:
+        PCK metric measures accuracy of the localization of the body joints.
+        The distances between predicted positions and the ground-truth ones
+        are typically normalized by the bounding box size.
+        The threshold (thr) of the normalized distance is commonly set
+        as 0.05, 0.1 or 0.2 etc.
+        - batch_size: N
+        - num_keypoints: K
+        - heatmap height: H
+        - heatmap width: W
+    Args:
+        output (np.ndarray[N, K, H, W]): Model output heatmaps.
+        target (np.ndarray[N, K, H, W]): Groundtruth heatmaps.
+        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+            joints, and True for visible. Invisible joints will be ignored for
+            accuracy calculation.
+        thr (float): Threshold of PCK calculation. Default 0.05.
+        normalize (np.ndarray[N, 2]): Normalization factor for H&W.
+    Returns:
+        tuple: A tuple containing keypoint accuracy.
+        - np.ndarray[K]: Accuracy of each keypoint.
+        - float: Averaged accuracy across all keypoints.
+        - int: Number of valid keypoints.
+    """
+    N, K, H, W = output.shape
+    if K == 0:
+        return None, 0, 0
+    if normalize is None:
+        normalize = np.tile(np.array([[H, W]]), (N, 1))
+    pred, _ = _get_max_preds(output)
+    gt, _ = _get_max_preds(target)
+    return keypoint_pck_accuracy(pred, gt, mask, thr, normalize)
+def keypoint_pck_accuracy(pred, gt, mask, thr, normalize):
+    """Calculate the pose accuracy of PCK for each individual keypoint and the
+    averaged accuracy across all keypoints for coordinates.
+    Note:
+        PCK metric measures accuracy of the localization of the body joints.
+        The distances between predicted positions and the ground-truth ones
+        are typically normalized by the bounding box size.
+        The threshold (thr) of the normalized distance is commonly set
+        as 0.05, 0.1 or 0.2 etc.
+        - batch_size: N
+        - num_keypoints: K
+    Args:
+        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
+        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+            joints, and True for visible. Invisible joints will be ignored for
+            accuracy calculation.
+        thr (float): Threshold of PCK calculation.
+        normalize (np.ndarray[N, 2]): Normalization factor for H&W.
+    Returns:
+        tuple: A tuple containing keypoint accuracy.
+        - acc (np.ndarray[K]): Accuracy of each keypoint.
+        - avg_acc (float): Averaged accuracy across all keypoints.
+        - cnt (int): Number of valid keypoints.
+    """
+    distances = _calc_distances(pred, gt, mask, normalize)
+    acc = np.array([_distance_acc(d, thr) for d in distances])
+    valid_acc = acc[acc >= 0]
+    cnt = len(valid_acc)
+    avg_acc = valid_acc.mean() if cnt > 0 else 0
+    return acc, avg_acc, cnt
+def keypoint_auc(pred, gt, mask, normalize, num_step=20):
+    """Calculate the pose accuracy of PCK for each individual keypoint and the
+    averaged accuracy across all keypoints for coordinates.
+    Note:
+        - batch_size: N
+        - num_keypoints: K
+    Args:
+        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
+        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+            joints, and True for visible. Invisible joints will be ignored for
+            accuracy calculation.
+        normalize (float): Normalization factor.
+    Returns:
+        float: Area under curve.
+    """
+    nor = np.tile(np.array([[normalize, normalize]]), (pred.shape[0], 1))
+    x = [1.0 * i / num_step for i in range(num_step)]
+    y = []
+    for thr in x:
+        _, avg_acc, _ = keypoint_pck_accuracy(pred, gt, mask, thr, nor)
+        y.append(avg_acc)
+    auc = 0
+    for i in range(num_step):
+        auc += 1.0 / num_step * y[i]
+    return auc
+def keypoint_nme(pred, gt, mask, normalize_factor):
+    """Calculate the normalized mean error (NME).
+    Note:
+        - batch_size: N
+        - num_keypoints: K
+    Args:
+        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
+        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+            joints, and True for visible. Invisible joints will be ignored for
+            accuracy calculation.
+        normalize_factor (np.ndarray[N, 2]): Normalization factor.
+    Returns:
+        float: normalized mean error
+    """
+    distances = _calc_distances(pred, gt, mask, normalize_factor)
+    distance_valid = distances[distances != -1]
+    return distance_valid.sum() / max(1, len(distance_valid))
+def keypoint_epe(pred, gt, mask):
+    """Calculate the end-point error.
+    Note:
+        - batch_size: N
+        - num_keypoints: K
+    Args:
+        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
+        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+            joints, and True for visible. Invisible joints will be ignored for
+            accuracy calculation.
+    Returns:
+        float: Average end-point error.
+    """
+    distances = _calc_distances(
+        pred, gt, mask,
+        np.ones((pred.shape[0], pred.shape[2]), dtype=np.float32))
+    distance_valid = distances[distances != -1]
+    return distance_valid.sum() / max(1, len(distance_valid))
+def _taylor(heatmap, coord):
+    """Distribution aware coordinate decoding method.
+    Note:
+        - heatmap height: H
+        - heatmap width: W
+    Args:
+        heatmap (np.ndarray[H, W]): Heatmap of a particular joint type.
+        coord (np.ndarray[2,]): Coordinates of the predicted keypoints.
+    Returns:
+        np.ndarray[2,]: Updated coordinates.
+    """
+    H, W = heatmap.shape[:2]
+    px, py = int(coord[0]), int(coord[1])
+    if 1 < px < W - 2 and 1 < py < H - 2:
+        dx = 0.5 * (heatmap[py][px + 1] - heatmap[py][px - 1])
+        dy = 0.5 * (heatmap[py + 1][px] - heatmap[py - 1][px])
+        dxx = 0.25 * (
+            heatmap[py][px + 2] - 2 * heatmap[py][px] + heatmap[py][px - 2])
+        dxy = 0.25 * (
+            heatmap[py + 1][px + 1] - heatmap[py - 1][px + 1] -
+            heatmap[py + 1][px - 1] + heatmap[py - 1][px - 1])
+        dyy = 0.25 * (
+            heatmap[py + 2 * 1][px] - 2 * heatmap[py][px] +
+            heatmap[py - 2 * 1][px])
+        derivative = np.array([[dx], [dy]])
+        hessian = np.array([[dxx, dxy], [dxy, dyy]])
+        if dxx * dyy - dxy**2 != 0:
+            hessianinv = np.linalg.inv(hessian)
+            offset = -hessianinv @ derivative
+            offset = np.squeeze(np.array(offset.T), axis=0)
+            coord += offset
+    return coord
+def post_dark_udp(coords, batch_heatmaps, kernel=3):
+    """DARK post-processing. Implemented by udp. Paper ref: Huang et al. The
+    Devil is in the Details: Delving into Unbiased Data Processing for Human
+    Pose Estimation (CVPR 2020). Zhang et al. Distribution-Aware Coordinate
+    Representation for Human Pose Estimation (CVPR 2020).
+    Note:
+        - batch size: B
+        - num keypoints: K
+        - num persons: N
+        - height of heatmaps: H
+        - width of heatmaps: W
+        B=1 for bottom_up paradigm where all persons share the same heatmap.
+        B=N for top_down paradigm where each person has its own heatmaps.
+        Both inputs are modified in place: the heatmaps are blurred, clipped
+        and replaced by their logarithm, and ``coords`` receives the refinement.
+    Args:
+        coords (np.ndarray[N, K, 2]): Initial coordinates of human pose.
+        batch_heatmaps (np.ndarray[B, K, H, W]): batch_heatmaps
+        kernel (int): Gaussian kernel size (K) for modulation.
+    Returns:
+        np.ndarray([N, K, 2]): Refined coordinates.
+    """
+    # Non-ndarray input is expected to offer .cpu().numpy() (presumably a
+    # torch tensor - confirm against callers).
+    if not isinstance(batch_heatmaps, np.ndarray):
+        batch_heatmaps = batch_heatmaps.cpu().numpy()
+    B, K, H, W = batch_heatmaps.shape
+    N = coords.shape[0]
+    assert (B == 1 or B == N)
+    # Blur every heatmap in place (the last argument is the destination).
+    for heatmaps in batch_heatmaps:
+        for heatmap in heatmaps:
+            cv2.GaussianBlur(heatmap, (kernel, kernel), 0, heatmap)
+    # Clip to [0.001, 50] so the logarithm below is finite, then take the log,
+    # both written back into batch_heatmaps.
+    np.clip(batch_heatmaps, 0.001, 50, batch_heatmaps)
+    np.log(batch_heatmaps, batch_heatmaps)
+    # Pad one pixel on each spatial side (edge values) and flatten, so that
+    # the neighbours of a pixel can be fetched with flat index offsets.
+    batch_heatmaps_pad = np.pad(
+        batch_heatmaps, ((0, 0), (0, 0), (1, 1), (1, 1)),
+        mode='edge').flatten()
+    # Flat index of each coordinate inside its padded (H + 2, W + 2) heatmap...
+    index = coords[..., 0] + 1 + (coords[..., 1] + 1) * (W + 2)
+    # ...plus the offset of that heatmap within the flattened batch.
+    index += (W + 2) * (H + 2) * np.arange(0, B * K).reshape(-1, K)
+    index = index.astype(int).reshape(-1, 1)
+    # Centre value and its neighbours: a trailing underscore means "minus one".
+    i_ = batch_heatmaps_pad[index]
+    ix1 = batch_heatmaps_pad[index + 1]
+    iy1 = batch_heatmaps_pad[index + W + 2]
+    ix1y1 = batch_heatmaps_pad[index + W + 3]
+    ix1_y1_ = batch_heatmaps_pad[index - W - 3]
+    ix1_ = batch_heatmaps_pad[index - 1]
+    iy1_ = batch_heatmaps_pad[index - 2 - W]
+    # First derivatives by central differences.
+    dx = 0.5 * (ix1 - ix1_)
+    dy = 0.5 * (iy1 - iy1_)
+    derivative = np.concatenate([dx, dy], axis=1)
+    derivative = derivative.reshape(N, K, 2, 1)
+    # Second derivatives, assembled into one 2x2 Hessian per keypoint.
+    dxx = ix1 - 2 * i_ + ix1_
+    dyy = iy1 - 2 * i_ + iy1_
+    dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_)
+    hessian = np.concatenate([dxx, dxy, dxy, dyy], axis=1)
+    hessian = hessian.reshape(N, K, 2, 2)
+    # A float32-epsilon multiple of the identity is added before inverting.
+    hessian = np.linalg.inv(hessian + np.finfo(np.float32).eps * np.eye(2))
+    # Subtract inverse(Hessian) @ gradient from the coordinates, in place.
+    coords -= np.einsum('ijmn,ijnk->ijmk', hessian, derivative).squeeze()
+    return coords
def _gaussian_blur(heatmaps, kernel=11):
    """Modulate heatmap distribution with Gaussian.

     sigma = 0.3*((kernel_size-1)*0.5-1)+0.8
     sigma~=3 if k=17
     sigma=2 if k=11;
     sigma~=1.5 if k=7;
     sigma~=1 if k=3;

    Note:
        - batch_size: N
        - num_keypoints: K
        - heatmap height: H
        - heatmap width: W

    Args:
        heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.
            Modified in place.
        kernel (int): Gaussian kernel size (K) for modulation, which should
            match the heatmap gaussian sigma when training.
            K=17 for sigma=3 and k=11 for sigma=2. Must be odd.

    Returns:
        np.ndarray ([N, K, H, W]): Modulated heatmap distribution (the same
        array object as ``heatmaps``).
    """
    assert kernel % 2 == 1
    border = (kernel - 1) // 2
    batch_size, num_joints, height, width = heatmaps.shape[:4]
    # Explicit end indices instead of ``border:-border``: with kernel == 1
    # the border is 0 and ``0:-0`` would select an empty slice.
    rows = slice(border, border + height)
    cols = slice(border, border + width)
    for i in range(batch_size):
        for j in range(num_joints):
            origin_max = np.max(heatmaps[i, j])
            # Blur on a zero-padded canvas so the kernel has room at the
            # heatmap edges.
            dr = np.zeros((height + 2 * border, width + 2 * border),
                          dtype=np.float32)
            dr[rows, cols] = heatmaps[i, j]
            dr = cv2.GaussianBlur(dr, (kernel, kernel), 0)
            heatmaps[i, j] = dr[rows, cols]
            # Restore the original peak value. Skip the rescale when the
            # blurred peak is exactly 0 (e.g. an all-zero heatmap), which
            # would otherwise turn the whole map into NaN via 0 / 0.
            blurred_max = np.max(heatmaps[i, j])
            if blurred_max != 0:
                heatmaps[i, j] *= origin_max / blurred_max
    return heatmaps
def keypoints_from_regression(regression_preds, center, scale, img_size):
    """Turn regression outputs into keypoints in original image coordinates.

    Note:
        - batch_size: N
        - num_keypoints: K

    Args:
        regression_preds (np.ndarray[N, K, 2]): model prediction.
        center (np.ndarray[N, 2]): Center of the bounding box (x, y).
        scale (np.ndarray[N, 2]): Scale of the bounding box
            wrt height/width.
        img_size (list(img_width, img_height)): model input image size.

    Returns:
        tuple:
        - preds (np.ndarray[N, K, 2]): Predicted keypoint location in images.
        - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints;
          always 1 for regression outputs.
    """
    num_samples, num_kpts, _ = regression_preds.shape
    confidences = np.ones((num_samples, num_kpts, 1), dtype=np.float32)
    # The predictions are multiplied by the model input size first.
    scaled_preds = regression_preds * img_size
    # Map each sample back to the image through its own bounding box.
    for idx, sample_preds in enumerate(scaled_preds):
        scaled_preds[idx] = transform_preds(
            sample_preds, center[idx], scale[idx], img_size)
    return scaled_preds, confidences
def keypoints_from_heatmaps(heatmaps,
                            center,
                            scale,
                            unbiased=False,
                            post_process='default',
                            kernel=11,
                            valid_radius_factor=0.0546875,
                            use_udp=False,
                            target_type='GaussianHeatmap'):
    """Get final keypoint predictions from heatmaps and transform them back to
    the image.

    Note:
        - batch size: N
        - num keypoints: K
        - heatmap height: H
        - heatmap width: W

    Args:
        heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps. A copy
            is taken first, so the caller's array is not modified.
        center (np.ndarray[N, 2]): Center of the bounding box (x, y).
        scale (np.ndarray[N, 2]): Scale of the bounding box
            wrt height/width.
        post_process (str/None): Choice of methods to post-process
            heatmaps. Currently supported: None, 'default', 'unbiased',
            'megvii'.
        unbiased (bool): Option to use unbiased decoding. Mutually
            exclusive with megvii.
            Note: this arg is deprecated and unbiased=True can be replaced
            by post_process='unbiased'
            Paper ref: Zhang et al. Distribution-Aware Coordinate
            Representation for Human Pose Estimation (CVPR 2020).
        kernel (int): Gaussian kernel size (K) for modulation, which should
            match the heatmap gaussian sigma when training.
            K=17 for sigma=3 and k=11 for sigma=2.
        valid_radius_factor (float): The radius factor of the positive area
            in classification heatmap for UDP.
        use_udp (bool): Use unbiased data processing.
        target_type (str): 'GaussianHeatmap' or 'CombinedTarget'.
            GaussianHeatmap: Classification target with gaussian distribution.
            CombinedTarget: The combination of classification target
            (response map) and regression target (offset map).
            Paper ref: Huang et al. The Devil is in the Details: Delving into
            Unbiased Data Processing for Human Pose Estimation (CVPR 2020).

    Returns:
        tuple: A tuple containing keypoint predictions and scores.
        - preds (np.ndarray[N, K, 2]): Predicted keypoint location in images.
        - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.

    Raises:
        ValueError: If ``use_udp`` is set and ``target_type`` is neither
            'GaussianHeatmap' nor 'CombinedTarget' (case-insensitive).
    """
    # Avoid being affected
    heatmaps = heatmaps.copy()
    # detect conflicts
    if unbiased:
        assert post_process not in [False, None, 'megvii']
    if post_process in ['megvii', 'unbiased']:
        assert kernel > 0
    if use_udp:
        assert not post_process == 'megvii'
    # normalize configs
    if post_process is False:
        warnings.warn(
            'post_process=False is deprecated, '
            'please use post_process=None instead', DeprecationWarning)
        post_process = None
    elif post_process is True:
        if unbiased is True:
            warnings.warn(
                'post_process=True, unbiased=True is deprecated,'
                " please use post_process='unbiased' instead",
                DeprecationWarning)
            post_process = 'unbiased'
        else:
            warnings.warn(
                'post_process=True, unbiased=False is deprecated, '
                "please use post_process='default' instead",
                DeprecationWarning)
            post_process = 'default'
    elif post_process == 'default':
        if unbiased is True:
            warnings.warn(
                'unbiased=True is deprecated, please use '
                "post_process='unbiased' instead", DeprecationWarning)
            post_process = 'unbiased'
    # start processing
    if post_process == 'megvii':
        heatmaps = _gaussian_blur(heatmaps, kernel=kernel)
    N, K, H, W = heatmaps.shape
    if use_udp:
        if target_type.lower() == 'GaussianHeatMap'.lower():
            preds, maxvals = _get_max_preds(heatmaps)
            preds = post_dark_udp(preds, heatmaps, kernel=kernel)
        elif target_type.lower() == 'CombinedTarget'.lower():
            # Channels come in triples: response map, x-offset, y-offset.
            # The response map gets the larger blur kernel.
            for person_heatmaps in heatmaps:
                for i, heatmap in enumerate(person_heatmaps):
                    kt = 2 * kernel + 1 if i % 3 == 0 else kernel
                    cv2.GaussianBlur(heatmap, (kt, kt), 0, heatmap)
            # valid radius is in direct proportion to the height of heatmap.
            valid_radius = valid_radius_factor * H
            offset_x = heatmaps[:, 1::3, :].flatten() * valid_radius
            offset_y = heatmaps[:, 2::3, :].flatten() * valid_radius
            heatmaps = heatmaps[:, ::3, :]
            preds, maxvals = _get_max_preds(heatmaps)
            # K still counts all channels here; one keypoint per triple.
            num_kpts = K // 3
            index = preds[..., 0] + preds[..., 1] * W
            # Per-(sample, keypoint) base offset into the flattened offset
            # maps. It is reshaped to (N, num_kpts) so it lines up with
            # `index`; the previous flat arange only broadcast for N == 1.
            index += W * H * np.arange(N * num_kpts).reshape(N, num_kpts)
            index = index.astype(int).reshape(N, num_kpts, 1)
            preds += np.concatenate((offset_x[index], offset_y[index]), axis=2)
        else:
            raise ValueError('target_type should be either '
                             "'GaussianHeatmap' or 'CombinedTarget'")
    else:
        preds, maxvals = _get_max_preds(heatmaps)
        if post_process == 'unbiased':  # alleviate biased coordinate
            # apply Gaussian distribution modulation.
            heatmaps = np.log(
                np.maximum(_gaussian_blur(heatmaps, kernel), 1e-10))
            for n in range(N):
                for k in range(K):
                    preds[n][k] = _taylor(heatmaps[n][k], preds[n][k])
        elif post_process is not None:
            # add +/-0.25 shift to the predicted locations for higher acc.
            for n in range(N):
                for k in range(K):
                    heatmap = heatmaps[n][k]
                    px = int(preds[n][k][0])
                    py = int(preds[n][k][1])
                    if 1 < px < W - 1 and 1 < py < H - 1:
                        diff = np.array([
                            heatmap[py][px + 1] - heatmap[py][px - 1],
                            heatmap[py + 1][px] - heatmap[py - 1][px]
                        ])
                        preds[n][k] += np.sign(diff) * .25
                        if post_process == 'megvii':
                            preds[n][k] += 0.5
    # Transform back to the image
    for i in range(N):
        preds[i] = transform_preds(
            preds[i], center[i], scale[i], [W, H], use_udp=use_udp)
    if post_process == 'megvii':
        maxvals = maxvals / 255.0 + 0.5
    return preds, maxvals
def keypoints_from_heatmaps3d(heatmaps, center, scale):
    """Decode 3d heatmaps and map the x/y part back to image coordinates.

    Note:
        - batch size: N
        - num keypoints: K
        - heatmap depth size: D
        - heatmap height: H
        - heatmap width: W

    Args:
        heatmaps (np.ndarray[N, K, D, H, W]): model predicted heatmaps.
        center (np.ndarray[N, 2]): Center of the bounding box (x, y).
        scale (np.ndarray[N, 2]): Scale of the bounding box
            wrt height/width.

    Returns:
        tuple: A tuple containing keypoint predictions and scores.
        - preds (np.ndarray[N, K, 3]): Predicted 3d keypoint location
            in images.
        - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
    """
    num_samples, _, _, height, width = heatmaps.shape
    preds, maxvals = _get_max_preds_3d(heatmaps)
    # Only the first two coordinates go through the bounding-box transform;
    # the third is returned as produced by _get_max_preds_3d.
    for idx in range(num_samples):
        xy = preds[idx, :, :2]
        preds[idx, :, :2] = transform_preds(
            xy, center[idx], scale[idx], [width, height])
    return preds, maxvals
def multilabel_classification_accuracy(pred, gt, mask, thr=0.5):
    """Compute accuracy for multi-label classification.

    Note:
        - batch size: N
        - label number: L

    Args:
        pred (np.ndarray[N, L, 2]): model predicted labels.
        gt (np.ndarray[N, L, 2]): ground-truth labels.
        mask (np.ndarray[N, 1] or np.ndarray[N, L] ): reliability of
            ground-truth labels.
        thr (float): Decision threshold; a label counts as correct when
            ``pred`` and ``gt`` fall strictly on the same side of it.

    Returns:
        float: multi-label classification accuracy (0.0 when no sample has
        reliable ground truth).
    """
    # Keep only samples whose ground truth is reliable for every label.
    reliable = mask > 0
    if mask.ndim == 2:
        reliable = reliable.min(axis=1)
    kept_pred = pred[reliable]
    kept_gt = gt[reliable]
    if kept_pred.shape[0] == 0:
        # when no sample is with gt labels, set acc to 0.
        return 0.0
    # A sample is correct only if it is correct for all labels.
    same_side = ((kept_pred - thr) * (kept_gt - thr)) > 0
    return same_side.all(axis=1).mean()
def get_transform(center, scale, res, rot=0):
    """Build the 3x3 matrix mapping image coordinates into the crop.

    Args:
        center: Box center; ``center[0]`` is x and ``center[1]`` is y.
        scale: Box scale; the box height is taken as ``200 * scale``.
        res: Crop resolution as (height, width), i.e. (rows, cols).
        rot: Rotation in degrees; 0 means no rotation.

    Returns:
        np.ndarray: The 3x3 transformation matrix.
    """
    rows, cols = res[0], res[1]
    aspect = rows / float(cols)
    box_h = 200 * scale
    box_w = box_h / aspect
    # Scale and translation; the bottom-right entry stays 1.
    t = np.eye(3)
    t[0, 0] = float(cols) / box_w
    t[1, 1] = float(rows) / box_h
    t[0, 2] = cols * (-float(center[0]) / box_w + .5)
    t[1, 2] = rows * (-float(center[1]) / box_h + .5)
    if rot == 0:
        return t
    # The sign is flipped to match the direction of rotation from cropping.
    angle = -rot * np.pi / 180
    sn, cs = np.sin(angle), np.cos(angle)
    rotation = np.array([[cs, -sn, 0.],
                         [sn, cs, 0.],
                         [0., 0., 1.]])
    # Rotate around the crop center: shift to origin, rotate, shift back.
    to_origin = np.eye(3)
    to_origin[0, 2] = -cols / 2
    to_origin[1, 2] = -rows / 2
    from_origin = to_origin.copy()
    from_origin[:2, 2] *= -1
    return from_origin @ (rotation @ (to_origin @ t))
def transform(pt, center, scale, res, invert=0, rot=0):
    """Map a pixel location between the image and the crop reference frame.

    Args:
        pt: Point (x, y); 1 is subtracted from each coordinate before the
            transform and added back afterwards.
        center, scale, res, rot: Forwarded to ``get_transform``.
        invert: When truthy, the inverse of the transform is applied.

    Returns:
        np.ndarray: Integer array ``[x, y]`` of the rounded, mapped point.
    """
    matrix = get_transform(center, scale, res, rot=rot)
    if invert:
        matrix = np.linalg.inv(matrix)
    # Homogeneous coordinates of the point.
    homogeneous = np.array([pt[0] - 1, pt[1] - 1, 1.])
    mapped = matrix.dot(homogeneous)
    x_out = round(mapped[0])
    y_out = round(mapped[1])
    return np.array([x_out, y_out], dtype=int) + 1
def bbox_from_detector(bbox, input_resolution=(224, 224), rescale=1.25):
    """
    Derive the center and scale of a detector bounding box.

    The expected bbox format is [min_x, min_y, max_x, max_y]. The box is
    grown to the aspect ratio of ``input_resolution`` (height, width),
    expressed in units of 200 pixels, and multiplied by ``rescale``.

    Returns:
        tuple: ``(center, scale)`` where ``center`` is ``np.array([x, y])``
        and ``scale`` is ``np.array([width_scale, height_scale])``.
    """
    crop_h, crop_w = input_resolution
    aspect = crop_h / float(crop_w)
    x_min, y_min, x_max, y_max = bbox[0], bbox[1], bbox[2], bbox[3]
    center = np.array([(x_min + x_max) / 2.0, (y_min + y_max) / 2.0])
    # Height of the smallest box with the crop's aspect ratio that still
    # covers the detection.
    side = max((x_max - x_min) * aspect, y_max - y_min)
    scale = np.array([side / aspect, side]) / 200.0
    # adjust bounding box tightness
    scale *= rescale
    return center, scale
def crop(img, center, scale, res):
    """
    Cut the box described by ``center``/``scale`` out of ``img`` and resize it.

    Parts of the box that fall outside the image stay zero.
    res: [rows, cols]

    Returns:
        tuple: ``(new_img, new_shape, (old_x, old_y), (new_x, new_y))`` -- the
        resized float32 crop, the crop's shape before resizing, the source
        ranges read from ``img``, and the destination ranges written in the
        un-resized crop.
    """
    out_rows, out_cols = res[0], res[1]
    box_scale = max(scale)
    # Upper-left and bottom-right corners of the crop, in image coordinates.
    ul = np.array(transform([1, 1], center, box_scale, res, invert=1)) - 1
    br = np.array(
        transform([out_cols + 1, out_rows + 1], center, box_scale, res,
                  invert=1)) - 1
    new_shape = [br[1] - ul[1], br[0] - ul[0]]
    if len(img.shape) > 2:
        new_shape.append(img.shape[2])
    canvas = np.zeros(new_shape, dtype=np.float32)
    img_rows, img_cols = len(img), len(img[0])
    # Range to fill in the new array
    new_x = max(0, -ul[0]), min(br[0], img_cols) - ul[0]
    new_y = max(0, -ul[1]), min(br[1], img_rows) - ul[1]
    # Range to sample from the original image
    old_x = max(0, ul[0]), min(img_cols, br[0])
    old_y = max(0, ul[1]), min(img_rows, br[1])
    dst_rows, dst_cols = slice(new_y[0], new_y[1]), slice(new_x[0], new_x[1])
    src_rows, src_cols = slice(old_y[0], old_y[1]), slice(old_x[0], old_x[1])
    # Best effort: a failed copy is reported and the crop stays zero-filled.
    try:
        canvas[dst_rows, dst_cols] = img[src_rows, src_cols]
    except Exception as e:
        print(e)
    resized = cv2.resize(canvas, (out_cols, out_rows))  # (cols, rows)
    return resized, new_shape, (old_x, old_y), (new_x, new_y)
def split_kp2ds_for_aa(kp2ds, ret_face=False):
    """Split a whole-body keypoint array into body, hand and face parts.

    Each of the 20 body keypoints is the mean of two source rows. For most
    of them both rows are the same index; entry 1 averages rows 6 and 5,
    entry 18 rows 17 and 18, and entry 19 rows 20 and 21.

    Args:
        kp2ds (np.ndarray): Keypoints indexed along axis 0; rows up to index
            132 are read.
        ret_face (bool): Also return the face keypoints (rows 22-90).

    Returns:
        tuple: ``(body, left_hand, right_hand)`` or, with ``ret_face``,
        ``(body, left_hand, right_hand, face)``. All parts are copies.
    """
    first_rows = [0, 6, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3, 17, 20]
    second_rows = [0, 5, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3, 18, 21]
    body = (kp2ds[first_rows] + kp2ds[second_rows]) / 2
    parts = [
        body.copy(),
        kp2ds[91:112].copy(),   # left hand
        kp2ds[112:133].copy(),  # right hand
    ]
    if ret_face:
        parts.append(kp2ds[22:91].copy())
    return tuple(parts)
def load_pose_metas_from_kp2ds_seq(kp2ds_seq, width, height):
    """Build one pose-meta dict per frame from a keypoint sequence.

    Every frame is copied, its first two columns are divided by ``width``
    and ``height`` respectively, and the result is split with
    ``split_kp2ds_for_aa``.

    Args:
        kp2ds_seq: Iterable of per-frame keypoint arrays.
        width: Frame width, used to normalize column 0.
        height: Frame height, used to normalize column 1.

    Returns:
        list[dict]: One dict per frame with keys ``width``, ``height``,
        ``keypoints_body``, ``keypoints_left_hand``, ``keypoints_right_hand``
        and ``keypoints_face``.
    """
    metas = []
    prev_body = None
    for frame_kps in kp2ds_seq:
        normed = frame_kps.copy()
        normed[:, 0] /= width
        normed[:, 1] /= height
        body, lhand, rhand, face = split_kp2ds_for_aa(normed, ret_face=True)
        # When every body keypoint has a negative x or y, reuse the body of
        # the previous frame (if there is one).
        if prev_body is not None and body[:, :2].min(axis=1).max() < 0:
            body = prev_body
        prev_body = body
        metas.append({
            "width": width,
            "height": height,
            "keypoints_body": body,
            "keypoints_left_hand": lhand,
            "keypoints_right_hand": rhand,
            "keypoints_face": face,
        })
    return metas

ComfyUI-WanAnimatePreprocess/pyproject.toml ADDED Viewed

	@@ -0,0 +1,15 @@

+[project]
+name = "ComfyUI-WanAnimatePreprocess"
+description = "ComfyUI nodes for WanAnimate input processing"
+version = "1.0.2"
+license = {file = "LICENSE"}
+dependencies = ["opencv-python", "onnxruntime-gpu", "onnx"]
+[project.urls]
+Repository = "https://github.com/kijai/ComfyUI-WanAnimatePreprocess"
+#  Used by Comfy Registry https://comfyregistry.org
+[tool.comfy]
+PublisherId = "kijai"
+DisplayName = "ComfyUI-WanAnimatePreprocess"
+Icon = ""

ComfyUI-WanAnimatePreprocess/readme.md ADDED Viewed

	@@ -0,0 +1,29 @@

+## ComfyUI helper nodes for [Wan video 2.2 Animate preprocessing](https://github.com/Wan-Video/Wan2.2/tree/main/wan/modules/animate/preprocess)
+Nodes to run the ViTPose model, get face crops and keypoint list for SAM2 segmentation.
+Models:
+Place the models in `ComfyUI/models/detection` (this location is subject to change in the future).
+YOLO:
+https://huggingface.co/Wan-AI/Wan2.2-Animate-14B/blob/main/process_checkpoint/det/yolov10m.onnx
+ViTPose ONNX:
+Use either the Large model from here:
+https://huggingface.co/JunkyByte/easy_ViTPose/tree/main/onnx/wholebody
+Or use the Huge model, as in the original code; it is split into two files due to the ONNX file size limit.
+Both files need to be in the same directory, with the `.onnx` file selected in the model loader:
+`vitpose_h_wholebody_data.bin` and `vitpose_h_wholebody_model.onnx`
+https://huggingface.co/Kijai/vitpose_comfy/tree/main/onnx
+![example](example.png)

ComfyUI-WanAnimatePreprocess/requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+onnx
+onnxruntime-gpu
+opencv-python

ComfyUI-WanAnimatePreprocess/retarget_pose.py ADDED Viewed

	@@ -0,0 +1,843 @@

+# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
+import numpy as np
+from tqdm import tqdm
+import math
+from typing import NamedTuple
+import copy
+from .pose_utils.pose2d_utils import AAPoseMeta
# Skeleton definition: keypoint names and the bones (limbs) that connect them.
keypoint_list = [
    "Nose", "Neck",
    "RShoulder", "RElbow", "RWrist",    # 0-based indices 2-4
    "LShoulder", "LElbow", "LWrist",    # 5-7
    "RHip", "RKnee", "RAnkle",          # 8-10
    "LHip", "LKnee", "LAnkle",          # 11-13
    "REye", "LEye", "REar", "LEar",     # 14-17
    "LToe", "RToe",                     # 18-19
]
# Each limb is a pair of 1-based indices into keypoint_list
# (the code looks them up as keypoints[index - 1]).
limbSeq = [
    [2, 3], [2, 6],                 # Neck -> RShoulder, Neck -> LShoulder
    [3, 4], [4, 5],                 # RShoulder -> RElbow -> RWrist
    [6, 7], [7, 8],                 # LShoulder -> LElbow -> LWrist
    [2, 9], [9, 10], [10, 11],      # Neck -> RHip -> RKnee -> RAnkle
    [2, 12], [12, 13], [13, 14],    # Neck -> LHip -> LKnee -> LAnkle
    [2, 1],                         # Neck -> Nose
    [1, 15], [15, 17],              # Nose -> REye -> REar
    [1, 16], [16, 18],              # Nose -> LEye -> LEar
    [14, 19],                       # LAnkle -> LToe
    [11, 20],                       # RAnkle -> RToe
]
# Small constant; not referenced in the visible part of this file.
eps = 0.01
class Keypoint(NamedTuple):
    """A single 2D keypoint with a confidence score.

    In this file, ``deal_hand_keypoints`` marks points below its score
    threshold with ``x=-1, y=-1, score=0``.
    """
    # Horizontal coordinate.
    x: float
    # Vertical coordinate.
    y: float
    # Confidence score; defaults to 1.0.
    score: float = 1.0
    # Identifier; defaults to -1. Its meaning is not shown in this part of the file.
    id: int = -1
# Bone-length helper: measures one limb of a skeleton in pixels so that
# source and destination bone lengths can be compared.
def get_length(skeleton, limb):
    """Return the pixel coordinates and length of one limb.

    Args:
        skeleton (dict): Needs ``'height'``, ``'width'`` and
            ``'keypoints_body'``; body keypoints are normalized (x, y, ...)
            entries or None.
        limb: Pair of 1-based keypoint indices.

    Returns:
        tuple: ``(X, Y, length)`` where ``X`` and ``Y`` are 2-element arrays
        with the endpoints' pixel x and y coordinates, or
        ``(None, None, None)`` when either endpoint is None.
    """
    start_idx, end_idx = limb
    height, width = skeleton['height'], skeleton['width']
    body = skeleton['keypoints_body']
    start = body[start_idx - 1]
    end = body[end_idx - 1]
    if start is None or end is None:
        return None, None, None
    # Scale normalized coordinates to pixels.
    xs = np.array([start[0], end[0]]) * float(width)
    ys = np.array([start[1], end[1]]) * float(height)
    length = ((xs[0] - xs[1]) ** 2 + (ys[0] - ys[1]) ** 2) ** 0.5
    return xs, ys, length
def get_handpose_meta(keypoints, delta, src_H, src_W):
    """Convert hand keypoints to integer pixel positions shifted by ``delta``.

    Args:
        keypoints: Sequence of ``Keypoint`` (or None) with coordinates that
            are multiplied by ``src_W`` / ``src_H``.
        delta: Offset; ``delta[0]`` is added to x and ``delta[1]`` to y.
        src_H: Source height.
        src_W: Source width.

    Returns:
        list: One entry per input; None for inputs that are None or have
        score 0, otherwise a ``Keypoint`` with integer x/y and the original
        score.
    """
    shifted = []
    for point in keypoints:
        if point is None or point.score == 0:
            shifted.append(None)
            continue
        px = int(point.x * src_W + delta[0])
        py = int(point.y * src_H + delta[1])
        shifted.append(Keypoint(x=px, y=py, score=point.score))
    return shifted
def deal_hand_keypoints(hand_res, r_ratio, l_ratio, hand_score_th = 0.5):
    """Scale both hands about their first keypoint and drop weak points.

    Args:
        hand_res (dict): ``'left'`` and ``'right'`` sequences of
            ``(x, y, score)`` points.
        r_ratio: Scale factor for the right hand.
        l_ratio: Scale factor for the left hand.
        hand_score_th: Points scoring below this become
            ``Keypoint(x=-1, y=-1, score=0)``.

    Returns:
        tuple: ``(right_hand, left_hand)`` lists of ``Keypoint`` -- note the
        right hand comes first.
    """
    def _rescale_hand(points, ratio, count):
        # Offset that keeps the first point fixed after scaling by `ratio`.
        anchor_dx = points[0][0] * (ratio - 1)
        anchor_dy = points[0][1] * (ratio - 1)
        scaled = []
        for i in range(count):
            point = points[i]
            if point[2] < hand_score_th:
                scaled.append(Keypoint(x=-1, y=-1, score=0))
            else:
                scaled.append(Keypoint(
                    x=point[0] * ratio - anchor_dx,
                    y=point[1] * ratio - anchor_dy,
                    score=point[2],
                ))
        return scaled

    # The left hand's length drives the iteration for both hands.
    count = len(hand_res['left'])
    left_hand = _rescale_hand(hand_res['left'], l_ratio, count)
    right_hand = _rescale_hand(hand_res['right'], r_ratio, count)
    return right_hand, left_hand
def get_scaled_pose(canvas, src_canvas, keypoints, keypoints_hand, bone_ratio_list, delta_ground_x, delta_ground_y,
                    rescaled_src_ground_x, body_flag, id, scale_min, threshold = 0.4):
    """Rescale one frame's body skeleton bone by bone and reposition the hands.

    Args:
        canvas: ``(H, W)`` used to normalize the returned body keypoints.
        src_canvas: ``(src_H, src_W)`` used to scale the incoming keypoints.
        keypoints (list): Body keypoints; each entry is None, empty, or
            ``[x, y, score]`` with x/y multiplied by ``src_W``/``src_H``
            here. The list's entries are rewritten in place.
        keypoints_hand (dict): Passed to ``deal_hand_keypoints`` (needs
            ``'left'`` and ``'right'``).
        bone_ratio_list: One length ratio per entry of ``limbSeq``.
        delta_ground_x, delta_ground_y: Offsets subtracted from every body
            keypoint after rescaling.
        rescaled_src_ground_x: Reference x used to adjust ``delta_ground_x``
            when ``id == 0``.
        body_flag (str): ``'full_body'`` or ``'half_body'``; selects which
            keypoints define that adjustment.
        id: The adjustment above is applied only when this equals 0
            (presumably the frame index -- confirm against the caller).
        scale_min: Divisor applied to the offset keypoints.
        threshold: Hand score threshold forwarded to ``deal_hand_keypoints``.

    Returns:
        dict: ``height``, ``width``, ``keypoints_body`` (normalized by W/H,
        None for missing points), ``keypoints_left_hand`` and
        ``keypoints_right_hand``.
    """
    H, W = canvas
    src_H, src_W = src_canvas
    new_length_list = []
    angle_list = []
    # keypoints from 0-1 to H/W range
    # NOTE: this replaces entries of the caller's `keypoints` list.
    for idx in range(len(keypoints)):
        if keypoints[idx] is None or len(keypoints[idx]) == 0:
            continue
        keypoints[idx] = [keypoints[idx][0] * src_W, keypoints[idx][1] * src_H, keypoints[idx][2]]
    # first traverse, get new_length_list and angle_list
    for idx, (k1_index, k2_index) in enumerate(limbSeq):
        keypoint1 = keypoints[k1_index - 1]
        keypoint2 = keypoints[k2_index - 1]
        if keypoint1 is None or keypoint2 is None or len(keypoint1) == 0 or len(keypoint2) == 0:
            new_length_list.append(None)
            angle_list.append(None)
            continue
        xs = np.array([keypoint1[0], keypoint2[0]])
        ys = np.array([keypoint1[1], keypoint2[1]])
        length = ((ys[0] - ys[1]) ** 2 + (xs[0] - xs[1]) ** 2) ** 0.5
        new_length = length * bone_ratio_list[idx]
        angle = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
        new_length_list.append(new_length)
        angle_list.append(angle)
    # Keep foot length within 0.5x calf length
    # (limbSeq entries 8/18 and 11/17 are the calf/foot pairs).
    foot_lower_leg_ratio = 0.5
    if new_length_list[8] is not None and new_length_list[18] is not None:
        if new_length_list[18] > new_length_list[8] * foot_lower_leg_ratio:
            new_length_list[18] = new_length_list[8] * foot_lower_leg_ratio
    if new_length_list[11] is not None and new_length_list[17] is not None:
        if new_length_list[17] > new_length_list[11] * foot_lower_leg_ratio:
            new_length_list[17] = new_length_list[11] * foot_lower_leg_ratio
    # second traverse, calculate new keypoints
    # NOTE(review): this is a shallow copy, so entries that are never
    # replaced below are still shared with `keypoints`, and the in-place
    # offset further down changes them in both lists -- confirm intended.
    rescale_keypoints = keypoints.copy()
    for idx, (k1_index, k2_index) in enumerate(limbSeq):
        # update dst_keypoints
        start_keypoint = rescale_keypoints[k1_index - 1]
        new_length = new_length_list[idx]
        angle = angle_list[idx]
        if rescale_keypoints[k1_index - 1] is None or rescale_keypoints[k2_index - 1] is None or \
            len(rescale_keypoints[k1_index - 1]) == 0 or len(rescale_keypoints[k2_index - 1]) == 0:
            continue
        # calculate end_keypoint
        delta_x = new_length * math.cos(math.radians(angle))
        delta_y = new_length * math.sin(math.radians(angle))
        end_keypoint_x = start_keypoint[0] - delta_x
        end_keypoint_y = start_keypoint[1] - delta_y
        # update keypoints
        rescale_keypoints[k2_index - 1] = [end_keypoint_x, end_keypoint_y, rescale_keypoints[k2_index - 1][2]]
    if id == 0:
        if body_flag == 'full_body' and rescale_keypoints[8] is not None and rescale_keypoints[11] is not None:
            delta_ground_x_offset_first_frame = (rescale_keypoints[8][0] + rescale_keypoints[11][0]) / 2 - rescaled_src_ground_x
            delta_ground_x += delta_ground_x_offset_first_frame
        elif body_flag == 'half_body' and rescale_keypoints[1] is not None:
            delta_ground_x_offset_first_frame = rescale_keypoints[1][0] - rescaled_src_ground_x
            delta_ground_x += delta_ground_x_offset_first_frame
    # offset all keypoints
    for idx in range(len(rescale_keypoints)):
        if rescale_keypoints[idx] is None or len(rescale_keypoints[idx]) == 0:
            continue
        rescale_keypoints[idx][0] -= delta_ground_x
        rescale_keypoints[idx][1] -= delta_ground_y
        # rescale keypoints to original size
        rescale_keypoints[idx][0] /= scale_min
        rescale_keypoints[idx][1] /= scale_min
    # Scale hand proportions based on body skeletal ratios
    r_ratio = max(bone_ratio_list[0], bone_ratio_list[1]) / scale_min
    l_ratio = max(bone_ratio_list[0], bone_ratio_list[1]) / scale_min
    # NOTE(review): deal_hand_keypoints returns (right_hand, left_hand), so
    # `left_hand` below is built from keypoints_hand['right'] and is paired
    # with body keypoint index 4 ("RWrist" in keypoint_list) -- confirm the
    # left/right naming is intended.
    left_hand, right_hand = deal_hand_keypoints(keypoints_hand, r_ratio, l_ratio, hand_score_th = threshold)
    left_hand_new = left_hand.copy()
    right_hand_new = right_hand.copy()
    if rescale_keypoints[4] is None and rescale_keypoints[7] is None:
        pass
    elif rescale_keypoints[4] is None and rescale_keypoints[7] is not None:
        right_hand_delta = np.array(rescale_keypoints[7][:2]) - np.array(keypoints[7][:2])
        right_hand_new = get_handpose_meta(right_hand, right_hand_delta, src_H, src_W)
    elif rescale_keypoints[4] is not None and rescale_keypoints[7] is None:
        left_hand_delta = np.array(rescale_keypoints[4][:2]) - np.array(keypoints[4][:2])
        left_hand_new = get_handpose_meta(left_hand, left_hand_delta, src_H, src_W)
    else:
        # get left_hand and right_hand offset
        left_hand_delta = np.array(rescale_keypoints[4][:2]) - np.array(keypoints[4][:2])
        right_hand_delta = np.array(rescale_keypoints[7][:2]) - np.array(keypoints[7][:2])
        if keypoints[4][0] is not None and left_hand[0].x != -1:
            left_hand_root_offset = np.array((keypoints[4][0] - left_hand[0].x * src_W, keypoints[4][1] - left_hand[0].y * src_H))
            left_hand_delta += left_hand_root_offset
        if keypoints[7][0] is not None and right_hand[0].x != -1:
            right_hand_root_offset = np.array((keypoints[7][0] - right_hand[0].x * src_W, keypoints[7][1] - right_hand[0].y * src_H))
            right_hand_delta += right_hand_root_offset
        # Both distances are measured from the root of `left_hand`: if it
        # lies closer to body keypoint 7 than to keypoint 4, the two hands
        # are swapped.
        dis_left_hand = ((keypoints[4][0] - left_hand[0].x * src_W) ** 2 + (keypoints[4][1] - left_hand[0].y * src_H) ** 2) ** 0.5
        dis_right_hand = ((keypoints[7][0] - left_hand[0].x * src_W) ** 2 + (keypoints[7][1] - left_hand[0].y * src_H) ** 2) ** 0.5
        if dis_left_hand > dis_right_hand:
            right_hand_new = get_handpose_meta(left_hand, right_hand_delta, src_H, src_W)
            left_hand_new = get_handpose_meta(right_hand, left_hand_delta, src_H, src_W)
        else:
            left_hand_new = get_handpose_meta(left_hand, left_hand_delta, src_H, src_W)
            right_hand_new = get_handpose_meta(right_hand, right_hand_delta, src_H, src_W)
    # get normalized keypoints_body
    norm_body_keypoints = []
    for body_keypoint in rescale_keypoints:
        if body_keypoint is not None:
            norm_body_keypoints.append([body_keypoint[0] / W, body_keypoint[1] / H, body_keypoint[2]])
        else:
            norm_body_keypoints.append(None)
    frame_info = {
        'height': H,
        'width': W,
        'keypoints_body': norm_body_keypoints,
        'keypoints_left_hand': left_hand_new,
        'keypoints_right_hand': right_hand_new,
    }
    return frame_info
def rescale_skeleton(H, W, keypoints, bone_ratio_list):
    """Return body keypoints in pixels with every bone length rescaled.

    Bones are walked in ``limbSeq`` order; each bone keeps its direction,
    has its length multiplied by the matching entry of ``bone_ratio_list``,
    and its end point is re-derived from the (possibly already moved) start
    point.

    Args:
        H: Height used to scale the y coordinates.
        W: Width used to scale the x coordinates.
        keypoints (list): Entries are None, empty, or start with normalized
            ``(x, y)``. The list itself is not modified.
        bone_ratio_list: One length ratio per entry of ``limbSeq``.

    Returns:
        list: New list of ``[x, y]`` pixel positions; None/empty entries are
        carried over unchanged.
    """
    def _missing(point):
        return point is None or len(point) == 0

    scaled = keypoints.copy()
    # keypoints from 0-1 to H/W range
    for i, point in enumerate(scaled):
        if _missing(point):
            continue
        scaled[i] = [point[0] * W, point[1] * H]
    # First pass: target length and direction of every bone.
    lengths = []
    angles = []
    for limb_idx, (start_id, end_id) in enumerate(limbSeq):
        start = scaled[start_id - 1]
        end = scaled[end_id - 1]
        if _missing(start) or _missing(end):
            lengths.append(None)
            angles.append(None)
            continue
        xs = np.array([start[0], end[0]])
        ys = np.array([start[1], end[1]])
        bone_length = ((ys[0] - ys[1]) ** 2 + (xs[0] - xs[1]) ** 2) ** 0.5
        lengths.append(bone_length * bone_ratio_list[limb_idx])
        angles.append(math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1])))
    # Second pass: rebuild each end point from its start point.
    for limb_idx, (start_id, end_id) in enumerate(limbSeq):
        start = scaled[start_id - 1]
        if _missing(start) or _missing(scaled[end_id - 1]):
            continue
        reach = lengths[limb_idx]
        theta = math.radians(angles[limb_idx])
        scaled[end_id - 1] = [
            start[0] - reach * math.cos(theta),
            start[1] - reach * math.sin(theta),
        ]
    return scaled
def fix_lack_keypoints_use_sym(skeleton):
    """Fill in missing limb end points using the opposite side as a reference.

    ``skeleton['keypoints_body']`` is modified in place and the same
    ``skeleton`` dict is returned.

    Step 1 walks each limb chain (two arms, two legs including the foot
    point); once a joint in a chain is ``None`` every later joint of that
    chain is cleared as well.

    Step 2 visits each limb whose start point exists but whose end point is
    ``None``.  The limb length (in pixels) is taken from the same limb on
    the other side if both of its points exist.  Otherwise it falls back to
    the larger of the two neck-to-hip distances, halved for arm limbs and
    divided by five for the foot limb.  The end point is then placed straight
    below the start point (same x, larger y) at that distance.  Repaired
    points are two-element ``[x, y]`` lists (no confidence value).

    Args:
        skeleton: Dict with ``'keypoints_body'`` (list of keypoints or
            ``None``), ``'height'`` and ``'width'``.  Coordinates are
            multiplied by width/height before measuring, so they are
            presumably normalised to 0-1.

    Returns:
        The same ``skeleton`` object.
    """
    keypoints = skeleton['keypoints_body']
    H, W = skeleton['height'], skeleton['width']

    def _pixel_length(kp_a, kp_b):
        # Distance between two keypoints after scaling to pixel units.
        xs = np.array([kp_a[0], kp_b[0]]) * float(W)
        ys = np.array([kp_a[1], kp_b[1]]) * float(H)
        return ((xs[0] - xs[1]) ** 2 + (ys[0] - ys[1]) ** 2) ** 0.5

    def _torso_reference(limb_pos):
        # Fallback length derived from neck (index 1) to either hip
        # (indices 8 and 11).  The same per-limb scaling is applied to both
        # candidates; previously the foot factor was only applied to the
        # second one, so max() returned a full torso length for a foot.
        neck = keypoints[1]
        best = 0
        for hip_index in (8, 11):
            hip = keypoints[hip_index]
            if neck is None or hip is None:
                continue
            length = _pixel_length(neck, hip)
            if limb_pos <= 1:  # arms
                length /= 2
            elif limb_pos == 4:  # foot
                length /= 5
            best = max(best, length)
        return best

    limb_points_list = [
        [3, 4, 5],
        [6, 7, 8],
        [12, 13, 14, 19],
        [9, 10, 11, 20],
    ]
    for limb_points in limb_points_list:
        miss_flag = False
        for point in limb_points:
            if keypoints[point - 1] is None:
                miss_flag = True
                continue
            if miss_flag:
                # A joint further up the chain is missing: drop this one too.
                skeleton['keypoints_body'][point - 1] = None

    repair_limb_seq_left = [
        [3, 4], [4, 5],      # left arm
        [12, 13], [13, 14],  # left leg
        [14, 19],            # left foot
    ]
    repair_limb_seq_right = [
        [6, 7], [7, 8],      # right arm
        [9, 10], [10, 11],   # right leg
        [11, 20],            # right foot
    ]
    repair_limb_seq = [repair_limb_seq_left, repair_limb_seq_right]

    for idx_part, part in enumerate(repair_limb_seq):
        for idx, (k1_index, k2_index) in enumerate(part):
            keypoint1 = keypoints[k1_index - 1]
            keypoint2 = keypoints[k2_index - 1]
            if keypoint1 is None or keypoint2 is not None:
                continue

            # Same limb on the opposite side of the body.
            k1_index_sym, k2_index_sym = repair_limb_seq[1 - idx_part][idx]
            keypoint1_sym = keypoints[k1_index_sym - 1]
            keypoint2_sym = keypoints[k2_index_sym - 1]
            if keypoint1_sym is not None and keypoint2_sym is not None:
                ref_length = _pixel_length(keypoint1_sym, keypoint2_sym)
            else:
                ref_length = _torso_reference(idx)

            if ref_length != 0:
                # Place the missing point straight below its parent joint.
                skeleton['keypoints_body'][k2_index - 1] = [
                    keypoint1[0],
                    keypoint1[1] + ref_length / H,
                ]
    return skeleton
def rescale_shorten_skeleton(ratio_list, src_length_list, dst_length_list):
    """Make paired bone ratios agree with each other.

    ``ratio_list`` is modified in place:

    * for each pair in ``modify_bone_list`` both entries are set to the
      larger of the two ratios;
    * for the pairs ``(13, 15)`` and ``(14, 16)`` both entries are set to
      their mean, provided neither is ``None`` (the original local was
      called ``ratio_eye_avg``, so these are presumably the eye/ear bones).

    Args:
        ratio_list: Per-limb ratios, indexed like ``limbSeq``.
        src_length_list: Passed through unchanged.
        dst_length_list: Passed through unchanged.

    Returns:
        The tuple ``(ratio_list, src_length_list, dst_length_list)`` holding
        the very same list objects that were passed in.
    """
    modify_bone_list = [
        (0, 1),
        (2, 4),
        (3, 5),
        (6, 9),
        (7, 10),
        (8, 11),
        (17, 18),
    ]
    for first, second in modify_bone_list:
        shared_ratio = max(ratio_list[first], ratio_list[second])
        ratio_list[first] = shared_ratio
        ratio_list[second] = shared_ratio

    for first, second in ((13, 15), (14, 16)):
        if ratio_list[first] is not None and ratio_list[second] is not None:
            mean_ratio = (ratio_list[first] + ratio_list[second]) / 2
            ratio_list[first] = mean_ratio
            ratio_list[second] = mean_ratio
    return ratio_list, src_length_list, dst_length_list
def check_full_body(keypoints, threshold = 0.4):
    """Classify how much of the body is visible in a set of keypoints.

    A pair of keypoints counts as visible when both are not ``None``, both
    have ``y <= 1`` (element 1) and both have a confidence (element 2) of at
    least ``threshold``.

    Args:
        keypoints: Sequence of ``[x, y, confidence]`` keypoints or ``None``.
            Indices 8/11 are the hips and 10/13 the ankles.
        threshold: Minimum confidence for a keypoint to count.

    Returns:
        ``'full_body'`` if hips and ankles are visible,
        ``'three_quarter_body'`` if only the hips are, else ``'half_body'``.
    """
    def _pair_visible(index_a, index_b):
        first, second = keypoints[index_a], keypoints[index_b]
        if first is None or second is None:
            return False
        return (first[1] <= 1 and second[1] <= 1
                and first[2] >= threshold and second[2] >= threshold)

    # Both positive outcomes require the hips, so test them once up front.
    if not _pair_visible(8, 11):
        return 'half_body'
    if _pair_visible(10, 13):
        return 'full_body'
    return 'three_quarter_body'
def check_full_body_both(flag1, flag2):
    """Return the less complete of two body-coverage flags.

    The flags are ordered ``'half_body'`` < ``'three_quarter_body'`` <
    ``'full_body'``.  An unknown flag raises ``KeyError``.
    """
    coverage_rank = {
        'half_body': 0,
        'three_quarter_body': 1,
        'full_body': 2,
    }
    return min((flag1, flag2), key=coverage_rank.__getitem__)
def write_to_poses(data_to_json, none_idx, dst_shape, bone_ratio_list, delta_ground_x, delta_ground_y, rescaled_src_ground_x, body_flag, scale_min):
    """Retarget every source frame and collect the per-frame results.

    For each frame dict in ``data_to_json`` this function, in place,

    * sets every body keypoint whose index is in ``none_idx`` to ``None``;
    * divides the x/y of every hand keypoint by the frame's own
      ``'width'`` / ``'height'``;

    and then forwards the frame to ``get_scaled_pose``.

    Args:
        data_to_json: Sequence of frame dicts with ``'height'``, ``'width'``,
            ``'keypoints_body'``, ``'keypoints_left_hand'`` and
            ``'keypoints_right_hand'``.
        none_idx: Body keypoint indices to drop in every frame.
        dst_shape: ``(width, height)`` of the destination canvas.
        bone_ratio_list, delta_ground_x, delta_ground_y,
        rescaled_src_ground_x, body_flag, scale_min: Passed through to
            ``get_scaled_pose`` unchanged.

    Returns:
        List with one ``get_scaled_pose`` result per frame, in input order.
    """
    width, height = dst_shape
    dropped = set(none_idx)  # O(1) membership tests inside the frame loop
    outputs = []
    for frame_idx in tqdm(range(len(data_to_json))):
        frame = data_to_json[frame_idx]
        src_height, src_width = frame['height'], frame['width']

        keypoints = frame['keypoints_body']
        for idx in range(len(keypoints)):
            if idx in dropped:
                keypoints[idx] = None
        new_keypoints = keypoints.copy()

        # The dict shares the hand lists with the frame, so the in-place
        # normalisation below is visible through it as well.
        keypoints_hand = {
            'left': frame['keypoints_left_hand'],
            'right': frame['keypoints_right_hand'],
        }
        # Normalize hand coordinates to 0-1 range
        for hand_key in ('keypoints_left_hand', 'keypoints_right_hand'):
            for point in frame[hand_key]:
                point[0] = point[0] / src_width
                point[1] = point[1] / src_height

        frame_info = get_scaled_pose(
            (height, width), (src_height, src_width), new_keypoints, keypoints_hand,
            bone_ratio_list, delta_ground_x, delta_ground_y, rescaled_src_ground_x,
            body_flag, frame_idx, scale_min)
        outputs.append(frame_info)
    return outputs
def calculate_scale_ratio(skeleton, skeleton_edit, scale_ratio_flag):
    """Estimate how much larger ``skeleton`` is than ``skeleton_edit``.

    When ``scale_ratio_flag`` is falsy the function returns ``1`` without
    looking at either skeleton.  Otherwise it returns the larger of

    * the ratio of the x-extents spanned by body keypoints 0, 14, 15, 16
      and 17 (named "headw" in the original code), and
    * the ratio of the lengths ``get_length`` reports for limb ``[6, 3]``
      (named "shoulder" in the original code).
    """
    if not scale_ratio_flag:
        return 1

    head_indices = (0, 14, 15, 16, 17)

    def _head_width(pose):
        xs = [pose['keypoints_body'][i][0] for i in head_indices]
        return max(xs) - min(xs)

    head_ratio = _head_width(skeleton) / _head_width(skeleton_edit)

    shoulder_len = get_length(skeleton, [6,3])[2]
    shoulder_len_edit = get_length(skeleton_edit, [6,3])[2]
    shoulder_ratio = shoulder_len / shoulder_len_edit

    return max(head_ratio, shoulder_ratio)
def _estimate_body_scale(src_skeleton, dst_skeleton, min_conf=0.5):
    """Return ``(scale, used_fallback)`` relating source size to destination size.

    Preference order:

    1. keypoint 0 and both ankles (10, 13) present in both skeletons with
       confidence above ``min_conf``: ratio of the pixel distances between
       keypoint 0 and the ankle midpoint;
    2. the same using both hips (8, 11);
    3. otherwise the ratio of the square roots of the image areas, in which
       case ``used_fallback`` is ``True``.
    """
    src_kps = src_skeleton['keypoints_body']
    dst_kps = dst_skeleton['keypoints_body']

    def _reliable(indices):
        def _points():
            # Lazy so that evaluation stops at the first failing keypoint.
            for kps in (src_kps, dst_kps):
                for index in indices:
                    yield kps[index]
        return (all(p is not None for p in _points())
                and all(p[2] > min_conf for p in _points()))

    def _pixel_height(skeleton, low_a, low_b):
        kps = skeleton['keypoints_body']
        return skeleton['height'] * abs((kps[low_a][1] + kps[low_b][1]) / 2 - kps[0][1])

    for low_a, low_b in ((10, 13), (8, 11)):
        if _reliable((0, low_a, low_b)):
            scale = (1.0 * _pixel_height(src_skeleton, low_a, low_b)
                     / _pixel_height(dst_skeleton, low_a, low_b))
            return scale, False

    area_scale = (np.sqrt(src_skeleton['height'] * src_skeleton['width'])
                  / np.sqrt(dst_skeleton['height'] * dst_skeleton['width']))
    return area_scale, True


def _scale_canvas_and_hands(skeleton, scale):
    """Multiply a skeleton's height/width and hand x/y by ``scale`` in place."""
    skeleton['height'] = int(skeleton['height'] * scale)
    skeleton['width'] = int(skeleton['width'] * scale)
    for hand_key in ('keypoints_left_hand', 'keypoints_right_hand'):
        for point in skeleton[hand_key]:
            point[0] *= scale
            point[1] *= scale


def retarget_pose(src_skeleton, dst_skeleton, all_src_skeleton, src_skeleton_edit, dst_skeleton_edit, threshold=0.4):
    """Retarget a sequence of source poses onto the proportions of a destination pose.

    The function mutates its skeleton arguments (canvas size, hand
    coordinates, body keypoints set to ``None`` or repaired).

    Steps:
        1. Estimate a global scale between source and destination and apply
           it to the destination canvas size and hand keypoints (and, when
           both ``*_edit`` skeletons are given, to the edited destination).
        2. Decide whether full body / three quarter / half body is visible
           in both skeletons.
        3. Repair missing limb end points (``fix_lack_keypoints_use_sym``)
           and drop body keypoints missing in either skeleton.
        4. Compute one length ratio per ``limbSeq`` limb, from the edited
           pair if available, else from the plain pair.
        5. Rescale an untouched copy of the source skeleton and derive a
           global translation from the feet (full body) or the neck.
        6. Apply everything to all frames via ``write_to_poses``.

    Args:
        src_skeleton: Reference frame of the source sequence.
        dst_skeleton: Pose of the destination character.
        all_src_skeleton: Every source frame to retarget.
        src_skeleton_edit, dst_skeleton_edit: Optional alternative pair used
            for the bone ratios; used only when both are not ``None``.
        threshold: Confidence threshold passed to ``check_full_body``.

    Returns:
        The list produced by ``write_to_poses``.
    """
    use_edit_for_base = src_skeleton_edit is not None and dst_skeleton_edit is not None

    # Keep a pristine copy: later steps mutate src_skeleton in place.
    src_skeleton_ori = copy.deepcopy(src_skeleton)
    dst_skeleton_ori_h, dst_skeleton_ori_w = dst_skeleton['height'], dst_skeleton['width']

    scale_min, _ = _estimate_body_scale(src_skeleton, dst_skeleton)

    if use_edit_for_base:
        scale_min_edit, scale_ratio_flag = _estimate_body_scale(src_skeleton_edit, dst_skeleton_edit)
        # Flux may change the scale, compensate for it here.  The ratios must
        # be taken before any canvas size is modified below.
        ratio_src = calculate_scale_ratio(src_skeleton, src_skeleton_edit, scale_ratio_flag)
        ratio_dst = calculate_scale_ratio(dst_skeleton, dst_skeleton_edit, scale_ratio_flag)
        _scale_canvas_and_hands(dst_skeleton_edit, scale_min_edit)

    _scale_canvas_and_hands(dst_skeleton, scale_min)

    dst_body_flag = check_full_body(dst_skeleton['keypoints_body'], threshold)
    src_body_flag = check_full_body(src_skeleton_ori['keypoints_body'], threshold)
    body_flag = check_full_body_both(dst_body_flag, src_body_flag)

    if use_edit_for_base:
        src_skeleton_edit = fix_lack_keypoints_use_sym(src_skeleton_edit)
        dst_skeleton_edit = fix_lack_keypoints_use_sym(dst_skeleton_edit)
    else:
        src_skeleton = fix_lack_keypoints_use_sym(src_skeleton)
        dst_skeleton = fix_lack_keypoints_use_sym(dst_skeleton)

    # A keypoint missing on either side is dropped on both sides.
    none_idx = []
    for idx in range(len(dst_skeleton['keypoints_body'])):
        if dst_skeleton['keypoints_body'][idx] is None or src_skeleton['keypoints_body'][idx] is None:
            src_skeleton['keypoints_body'][idx] = None
            dst_skeleton['keypoints_body'][idx] = None
            none_idx.append(idx)

    # get bone ratio list (-1 marks a limb that could not be measured)
    ratio_list, src_length_list, dst_length_list = [], [], []
    for limb in limbSeq:
        if use_edit_for_base:
            src_X, src_Y, src_length = get_length(src_skeleton_edit, limb)
            dst_X, dst_Y, dst_length = get_length(dst_skeleton_edit, limb)
            if src_X is None or src_Y is None or dst_X is None or dst_Y is None:
                ratio = -1
            else:
                ratio = 1.0 * dst_length * ratio_dst / src_length / ratio_src
        else:
            src_X, src_Y, src_length = get_length(src_skeleton, limb)
            dst_X, dst_Y, dst_length = get_length(dst_skeleton, limb)
            if src_X is None or src_Y is None or dst_X is None or dst_Y is None:
                ratio = -1
            else:
                ratio = 1.0 * dst_length / src_length
        ratio_list.append(ratio)
        src_length_list.append(src_length)
        dst_length_list.append(dst_length)

    # Unmeasurable limbs inherit the mean of the first two ratios when both
    # of those are available.
    for idx, ratio in enumerate(ratio_list):
        if ratio == -1:
            if ratio_list[0] != -1 and ratio_list[1] != -1:
                ratio_list[idx] = (ratio_list[0] + ratio_list[1]) / 2

    # NOTE: upstream considered capping ratio_list[12] at 1.25x the mean of
    # the first two ratios for cases where Flux fails to correct the head
    # pose; that constraint is not active.
    ratio_list, src_length_list, dst_length_list = rescale_shorten_skeleton(ratio_list, src_length_list, dst_length_list)

    rescaled_src_skeleton_ori = rescale_skeleton(src_skeleton_ori['height'], src_skeleton_ori['width'],
                                                 src_skeleton_ori['keypoints_body'], ratio_list)

    # get global translation offset_x and offset_y
    dst_body = dst_skeleton['keypoints_body']
    if body_flag == 'full_body':
        # Align on the ground line (lowest ankle) and the hip centre.
        dst_ground_y = max(dst_body[10][1], dst_body[13][1]) * dst_skeleton['height']
        # The midpoint between toe and ankle
        if dst_body[18] is not None and dst_body[19] is not None:
            right_foot_mid = (dst_body[10][1] + dst_body[19][1]) / 2
            left_foot_mid = (dst_body[13][1] + dst_body[18][1]) / 2
            dst_ground_y = max(left_foot_mid, right_foot_mid) * dst_skeleton['height']

        rescaled_src_ground_y = max(rescaled_src_skeleton_ori[10][1], rescaled_src_skeleton_ori[13][1])
        delta_ground_y = rescaled_src_ground_y - dst_ground_y

        dst_ground_x = (dst_body[8][0] + dst_body[11][0]) * dst_skeleton['width'] / 2
        rescaled_src_ground_x = (rescaled_src_skeleton_ori[8][0] + rescaled_src_skeleton_ori[11][0]) / 2
        delta_ground_x = rescaled_src_ground_x - dst_ground_x
        delta_x, delta_y = delta_ground_x, delta_ground_y
    else:
        # use neck keypoint as mark
        src_neck_y = rescaled_src_skeleton_ori[1][1]
        dst_neck_y = dst_body[1][1]
        delta_neck_y = src_neck_y - dst_neck_y * dst_skeleton['height']

        src_neck_x = rescaled_src_skeleton_ori[1][0]
        dst_neck_x = dst_body[1][0]
        delta_neck_x = src_neck_x - dst_neck_x * dst_skeleton['width']
        delta_x, delta_y = delta_neck_x, delta_neck_y
        rescaled_src_ground_x = src_neck_x

    # Output frames use the destination's original (unscaled) canvas size.
    dst_shape = (dst_skeleton_ori_w, dst_skeleton_ori_h)
    output = write_to_poses(all_src_skeleton, none_idx, dst_shape, ratio_list, delta_x, delta_y,
                            rescaled_src_ground_x, body_flag, scale_min)
    return output
def _pose_meta_arrays_to_lists(meta):
    """Replace every ``np.ndarray`` value of ``meta`` by a nested list, in place.

    Hand keypoint arrays are first multiplied by ``[width, height, 1.0]``.
    Values that are already lists are left alone, so running this twice on
    the same dict does not scale the hands twice.
    """
    for key, value in meta.items():
        if not isinstance(value, np.ndarray):
            continue
        if key in ('keypoints_left_hand', 'keypoints_right_hand'):
            value = value * np.array([[meta["width"], meta["height"], 1.0]])
        # Re-assigning an existing key does not resize the dict, so this is
        # safe while iterating over items().
        meta[key] = value.tolist()


def _hand_keypoints_to_arrays(hand_keypoints):
    """Split hand keypoints into ``(xy array, score array)``.

    Each keypoint must expose ``.x``, ``.y`` and ``.score``; a ``None``
    entry becomes ``[None, None]`` with score ``0.0``.
    """
    coords, scores = [], []
    for keypoint in hand_keypoints:
        if keypoint is not None:
            coords.append([keypoint.x, keypoint.y])
            scores.append(keypoint.score)
        else:
            coords.append([None, None])
            scores.append(0.0)
    return np.array(coords), np.array(scores)


def get_retarget_pose(tpl_pose_meta0, refer_pose_meta, tpl_pose_metas, tql_edit_pose_meta0, refer_edit_pose_meta):
    """Retarget template poses onto a reference pose and wrap them as ``AAPoseMeta``.

    All pose-meta dicts passed in are converted in place from numpy arrays
    to plain lists (hand keypoints scaled by width/height) and then handed
    to ``retarget_pose``.

    Args:
        tpl_pose_meta0: Pose meta of the template frame used as the source
            reference.
        refer_pose_meta: Pose meta of the reference (destination) image.
        tpl_pose_metas: Iterable of pose metas for all template frames.
        tql_edit_pose_meta0: Optional edited template pose meta, or ``None``.
        refer_edit_pose_meta: Optional edited reference pose meta, or ``None``.

    Returns:
        List of ``AAPoseMeta`` objects, one per retargeted frame, with
        ``kps_body`` multiplied by ``(width, height)``.
    """
    _pose_meta_arrays_to_lists(tpl_pose_meta0)
    _pose_meta_arrays_to_lists(refer_pose_meta)

    tpl_pose_metas_new = []
    for meta in tpl_pose_metas:
        _pose_meta_arrays_to_lists(meta)
        tpl_pose_metas_new.append(meta)

    if tql_edit_pose_meta0 is not None:
        _pose_meta_arrays_to_lists(tql_edit_pose_meta0)
    if refer_edit_pose_meta is not None:
        _pose_meta_arrays_to_lists(refer_edit_pose_meta)

    retarget_tpl_pose_metas = retarget_pose(tpl_pose_meta0, refer_pose_meta, tpl_pose_metas_new, tql_edit_pose_meta0, refer_edit_pose_meta)

    pose_metas = []
    for meta in retarget_tpl_pose_metas:
        pose_meta = AAPoseMeta()
        width, height = meta["width"], meta["height"]
        pose_meta.width = width
        pose_meta.height = height

        body = np.array(meta["keypoints_body"])
        pose_meta.kps_body = body[:, :2] * (width, height)
        pose_meta.kps_body_p = body[:, 2]

        pose_meta.kps_lhand, pose_meta.kps_lhand_p = _hand_keypoints_to_arrays(meta["keypoints_left_hand"])
        pose_meta.kps_rhand, pose_meta.kps_rhand_p = _hand_keypoints_to_arrays(meta["keypoints_right_hand"])
        pose_metas.append(pose_meta)
    return pose_metas

ComfyUI-WanAnimatePreprocess/utils.py ADDED Viewed

	@@ -0,0 +1,317 @@

+# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
+import os
+import cv2
+import math
+import random
+import numpy as np
def get_mask_boxes(mask):
    """Return the tight bounding box of the non-zero pixels of a 2-D mask.

    Args:
        mask: 2-D array; any non-zero element counts as foreground.

    Returns:
        ``np.int32`` array ``[x_min, y_min, x_max, y_max]``.  The max values
        are the indices of the last foreground column/row (inclusive).

    Raises:
        ValueError: If the mask has no non-zero pixel.
    """
    y_coords, x_coords = np.nonzero(mask)
    if x_coords.size == 0:
        # min()/max() on an empty array would fail with a generic reduction
        # error; say what is actually wrong instead.
        raise ValueError("mask has no non-zero pixels; cannot compute a bounding box")
    bbox = np.array([x_coords.min(), y_coords.min(), x_coords.max(), y_coords.max()])
    return bbox.astype(np.int32)
def get_aug_mask(body_mask, w_len=10, h_len=20):
    """Coarsen a mask by filling every grid cell that contains foreground.

    The bounding box of ``body_mask`` is divided into roughly
    ``w_len`` x ``h_len`` cells; each cell holding at least one non-zero
    pixel is set to 1 entirely.  ``body_mask`` is modified in place and
    also returned.

    Args:
        body_mask: 2-D mask array (must contain at least one non-zero pixel).
        w_len: Number of cells along the bounding-box width.
        h_len: Number of cells along the bounding-box height.

    Returns:
        The same ``body_mask`` array.
    """
    x_min, y_min, x_max, y_max = get_mask_boxes(body_mask)
    # A box narrower than w_len (or shorter than h_len) truncates to a cell
    # size of 0, which range() rejects; fall back to one-pixel cells.
    w_slice = int((x_max - x_min) / w_len) or 1
    h_slice = int((y_max - y_min) / h_len) or 1

    for w_start in range(x_min, x_max, w_slice):
        w_end = min(w_start + w_slice, x_max)
        for h_start in range(y_min, y_max, h_slice):
            h_end = min(h_start + h_slice, y_max)
            if body_mask[h_start:h_end, w_start:w_end].sum() > 0:
                body_mask[h_start:h_end, w_start:w_end] = 1
    return body_mask
def get_mask_body_img(img_copy, hand_mask, k=7, iterations=1):
    """Blank out the dilated ``hand_mask`` region of an image.

    The mask is dilated with a ``k`` x ``k`` all-ones kernel for
    ``iterations`` rounds and the image is multiplied by ``1 - dilated``.

    Returns:
        ``(masked_image, dilated_mask)``.
    """
    structuring_element = np.ones((k, k), np.uint8)
    dilated_mask = cv2.dilate(hand_mask, structuring_element, iterations=iterations)
    keep_weights = 1 - dilated_mask[:, :, None]
    return img_copy * keep_weights, dilated_mask
def get_face_bboxes(kp2ds, scale, image_shape, ratio_aug):
    """Compute an enlarged face box from keypoint rows 23..90.

    The tight box around the selected keypoints is grown so that its area is
    multiplied by ``scale`` while keeping its aspect ratio.  Horizontally the
    growth is split evenly; vertically three quarters go to the top and one
    quarter to the bottom.  With ``ratio_aug`` a random extra margin (up to a
    tenth of the box size, floored) is added to either the width or the
    height.  The result is clipped to the image.

    Args:
        kp2ds: Array of keypoints; columns 0 and 1 of rows 23..90 are used.
        scale: Area multiplier.
        image_shape: ``(height, width)`` of the image.
        ratio_aug: Enable the random margin.

    Returns:
        ``[x_min, x_max, y_min, y_max]`` as Python ints.
    """
    img_h, img_w = image_shape
    face_points = kp2ds.copy()[23:91, :2]
    left, top = np.min(face_points, axis=0)
    right, bottom = np.max(face_points, axis=0)

    box_w = right - left
    box_h = bottom - top
    grown_area = box_w * box_h * scale
    grown_w = np.sqrt(grown_area * (box_w / box_h))
    grown_h = np.sqrt(grown_area * (box_h / box_w))

    margin_x = (grown_w - box_w) / 2
    margin_y = (grown_h - box_h) / 4  # applied 3x above and 1x below

    if ratio_aug:
        if random.random() > 0.5:
            margin_x += random.uniform(0, box_w // 10)
        else:
            margin_y += random.uniform(0, box_h // 10)

    x_lo = max(left - margin_x, 0)
    x_hi = min(right + margin_x, img_w)
    y_lo = max(top - 3 * margin_y, 0)
    y_hi = min(bottom + margin_y, img_h)
    return [int(x_lo), int(x_hi), int(y_lo), int(y_hi)]
def calculate_new_size(orig_w, orig_h, target_area, divisor=64):
    """Pick the largest ``divisor``-aligned size that fits ``target_area``.

    Candidate heights are all multiples of ``divisor`` up to the height an
    image of the original aspect ratio would have at ``target_area``
    (rounded up).  For each height the ideal width is snapped down and up to
    a multiple of ``divisor``.  Among the candidates whose area does not
    exceed ``target_area`` the one with the largest area wins; ties are
    broken by the smallest aspect-ratio error.

    Args:
        orig_w: Original width.
        orig_h: Original height.
        target_area: Maximum allowed ``width * height``.
        divisor: Both returned dimensions are multiples of this value.

    Returns:
        ``(width, height)`` as Python ints.

    Raises:
        ValueError: If no candidate fits into ``target_area``.
    """
    target_ratio = orig_w / orig_h

    def check_valid(w, h):
        if w <= 0 or h <= 0:
            return False
        return (w * h <= target_area and
                w % divisor == 0 and
                h % divisor == 0)

    def get_ratio_diff(w, h):
        return abs(w / h - target_ratio)

    def snap(value, round_up=False):
        # Always snaps to the caller's divisor; the previous helper fell back
        # to 64 for the width candidates regardless of ``divisor``.
        if round_up:
            return divisor * ((value + (divisor - 1)) // divisor)
        return divisor * (value // divisor)

    max_area_h = int(np.sqrt(target_area / target_ratio))
    max_h = snap(max_area_h, round_up=True)

    possible_sizes = []
    for h in range(divisor, max_h + divisor, divisor):
        ideal_w = h * target_ratio
        for w in (snap(ideal_w), snap(ideal_w, round_up=True)):
            if check_valid(w, h):
                possible_sizes.append((w, h, get_ratio_diff(w, h)))

    if not possible_sizes:
        raise ValueError("Can not find suitable size")

    # Largest area first, then closest aspect ratio.
    possible_sizes.sort(key=lambda x: (-x[0] * x[1], x[2]))
    best_w, best_h, _ = possible_sizes[0]
    return int(best_w), int(best_h)
def resize_by_area(image, target_area, keep_aspect_ratio=True, divisor=64, padding_color=(0, 0, 0)):
    """Resize (with padding) an image to about ``target_area`` pixels.

    The target size comes from ``calculate_new_size``.  If that fails, a
    size is derived directly from ``target_area`` (keeping the aspect ratio,
    or square when ``keep_aspect_ratio`` is false) and floored to a multiple
    of ``divisor``.  The image is then letterboxed with ``padding_resize``.

    Args:
        image: Array whose first two dimensions are height and width.
        target_area: Desired pixel count.
        keep_aspect_ratio: Only used by the fallback size computation.
        divisor: Alignment of the output dimensions.
        padding_color: Fill colour passed to ``padding_resize``.

    Returns:
        The resized and padded image.
    """
    h, w = image.shape[:2]
    try:
        new_w, new_h = calculate_new_size(w, h, target_area, divisor)
    except Exception:
        # Best-effort fallback for any failure of the size search.  Unlike a
        # bare ``except`` this lets KeyboardInterrupt/SystemExit propagate.
        aspect_ratio = w / h
        if keep_aspect_ratio:
            new_h = math.sqrt(target_area / aspect_ratio)
            new_w = target_area / new_h
        else:
            new_w = new_h = math.sqrt(target_area)
        new_w, new_h = int((new_w // divisor) * divisor), int((new_h // divisor) * divisor)

    # INTER_AREA when shrinking, INTER_LINEAR when enlarging.
    interpolation = cv2.INTER_AREA if (new_w * new_h < w * h) else cv2.INTER_LINEAR
    resized_image = padding_resize(image, height=new_h, width=new_w, padding_color=padding_color,
                                   interpolation=interpolation)
    return resized_image
def padding_resize(img_ori, height=512, width=512, padding_color=(0, 0, 0), interpolation=cv2.INTER_LINEAR):
    """Letterbox an image into a ``height`` x ``width`` canvas.

    The image is resized to fill the canvas along one axis while keeping its
    aspect ratio, then centred; the rest of the canvas keeps
    ``padding_color`` (first entry for single-channel images, first three
    entries otherwise).

    Args:
        img_ori: 3-D array ``(rows, cols, channels)``.
        height: Canvas height.
        width: Canvas width.
        padding_color: Per-channel fill values.
        interpolation: OpenCV interpolation flag.

    Returns:
        New array of shape ``(height, width, channels)`` with the dtype of
        ``img_ori``.
    """
    src_h, src_w, channel = img_ori.shape[0], img_ori.shape[1], img_ori.shape[2]

    canvas = np.zeros((height, width, channel), dtype=img_ori.dtype)
    for c in range(1 if channel == 1 else 3):
        canvas[:, :, c] = padding_color[c]

    if (src_h / src_w) > (height / width):
        # Source is relatively taller: fit the height, centre horizontally.
        fit_w = int(height / src_h * src_w)
        resized = cv2.resize(img_ori, (fit_w, height), interpolation=interpolation)
        x_off = int((width - fit_w) / 2)
        rows, cols = slice(None), slice(x_off, x_off + fit_w)
    else:
        # Source is relatively wider: fit the width, centre vertically.
        fit_h = int(width / src_w * src_h)
        resized = cv2.resize(img_ori, (width, fit_h), interpolation=interpolation)
        y_off = int((height - fit_h) / 2)
        rows, cols = slice(y_off, y_off + fit_h), slice(None)

    if len(resized.shape) == 2:
        # The resize result can come back without a channel axis.
        resized = resized[:, :, np.newaxis]
    canvas[rows, cols, :] = resized
    return canvas
def _crop_to_nonzero_bounds(img_ori, ref, extra_padding):
    """Crop ``img_ori`` to the non-zero bounding box of ``ref`` plus a margin.

    Args:
        img_ori: Image array indexed as ``[row, col]`` or ``[row, col, channel]``.
        ref: Array whose non-zero pixels define the bounding box. 2-D inputs
            are tested with ``> 0``; 3-D inputs count a pixel as content when
            any channel is non-zero.
        extra_padding: Margin, in pixels, added on every side of the box.

    Returns:
        The cropped image. Where the padded box extends past the borders of
        ``img_ori`` the missing area is filled with zeros. If ``ref`` has no
        non-zero pixel, ``img_ori`` is returned unchanged.
    """
    mask = ref > 0 if ref.ndim == 2 else np.any(ref != 0, axis=2)
    coords = np.argwhere(mask)
    if coords.size == 0:
        # Nothing to anchor the crop on: fall back to the full image.
        return img_ori

    y0, x0 = coords.min(axis=0)
    y1, x1 = coords.max(axis=0) + 1
    # Intended crop window; it may extend past the image borders.
    pad_y0, pad_x0 = y0 - extra_padding, x0 - extra_padding
    pad_y1, pad_x1 = y1 + extra_padding, x1 + extra_padding
    # Part of that window that actually lies inside the image.
    crop_y0, crop_x0 = max(pad_y0, 0), max(pad_x0, 0)
    crop_y1 = min(pad_y1, img_ori.shape[0])
    crop_x1 = min(pad_x1, img_ori.shape[1])
    crop_img = img_ori[crop_y0:crop_y1, crop_x0:crop_x1]

    # Whatever was clipped away is restored as zero padding.
    pad_top, pad_left = int(crop_y0 - pad_y0), int(crop_x0 - pad_x0)
    pad_bottom, pad_right = int(pad_y1 - crop_y1), int(pad_x1 - crop_x1)
    if any([pad_top, pad_left, pad_bottom, pad_right]):
        # One (before, after) pair per axis. Keyed on ndim rather than the
        # channel count so that (H, W, 1) images get a 3-entry spec too.
        pad_width = [(pad_top, pad_bottom), (pad_left, pad_right)]
        pad_width += [(0, 0)] * (crop_img.ndim - 2)
        crop_img = np.pad(crop_img, pad_width, mode='constant', constant_values=0)
    return crop_img


def resize_to_bounds(img_ori, height=512, width=512, padding_color=(0, 0, 0), interpolation=cv2.INTER_LINEAR, extra_padding=64, crop_target_image=None):
    """Crop an image to its content, then letterbox it into a fixed canvas.

    The content bounding box is the extent of the non-zero pixels of
    ``crop_target_image`` when given, otherwise of ``img_ori`` itself. The box
    is grown by ``extra_padding`` pixels per side (zero-filled where it leaves
    the image), resized with its aspect ratio preserved so that it fits inside
    ``width`` x ``height``, and centred on a canvas filled with
    ``padding_color``.

    Args:
        img_ori: Source image, 2-D or 3-D array.
        height: Output canvas height in pixels.
        width: Output canvas width in pixels.
        padding_color: Per-channel fill values for the canvas; reused
            cyclically if the image has more channels than entries.
        interpolation: OpenCV interpolation flag passed to ``cv2.resize``.
        extra_padding: Margin in pixels added around the content box.
        crop_target_image: Optional reference image used only to locate the
            content box. If it is entirely zero, the full ``img_ori`` is used.

    Returns:
        Array of shape ``(height, width, channel)`` with the dtype of the
        cropped image, where ``channel`` is 1 for 2-D inputs.
    """
    ref = img_ori if crop_target_image is None else crop_target_image
    crop_img = _crop_to_nonzero_bounds(img_ori, ref, extra_padding)

    ori_height, ori_width = crop_img.shape[:2]
    channel = crop_img.shape[2] if crop_img.ndim == 3 else 1

    img_pad = np.zeros((height, width, channel), dtype=crop_img.dtype)
    for c in range(channel):
        img_pad[:, :, c] = padding_color[c % len(padding_color)]

    # Resize cropped image to fit target size, preserving aspect ratio.
    crop_aspect = ori_width / ori_height
    target_aspect = width / height
    if crop_aspect > target_aspect:
        new_width = width
        # Clamp to 1: a very wide crop would otherwise truncate to a zero
        # height, which cv2.resize rejects.
        new_height = max(1, int(width / crop_aspect))
    else:
        new_height = height
        new_width = max(1, int(height * crop_aspect))

    img = cv2.resize(crop_img, (new_width, new_height), interpolation=interpolation)
    if img.ndim == 2:
        # cv2.resize drops a trailing singleton channel; restore it.
        img = img[:, :, np.newaxis]

    y_pad = (height - new_height) // 2
    x_pad = (width - new_width) // 2
    img_pad[y_pad:y_pad + new_height, x_pad:x_pad + new_width, :] = img
    return img_pad
def get_frame_indices(frame_num, video_fps, clip_length, train_fps):
    """Map ``clip_length`` samples taken at ``train_fps`` onto source frames.

    Sample ``i`` sits at time ``i / train_fps``; it is converted to a frame
    index at ``video_fps`` with ``np.round`` and clipped to
    ``[0, frame_num - 1]``.

    Returns:
        A list of ``clip_length`` integer frame indices, starting at frame 0.
    """
    sample_times = np.arange(0, clip_length) / train_fps
    source_positions = np.round(sample_times * video_fps).astype(int)
    last_valid_index = frame_num - 1
    return np.clip(source_positions, 0, last_valid_index).tolist()
def get_face_bboxes(kp2ds, scale, image_shape):
    """Compute an enlarged, image-clipped bounding box around face keypoints.

    Args:
        kp2ds: Array of 2-D keypoints; every row after the first is
            multiplied by ``(w, h)`` and used for the box (presumably
            normalized ``(x, y)`` coordinates -- confirm against the caller).
        scale: Factor by which the area of the tight box is enlarged while
            keeping its aspect ratio.
        image_shape: ``(height, width)`` of the image; the box is clipped to it.

    Returns:
        ``[min_x, max_x, min_y, max_y]`` as Python ints (truncated).
    """
    h, w = image_shape
    kp2ds_face = kp2ds.copy()[1:] * (w, h)
    min_x, min_y = np.min(kp2ds_face, axis=0)
    max_x, max_y = np.max(kp2ds_face, axis=0)

    initial_width = max_x - min_x
    initial_height = max_y - min_y

    if initial_width > 0 and initial_height > 0:
        initial_area = initial_width * initial_height
        expanded_area = initial_area * scale
        new_width = np.sqrt(expanded_area * (initial_width / initial_height))
        new_height = np.sqrt(expanded_area * (initial_height / initial_width))
    else:
        # Degenerate box (all keypoints share an x or a y): the aspect-ratio
        # divisions above would yield inf/NaN and int() would then raise.
        # Scaling the area by `scale` at fixed aspect ratio is the same as
        # scaling each side by sqrt(scale), which needs no division.
        side_scale = np.sqrt(scale)
        new_width = initial_width * side_scale
        new_height = initial_height * side_scale

    delta_width = (new_width - initial_width) / 2
    # The extra height is split in quarters: three go to the min_y side and
    # one to the max_y side, so the box grows mostly towards smaller y.
    delta_height = (new_height - initial_height) / 4

    expanded_min_x = max(min_x - delta_width, 0)
    expanded_max_x = min(max_x + delta_width, w)
    expanded_min_y = max(min_y - 3 * delta_height, 0)
    expanded_max_y = min(max_y + delta_height, h)
    return [int(expanded_min_x), int(expanded_max_x), int(expanded_min_y), int(expanded_max_y)]