highscoregames12018 commited on
Commit
e8ef05d
·
verified ·
1 Parent(s): 94a2a0f

Add/update custom_nodes

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. custom_nodes/Civicomfy/download_history.json +0 -0
  2. custom_nodes/comfyui-segment-anything-2/.gitattributes +2 -0
  3. custom_nodes/comfyui-segment-anything-2/.github/workflows/publish.yml +25 -0
  4. custom_nodes/comfyui-segment-anything-2/.gitignore +9 -0
  5. custom_nodes/comfyui-segment-anything-2/.tracking +43 -0
  6. custom_nodes/comfyui-segment-anything-2/LICENSE +201 -0
  7. custom_nodes/comfyui-segment-anything-2/__init__.py +3 -0
  8. custom_nodes/comfyui-segment-anything-2/example_workflows/florence_segment_2.json +579 -0
  9. custom_nodes/comfyui-segment-anything-2/example_workflows/image_batch_bbox_segment.json +766 -0
  10. custom_nodes/comfyui-segment-anything-2/example_workflows/points_segment_video_example.json +447 -0
  11. custom_nodes/comfyui-segment-anything-2/load_model.py +194 -0
  12. custom_nodes/comfyui-segment-anything-2/nodes.py +771 -0
  13. custom_nodes/comfyui-segment-anything-2/pyproject.toml +15 -0
  14. custom_nodes/comfyui-segment-anything-2/readme.md +25 -0
  15. custom_nodes/comfyui-segment-anything-2/sam2/__init__.py +5 -0
  16. custom_nodes/comfyui-segment-anything-2/sam2/automatic_mask_generator.py +436 -0
  17. custom_nodes/comfyui-segment-anything-2/sam2/modeling/__init__.py +5 -0
  18. custom_nodes/comfyui-segment-anything-2/sam2/modeling/backbones/__init__.py +5 -0
  19. custom_nodes/comfyui-segment-anything-2/sam2/modeling/backbones/hieradet.py +316 -0
  20. custom_nodes/comfyui-segment-anything-2/sam2/modeling/backbones/image_encoder.py +134 -0
  21. custom_nodes/comfyui-segment-anything-2/sam2/modeling/backbones/utils.py +95 -0
  22. custom_nodes/comfyui-segment-anything-2/sam2/modeling/memory_attention.py +169 -0
  23. custom_nodes/comfyui-segment-anything-2/sam2/modeling/memory_encoder.py +181 -0
  24. custom_nodes/comfyui-segment-anything-2/sam2/modeling/position_encoding.py +220 -0
  25. custom_nodes/comfyui-segment-anything-2/sam2/modeling/sam/__init__.py +5 -0
  26. custom_nodes/comfyui-segment-anything-2/sam2/modeling/sam/mask_decoder.py +295 -0
  27. custom_nodes/comfyui-segment-anything-2/sam2/modeling/sam/prompt_encoder.py +182 -0
  28. custom_nodes/comfyui-segment-anything-2/sam2/modeling/sam/transformer.py +347 -0
  29. custom_nodes/comfyui-segment-anything-2/sam2/modeling/sam2_base.py +907 -0
  30. custom_nodes/comfyui-segment-anything-2/sam2/modeling/sam2_utils.py +323 -0
  31. custom_nodes/comfyui-segment-anything-2/sam2/sam2_image_predictor.py +446 -0
  32. custom_nodes/comfyui-segment-anything-2/sam2/sam2_video_predictor.py +1154 -0
  33. custom_nodes/comfyui-segment-anything-2/sam2/utils/__init__.py +5 -0
  34. custom_nodes/comfyui-segment-anything-2/sam2/utils/amg.py +348 -0
  35. custom_nodes/comfyui-segment-anything-2/sam2/utils/misc.py +349 -0
  36. custom_nodes/comfyui-segment-anything-2/sam2/utils/transforms.py +106 -0
  37. custom_nodes/comfyui-segment-anything-2/sam2_configs/__init__.py +5 -0
  38. custom_nodes/comfyui-segment-anything-2/sam2_configs/sam2.1_hiera_b+.yaml +116 -0
  39. custom_nodes/comfyui-segment-anything-2/sam2_configs/sam2.1_hiera_l.yaml +120 -0
  40. custom_nodes/comfyui-segment-anything-2/sam2_configs/sam2.1_hiera_s.yaml +119 -0
  41. custom_nodes/comfyui-segment-anything-2/sam2_configs/sam2.1_hiera_t.yaml +121 -0
  42. custom_nodes/comfyui-segment-anything-2/sam2_configs/sam2_hiera_b+.yaml +119 -0
  43. custom_nodes/comfyui-segment-anything-2/sam2_configs/sam2_hiera_l.yaml +120 -0
  44. custom_nodes/comfyui-segment-anything-2/sam2_configs/sam2_hiera_s.yaml +119 -0
  45. custom_nodes/comfyui-segment-anything-2/sam2_configs/sam2_hiera_t.yaml +121 -0
  46. custom_nodes/comfyui-tensorops/.gitattributes +2 -0
  47. custom_nodes/comfyui-tensorops/.gitignore +2 -0
  48. custom_nodes/comfyui-tensorops/__init__.py +3 -0
  49. custom_nodes/comfyui-tensorops/nodes/__init__.py +54 -0
  50. custom_nodes/comfyui-tensorops/nodes/background_select.py +71 -0
custom_nodes/Civicomfy/download_history.json CHANGED
The diff for this file is too large to render. See raw diff
 
custom_nodes/comfyui-segment-anything-2/.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Auto detect text files and perform LF normalization
2
+ * text=auto
custom_nodes/comfyui-segment-anything-2/.github/workflows/publish.yml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Publish to Comfy registry
2
+ on:
3
+ workflow_dispatch:
4
+ push:
5
+ branches:
6
+ - main
7
+ paths:
8
+ - "pyproject.toml"
9
+
10
+ permissions:
11
+ issues: write
12
+
13
+ jobs:
14
+ publish-node:
15
+ name: Publish Custom Node to registry
16
+ runs-on: ubuntu-latest
17
+ if: ${{ github.repository_owner == 'kijai' }}
18
+ steps:
19
+ - name: Check out code
20
+ uses: actions/checkout@v4
21
+ - name: Publish Custom Node
22
+ uses: Comfy-Org/publish-node-action@v1
23
+ with:
24
+ ## Add your own personal access token to your Github Repository secrets and reference it here.
25
+ personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }}
custom_nodes/comfyui-segment-anything-2/.gitignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ .DS_Store
2
+ *pyc
3
+ .vscode
4
+ __pycache__
5
+ *.egg-info
6
+ *.bak
7
+ checkpoints
8
+ results
9
+ backup
custom_nodes/comfyui-segment-anything-2/.tracking ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .gitattributes
2
+ .github/workflows/publish.yml
3
+ .gitignore
4
+ LICENSE
5
+ __init__.py
6
+ example_workflows/florence_segment_2.json
7
+ example_workflows/image_batch_bbox_segment.json
8
+ example_workflows/points_segment_video_example.json
9
+ load_model.py
10
+ nodes.py
11
+ pyproject.toml
12
+ readme.md
13
+ sam2/__init__.py
14
+ sam2/automatic_mask_generator.py
15
+ sam2/modeling/__init__.py
16
+ sam2/modeling/backbones/__init__.py
17
+ sam2/modeling/backbones/hieradet.py
18
+ sam2/modeling/backbones/image_encoder.py
19
+ sam2/modeling/backbones/utils.py
20
+ sam2/modeling/memory_attention.py
21
+ sam2/modeling/memory_encoder.py
22
+ sam2/modeling/position_encoding.py
23
+ sam2/modeling/sam/__init__.py
24
+ sam2/modeling/sam/mask_decoder.py
25
+ sam2/modeling/sam/prompt_encoder.py
26
+ sam2/modeling/sam/transformer.py
27
+ sam2/modeling/sam2_base.py
28
+ sam2/modeling/sam2_utils.py
29
+ sam2/sam2_image_predictor.py
30
+ sam2/sam2_video_predictor.py
31
+ sam2/utils/__init__.py
32
+ sam2/utils/amg.py
33
+ sam2/utils/misc.py
34
+ sam2/utils/transforms.py
35
+ sam2_configs/__init__.py
36
+ sam2_configs/sam2.1_hiera_b+.yaml
37
+ sam2_configs/sam2.1_hiera_l.yaml
38
+ sam2_configs/sam2.1_hiera_s.yaml
39
+ sam2_configs/sam2.1_hiera_t.yaml
40
+ sam2_configs/sam2_hiera_b+.yaml
41
+ sam2_configs/sam2_hiera_l.yaml
42
+ sam2_configs/sam2_hiera_s.yaml
43
+ sam2_configs/sam2_hiera_t.yaml
custom_nodes/comfyui-segment-anything-2/LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
custom_nodes/comfyui-segment-anything-2/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS
2
+
3
+ __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
custom_nodes/comfyui-segment-anything-2/example_workflows/florence_segment_2.json ADDED
@@ -0,0 +1,579 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "last_node_id": 102,
3
+ "last_link_id": 239,
4
+ "nodes": [
5
+ {
6
+ "id": 83,
7
+ "type": "LoadImage",
8
+ "pos": [
9
+ -6,
10
+ 40
11
+ ],
12
+ "size": {
13
+ "0": 315,
14
+ "1": 314
15
+ },
16
+ "flags": {},
17
+ "order": 0,
18
+ "mode": 0,
19
+ "outputs": [
20
+ {
21
+ "name": "IMAGE",
22
+ "type": "IMAGE",
23
+ "links": [
24
+ 196
25
+ ],
26
+ "shape": 3,
27
+ "slot_index": 0
28
+ },
29
+ {
30
+ "name": "MASK",
31
+ "type": "MASK",
32
+ "links": null,
33
+ "shape": 3
34
+ }
35
+ ],
36
+ "properties": {
37
+ "Node name for S&R": "LoadImage"
38
+ },
39
+ "widgets_values": [
40
+ "truck.jpg",
41
+ "image"
42
+ ]
43
+ },
44
+ {
45
+ "id": 66,
46
+ "type": "DownloadAndLoadSAM2Model",
47
+ "pos": [
48
+ -34,
49
+ -171
50
+ ],
51
+ "size": {
52
+ "0": 351.7801513671875,
53
+ "1": 130
54
+ },
55
+ "flags": {},
56
+ "order": 1,
57
+ "mode": 0,
58
+ "outputs": [
59
+ {
60
+ "name": "sam2_model",
61
+ "type": "SAM2MODEL",
62
+ "links": [
63
+ 236
64
+ ],
65
+ "shape": 3,
66
+ "slot_index": 0
67
+ }
68
+ ],
69
+ "properties": {
70
+ "Node name for S&R": "DownloadAndLoadSAM2Model"
71
+ },
72
+ "widgets_values": [
73
+ "sam2_hiera_small.safetensors",
74
+ "single_image",
75
+ "cuda",
76
+ "bf16"
77
+ ]
78
+ },
79
+ {
80
+ "id": 84,
81
+ "type": "ImageAndMaskPreview",
82
+ "pos": [
83
+ 958,
84
+ -293
85
+ ],
86
+ "size": {
87
+ "0": 667.9199829101562,
88
+ "1": 541.2733154296875
89
+ },
90
+ "flags": {},
91
+ "order": 9,
92
+ "mode": 0,
93
+ "inputs": [
94
+ {
95
+ "name": "image",
96
+ "type": "IMAGE",
97
+ "link": 192
98
+ },
99
+ {
100
+ "name": "mask",
101
+ "type": "MASK",
102
+ "link": 238,
103
+ "slot_index": 1
104
+ }
105
+ ],
106
+ "outputs": [
107
+ {
108
+ "name": "composite",
109
+ "type": "IMAGE",
110
+ "links": null,
111
+ "shape": 3
112
+ }
113
+ ],
114
+ "properties": {
115
+ "Node name for S&R": "ImageAndMaskPreview"
116
+ },
117
+ "widgets_values": [
118
+ 1,
119
+ "255, 0, 0",
120
+ false
121
+ ]
122
+ },
123
+ {
124
+ "id": 72,
125
+ "type": "ImageResizeKJ",
126
+ "pos": [
127
+ 353,
128
+ 127
129
+ ],
130
+ "size": {
131
+ "0": 315,
132
+ "1": 242
133
+ },
134
+ "flags": {},
135
+ "order": 3,
136
+ "mode": 0,
137
+ "inputs": [
138
+ {
139
+ "name": "image",
140
+ "type": "IMAGE",
141
+ "link": 196
142
+ },
143
+ {
144
+ "name": "get_image_size",
145
+ "type": "IMAGE",
146
+ "link": null
147
+ },
148
+ {
149
+ "name": "width_input",
150
+ "type": "INT",
151
+ "link": null,
152
+ "widget": {
153
+ "name": "width_input"
154
+ }
155
+ },
156
+ {
157
+ "name": "height_input",
158
+ "type": "INT",
159
+ "link": null,
160
+ "widget": {
161
+ "name": "height_input"
162
+ }
163
+ }
164
+ ],
165
+ "outputs": [
166
+ {
167
+ "name": "IMAGE",
168
+ "type": "IMAGE",
169
+ "links": [
170
+ 192,
171
+ 210,
172
+ 226,
173
+ 237
174
+ ],
175
+ "shape": 3,
176
+ "slot_index": 0
177
+ },
178
+ {
179
+ "name": "width",
180
+ "type": "INT",
181
+ "links": null,
182
+ "shape": 3
183
+ },
184
+ {
185
+ "name": "height",
186
+ "type": "INT",
187
+ "links": null,
188
+ "shape": 3
189
+ }
190
+ ],
191
+ "properties": {
192
+ "Node name for S&R": "ImageResizeKJ"
193
+ },
194
+ "widgets_values": [
195
+ 768,
196
+ 512,
197
+ "nearest-exact",
198
+ false,
199
+ 2,
200
+ 0,
201
+ 0
202
+ ]
203
+ },
204
+ {
205
+ "id": 99,
206
+ "type": "PreviewImage",
207
+ "pos": [
208
+ 1044,
209
+ -744
210
+ ],
211
+ "size": {
212
+ "0": 530.9268798828125,
213
+ "1": 363.34893798828125
214
+ },
215
+ "flags": {},
216
+ "order": 5,
217
+ "mode": 0,
218
+ "inputs": [
219
+ {
220
+ "name": "images",
221
+ "type": "IMAGE",
222
+ "link": 226
223
+ }
224
+ ],
225
+ "properties": {
226
+ "Node name for S&R": "PreviewImage"
227
+ }
228
+ },
229
+ {
230
+ "id": 90,
231
+ "type": "PreviewImage",
232
+ "pos": [
233
+ 422,
234
+ -800
235
+ ],
236
+ "size": {
237
+ "0": 568.406494140625,
238
+ "1": 384.9489440917969
239
+ },
240
+ "flags": {},
241
+ "order": 6,
242
+ "mode": 0,
243
+ "inputs": [
244
+ {
245
+ "name": "images",
246
+ "type": "IMAGE",
247
+ "link": 200
248
+ }
249
+ ],
250
+ "properties": {
251
+ "Node name for S&R": "PreviewImage"
252
+ }
253
+ },
254
+ {
255
+ "id": 93,
256
+ "type": "Florence2toCoordinates",
257
+ "pos": [
258
+ 399,
259
+ -314
260
+ ],
261
+ "size": {
262
+ "0": 210,
263
+ "1": 78
264
+ },
265
+ "flags": {},
266
+ "order": 7,
267
+ "mode": 0,
268
+ "inputs": [
269
+ {
270
+ "name": "data",
271
+ "type": "JSON",
272
+ "link": 204
273
+ }
274
+ ],
275
+ "outputs": [
276
+ {
277
+ "name": "coordinates",
278
+ "type": "STRING",
279
+ "links": [],
280
+ "shape": 3,
281
+ "slot_index": 0
282
+ },
283
+ {
284
+ "name": "bboxes",
285
+ "type": "BBOX",
286
+ "links": [
287
+ 239
288
+ ],
289
+ "shape": 3,
290
+ "slot_index": 1
291
+ }
292
+ ],
293
+ "properties": {
294
+ "Node name for S&R": "Florence2toCoordinates"
295
+ },
296
+ "widgets_values": [
297
+ ""
298
+ ]
299
+ },
300
+ {
301
+ "id": 87,
302
+ "type": "Florence2Run",
303
+ "pos": [
304
+ -85,
305
+ -796
306
+ ],
307
+ "size": {
308
+ "0": 400,
309
+ "1": 304
310
+ },
311
+ "flags": {},
312
+ "order": 4,
313
+ "mode": 0,
314
+ "inputs": [
315
+ {
316
+ "name": "image",
317
+ "type": "IMAGE",
318
+ "link": 210,
319
+ "slot_index": 0
320
+ },
321
+ {
322
+ "name": "florence2_model",
323
+ "type": "FL2MODEL",
324
+ "link": 197,
325
+ "slot_index": 1
326
+ }
327
+ ],
328
+ "outputs": [
329
+ {
330
+ "name": "image",
331
+ "type": "IMAGE",
332
+ "links": [
333
+ 200
334
+ ],
335
+ "shape": 3,
336
+ "slot_index": 0
337
+ },
338
+ {
339
+ "name": "mask",
340
+ "type": "MASK",
341
+ "links": null,
342
+ "shape": 3
343
+ },
344
+ {
345
+ "name": "caption",
346
+ "type": "STRING",
347
+ "links": null,
348
+ "shape": 3,
349
+ "slot_index": 2
350
+ },
351
+ {
352
+ "name": "data",
353
+ "type": "JSON",
354
+ "links": [
355
+ 204
356
+ ],
357
+ "shape": 3,
358
+ "slot_index": 3
359
+ }
360
+ ],
361
+ "properties": {
362
+ "Node name for S&R": "Florence2Run"
363
+ },
364
+ "widgets_values": [
365
+ "wheel",
366
+ "caption_to_phrase_grounding",
367
+ true,
368
+ false,
369
+ 1024,
370
+ 3,
371
+ true,
372
+ ""
373
+ ]
374
+ },
375
+ {
376
+ "id": 102,
377
+ "type": "Sam2Segmentation",
378
+ "pos": [
379
+ 440,
380
+ -120
381
+ ],
382
+ "size": [
383
+ 314.5386123916544,
384
+ 162
385
+ ],
386
+ "flags": {},
387
+ "order": 8,
388
+ "mode": 0,
389
+ "inputs": [
390
+ {
391
+ "name": "sam2_model",
392
+ "type": "SAM2MODEL",
393
+ "link": 236
394
+ },
395
+ {
396
+ "name": "image",
397
+ "type": "IMAGE",
398
+ "link": 237
399
+ },
400
+ {
401
+ "name": "bboxes",
402
+ "type": "BBOX",
403
+ "link": 239
404
+ },
405
+ {
406
+ "name": "coordinates_positive",
407
+ "type": "STRING",
408
+ "link": null,
409
+ "widget": {
410
+ "name": "coordinates_positive"
411
+ }
412
+ },
413
+ {
414
+ "name": "coordinates_negative",
415
+ "type": "STRING",
416
+ "link": null,
417
+ "widget": {
418
+ "name": "coordinates_negative"
419
+ }
420
+ }
421
+ ],
422
+ "outputs": [
423
+ {
424
+ "name": "mask",
425
+ "type": "MASK",
426
+ "links": [
427
+ 238
428
+ ],
429
+ "shape": 3
430
+ }
431
+ ],
432
+ "properties": {
433
+ "Node name for S&R": "Sam2Segmentation"
434
+ },
435
+ "widgets_values": [
436
+ true,
437
+ "",
438
+ "",
439
+ true
440
+ ]
441
+ },
442
+ {
443
+ "id": 88,
444
+ "type": "DownloadAndLoadFlorence2Model",
445
+ "pos": [
446
+ -470,
447
+ -777
448
+ ],
449
+ "size": {
450
+ "0": 315,
451
+ "1": 106
452
+ },
453
+ "flags": {},
454
+ "order": 2,
455
+ "mode": 0,
456
+ "outputs": [
457
+ {
458
+ "name": "florence2_model",
459
+ "type": "FL2MODEL",
460
+ "links": [
461
+ 197
462
+ ],
463
+ "shape": 3,
464
+ "slot_index": 0
465
+ }
466
+ ],
467
+ "properties": {
468
+ "Node name for S&R": "DownloadAndLoadFlorence2Model"
469
+ },
470
+ "widgets_values": [
471
+ "microsoft/Florence-2-base",
472
+ "fp16",
473
+ "sdpa"
474
+ ]
475
+ }
476
+ ],
477
+ "links": [
478
+ [
479
+ 192,
480
+ 72,
481
+ 0,
482
+ 84,
483
+ 0,
484
+ "IMAGE"
485
+ ],
486
+ [
487
+ 196,
488
+ 83,
489
+ 0,
490
+ 72,
491
+ 0,
492
+ "IMAGE"
493
+ ],
494
+ [
495
+ 197,
496
+ 88,
497
+ 0,
498
+ 87,
499
+ 1,
500
+ "FL2MODEL"
501
+ ],
502
+ [
503
+ 200,
504
+ 87,
505
+ 0,
506
+ 90,
507
+ 0,
508
+ "IMAGE"
509
+ ],
510
+ [
511
+ 204,
512
+ 87,
513
+ 3,
514
+ 93,
515
+ 0,
516
+ "JSON"
517
+ ],
518
+ [
519
+ 210,
520
+ 72,
521
+ 0,
522
+ 87,
523
+ 0,
524
+ "IMAGE"
525
+ ],
526
+ [
527
+ 226,
528
+ 72,
529
+ 0,
530
+ 99,
531
+ 0,
532
+ "IMAGE"
533
+ ],
534
+ [
535
+ 236,
536
+ 66,
537
+ 0,
538
+ 102,
539
+ 0,
540
+ "SAM2MODEL"
541
+ ],
542
+ [
543
+ 237,
544
+ 72,
545
+ 0,
546
+ 102,
547
+ 1,
548
+ "IMAGE"
549
+ ],
550
+ [
551
+ 238,
552
+ 102,
553
+ 0,
554
+ 84,
555
+ 1,
556
+ "MASK"
557
+ ],
558
+ [
559
+ 239,
560
+ 93,
561
+ 1,
562
+ 102,
563
+ 2,
564
+ "BBOX"
565
+ ]
566
+ ],
567
+ "groups": [],
568
+ "config": {},
569
+ "extra": {
570
+ "ds": {
571
+ "scale": 0.7627768444385467,
572
+ "offset": [
573
+ 564.3268832902941,
574
+ 896.4031145502903
575
+ ]
576
+ }
577
+ },
578
+ "version": 0.4
579
+ }
custom_nodes/comfyui-segment-anything-2/example_workflows/image_batch_bbox_segment.json ADDED
@@ -0,0 +1,766 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "last_node_id": 30,
3
+ "last_link_id": 58,
4
+ "nodes": [
5
+ {
6
+ "id": 2,
7
+ "type": "DownloadAndLoadSAM2Model",
8
+ "pos": [
9
+ 119,
10
+ 52
11
+ ],
12
+ "size": {
13
+ "0": 315,
14
+ "1": 130
15
+ },
16
+ "flags": {},
17
+ "order": 0,
18
+ "mode": 0,
19
+ "outputs": [
20
+ {
21
+ "name": "sam2_model",
22
+ "type": "SAM2MODEL",
23
+ "links": [
24
+ 9
25
+ ],
26
+ "shape": 3,
27
+ "slot_index": 0
28
+ }
29
+ ],
30
+ "properties": {
31
+ "Node name for S&R": "DownloadAndLoadSAM2Model"
32
+ },
33
+ "widgets_values": [
34
+ "sam2_hiera_base_plus.safetensors",
35
+ "single_image",
36
+ "cuda",
37
+ "bf16"
38
+ ]
39
+ },
40
+ {
41
+ "id": 13,
42
+ "type": "DownloadAndLoadFlorence2Model",
43
+ "pos": [
44
+ 105,
45
+ -299
46
+ ],
47
+ "size": {
48
+ "0": 315,
49
+ "1": 106
50
+ },
51
+ "flags": {},
52
+ "order": 1,
53
+ "mode": 0,
54
+ "inputs": [
55
+ {
56
+ "name": "lora",
57
+ "type": "PEFTLORA",
58
+ "link": null
59
+ }
60
+ ],
61
+ "outputs": [
62
+ {
63
+ "name": "florence2_model",
64
+ "type": "FL2MODEL",
65
+ "links": [
66
+ 23
67
+ ],
68
+ "shape": 3
69
+ }
70
+ ],
71
+ "properties": {
72
+ "Node name for S&R": "DownloadAndLoadFlorence2Model"
73
+ },
74
+ "widgets_values": [
75
+ "microsoft/Florence-2-large",
76
+ "fp16",
77
+ "sdpa"
78
+ ]
79
+ },
80
+ {
81
+ "id": 26,
82
+ "type": "MaskToImage",
83
+ "pos": [
84
+ 1161,
85
+ 280
86
+ ],
87
+ "size": {
88
+ "0": 210,
89
+ "1": 26
90
+ },
91
+ "flags": {},
92
+ "order": 8,
93
+ "mode": 0,
94
+ "inputs": [
95
+ {
96
+ "name": "mask",
97
+ "type": "MASK",
98
+ "link": 43
99
+ }
100
+ ],
101
+ "outputs": [
102
+ {
103
+ "name": "IMAGE",
104
+ "type": "IMAGE",
105
+ "links": [
106
+ 44
107
+ ],
108
+ "shape": 3,
109
+ "slot_index": 0
110
+ }
111
+ ],
112
+ "properties": {
113
+ "Node name for S&R": "MaskToImage"
114
+ }
115
+ },
116
+ {
117
+ "id": 25,
118
+ "type": "ImageCompositeMasked",
119
+ "pos": [
120
+ 1124,
121
+ 364
122
+ ],
123
+ "size": {
124
+ "0": 315,
125
+ "1": 146
126
+ },
127
+ "flags": {},
128
+ "order": 9,
129
+ "mode": 0,
130
+ "inputs": [
131
+ {
132
+ "name": "destination",
133
+ "type": "IMAGE",
134
+ "link": 55,
135
+ "slot_index": 0
136
+ },
137
+ {
138
+ "name": "source",
139
+ "type": "IMAGE",
140
+ "link": 44
141
+ },
142
+ {
143
+ "name": "mask",
144
+ "type": "MASK",
145
+ "link": 45
146
+ }
147
+ ],
148
+ "outputs": [
149
+ {
150
+ "name": "IMAGE",
151
+ "type": "IMAGE",
152
+ "links": [
153
+ 56
154
+ ],
155
+ "shape": 3,
156
+ "slot_index": 0
157
+ }
158
+ ],
159
+ "properties": {
160
+ "Node name for S&R": "ImageCompositeMasked"
161
+ },
162
+ "widgets_values": [
163
+ 0,
164
+ 0,
165
+ false
166
+ ]
167
+ },
168
+ {
169
+ "id": 29,
170
+ "type": "VHS_VideoCombine",
171
+ "pos": [
172
+ 1486,
173
+ 58
174
+ ],
175
+ "size": [
176
+ 772.7946166992188,
177
+ 859.3206163194444
178
+ ],
179
+ "flags": {},
180
+ "order": 10,
181
+ "mode": 0,
182
+ "inputs": [
183
+ {
184
+ "name": "images",
185
+ "type": "IMAGE",
186
+ "link": 56,
187
+ "slot_index": 0
188
+ },
189
+ {
190
+ "name": "audio",
191
+ "type": "VHS_AUDIO",
192
+ "link": null
193
+ },
194
+ {
195
+ "name": "meta_batch",
196
+ "type": "VHS_BatchManager",
197
+ "link": null
198
+ },
199
+ {
200
+ "name": "vae",
201
+ "type": "VAE",
202
+ "link": null
203
+ }
204
+ ],
205
+ "outputs": [
206
+ {
207
+ "name": "Filenames",
208
+ "type": "VHS_FILENAMES",
209
+ "links": null,
210
+ "shape": 3
211
+ }
212
+ ],
213
+ "properties": {
214
+ "Node name for S&R": "VHS_VideoCombine"
215
+ },
216
+ "widgets_values": {
217
+ "frame_rate": 3,
218
+ "loop_count": 0,
219
+ "filename_prefix": "AnimateDiff",
220
+ "format": "video/h264-mp4",
221
+ "pix_fmt": "yuv420p",
222
+ "crf": 19,
223
+ "save_metadata": true,
224
+ "pingpong": false,
225
+ "save_output": false,
226
+ "videopreview": {
227
+ "hidden": false,
228
+ "paused": false,
229
+ "params": {
230
+ "filename": "AnimateDiff_00002.mp4",
231
+ "subfolder": "",
232
+ "type": "temp",
233
+ "format": "video/h264-mp4",
234
+ "frame_rate": 3
235
+ }
236
+ }
237
+ }
238
+ },
239
+ {
240
+ "id": 11,
241
+ "type": "VHS_LoadVideo",
242
+ "pos": [
243
+ 76,
244
+ 274
245
+ ],
246
+ "size": [
247
+ 235.1999969482422,
248
+ 429.0311089409722
249
+ ],
250
+ "flags": {},
251
+ "order": 2,
252
+ "mode": 0,
253
+ "inputs": [
254
+ {
255
+ "name": "meta_batch",
256
+ "type": "VHS_BatchManager",
257
+ "link": null
258
+ },
259
+ {
260
+ "name": "vae",
261
+ "type": "VAE",
262
+ "link": null
263
+ }
264
+ ],
265
+ "outputs": [
266
+ {
267
+ "name": "IMAGE",
268
+ "type": "IMAGE",
269
+ "links": [
270
+ 28,
271
+ 37
272
+ ],
273
+ "shape": 3,
274
+ "slot_index": 0
275
+ },
276
+ {
277
+ "name": "frame_count",
278
+ "type": "INT",
279
+ "links": null,
280
+ "shape": 3
281
+ },
282
+ {
283
+ "name": "audio",
284
+ "type": "VHS_AUDIO",
285
+ "links": null,
286
+ "shape": 3
287
+ },
288
+ {
289
+ "name": "video_info",
290
+ "type": "VHS_VIDEOINFO",
291
+ "links": null,
292
+ "shape": 3
293
+ }
294
+ ],
295
+ "properties": {
296
+ "Node name for S&R": "VHS_LoadVideo"
297
+ },
298
+ "widgets_values": {
299
+ "video": "katana_02.mp4",
300
+ "force_rate": 0,
301
+ "force_size": "Disabled",
302
+ "custom_width": 512,
303
+ "custom_height": 512,
304
+ "frame_load_cap": 16,
305
+ "skip_first_frames": 0,
306
+ "select_every_nth": 5,
307
+ "choose video to upload": "image",
308
+ "videopreview": {
309
+ "hidden": false,
310
+ "paused": false,
311
+ "params": {
312
+ "frame_load_cap": 16,
313
+ "skip_first_frames": 0,
314
+ "force_rate": 0,
315
+ "filename": "katana_02.mp4",
316
+ "type": "input",
317
+ "format": "video/mp4",
318
+ "select_every_nth": 5
319
+ }
320
+ }
321
+ }
322
+ },
323
+ {
324
+ "id": 21,
325
+ "type": "GetImageSizeAndCount",
326
+ "pos": [
327
+ 393,
328
+ 282
329
+ ],
330
+ "size": {
331
+ "0": 210,
332
+ "1": 86
333
+ },
334
+ "flags": {},
335
+ "order": 4,
336
+ "mode": 0,
337
+ "inputs": [
338
+ {
339
+ "name": "image",
340
+ "type": "IMAGE",
341
+ "link": 37
342
+ }
343
+ ],
344
+ "outputs": [
345
+ {
346
+ "name": "image",
347
+ "type": "IMAGE",
348
+ "links": [
349
+ 58
350
+ ],
351
+ "shape": 3,
352
+ "slot_index": 0
353
+ },
354
+ {
355
+ "name": "1440 width",
356
+ "type": "INT",
357
+ "links": null,
358
+ "shape": 3
359
+ },
360
+ {
361
+ "name": "1024 height",
362
+ "type": "INT",
363
+ "links": null,
364
+ "shape": 3
365
+ },
366
+ {
367
+ "name": "16 count",
368
+ "type": "INT",
369
+ "links": null,
370
+ "shape": 3
371
+ }
372
+ ],
373
+ "properties": {
374
+ "Node name for S&R": "GetImageSizeAndCount"
375
+ }
376
+ },
377
+ {
378
+ "id": 7,
379
+ "type": "Sam2Segmentation",
380
+ "pos": [
381
+ 744,
382
+ 227
383
+ ],
384
+ "size": {
385
+ "0": 314.3733825683594,
386
+ "1": 190.31338500976562
387
+ },
388
+ "flags": {},
389
+ "order": 7,
390
+ "mode": 0,
391
+ "inputs": [
392
+ {
393
+ "name": "sam2_model",
394
+ "type": "SAM2MODEL",
395
+ "link": 9
396
+ },
397
+ {
398
+ "name": "image",
399
+ "type": "IMAGE",
400
+ "link": 58
401
+ },
402
+ {
403
+ "name": "bboxes",
404
+ "type": "BBOX",
405
+ "link": 54
406
+ },
407
+ {
408
+ "name": "mask",
409
+ "type": "MASK",
410
+ "link": null,
411
+ "slot_index": 3
412
+ },
413
+ {
414
+ "name": "coordinates_positive",
415
+ "type": "STRING",
416
+ "link": null,
417
+ "widget": {
418
+ "name": "coordinates_positive"
419
+ }
420
+ },
421
+ {
422
+ "name": "coordinates_negative",
423
+ "type": "STRING",
424
+ "link": null,
425
+ "widget": {
426
+ "name": "coordinates_negative"
427
+ }
428
+ }
429
+ ],
430
+ "outputs": [
431
+ {
432
+ "name": "mask",
433
+ "type": "MASK",
434
+ "links": [
435
+ 43,
436
+ 45
437
+ ],
438
+ "shape": 3,
439
+ "slot_index": 0
440
+ }
441
+ ],
442
+ "properties": {
443
+ "Node name for S&R": "Sam2Segmentation"
444
+ },
445
+ "widgets_values": [
446
+ true,
447
+ "",
448
+ "",
449
+ true
450
+ ]
451
+ },
452
+ {
453
+ "id": 30,
454
+ "type": "VHS_VideoCombine",
455
+ "pos": [
456
+ 1093,
457
+ -868
458
+ ],
459
+ "size": [
460
+ 772.7946166992188,
461
+ 859.3206163194444
462
+ ],
463
+ "flags": {},
464
+ "order": 5,
465
+ "mode": 0,
466
+ "inputs": [
467
+ {
468
+ "name": "images",
469
+ "type": "IMAGE",
470
+ "link": 57,
471
+ "slot_index": 0
472
+ },
473
+ {
474
+ "name": "audio",
475
+ "type": "VHS_AUDIO",
476
+ "link": null
477
+ },
478
+ {
479
+ "name": "meta_batch",
480
+ "type": "VHS_BatchManager",
481
+ "link": null
482
+ },
483
+ {
484
+ "name": "vae",
485
+ "type": "VAE",
486
+ "link": null
487
+ }
488
+ ],
489
+ "outputs": [
490
+ {
491
+ "name": "Filenames",
492
+ "type": "VHS_FILENAMES",
493
+ "links": null,
494
+ "shape": 3
495
+ }
496
+ ],
497
+ "properties": {
498
+ "Node name for S&R": "VHS_VideoCombine"
499
+ },
500
+ "widgets_values": {
501
+ "frame_rate": 3,
502
+ "loop_count": 0,
503
+ "filename_prefix": "AnimateDiff",
504
+ "format": "video/h264-mp4",
505
+ "pix_fmt": "yuv420p",
506
+ "crf": 19,
507
+ "save_metadata": true,
508
+ "pingpong": false,
509
+ "save_output": false,
510
+ "videopreview": {
511
+ "hidden": false,
512
+ "paused": false,
513
+ "params": {
514
+ "filename": "AnimateDiff_00001.mp4",
515
+ "subfolder": "",
516
+ "type": "temp",
517
+ "format": "video/h264-mp4",
518
+ "frame_rate": 3
519
+ }
520
+ }
521
+ }
522
+ },
523
+ {
524
+ "id": 16,
525
+ "type": "Florence2toCoordinates",
526
+ "pos": [
527
+ 942,
528
+ 16
529
+ ],
530
+ "size": {
531
+ "0": 315,
532
+ "1": 102
533
+ },
534
+ "flags": {},
535
+ "order": 6,
536
+ "mode": 0,
537
+ "inputs": [
538
+ {
539
+ "name": "data",
540
+ "type": "JSON",
541
+ "link": 26
542
+ }
543
+ ],
544
+ "outputs": [
545
+ {
546
+ "name": "center_coordinates",
547
+ "type": "STRING",
548
+ "links": [],
549
+ "shape": 3,
550
+ "slot_index": 0
551
+ },
552
+ {
553
+ "name": "bboxes",
554
+ "type": "BBOX",
555
+ "links": [
556
+ 54
557
+ ],
558
+ "shape": 3,
559
+ "slot_index": 1
560
+ }
561
+ ],
562
+ "properties": {
563
+ "Node name for S&R": "Florence2toCoordinates"
564
+ },
565
+ "widgets_values": [
566
+ "0",
567
+ true
568
+ ]
569
+ },
570
+ {
571
+ "id": 12,
572
+ "type": "Florence2Run",
573
+ "pos": [
574
+ 506,
575
+ -316
576
+ ],
577
+ "size": {
578
+ "0": 400,
579
+ "1": 352
580
+ },
581
+ "flags": {},
582
+ "order": 3,
583
+ "mode": 0,
584
+ "inputs": [
585
+ {
586
+ "name": "image",
587
+ "type": "IMAGE",
588
+ "link": 28,
589
+ "slot_index": 0
590
+ },
591
+ {
592
+ "name": "florence2_model",
593
+ "type": "FL2MODEL",
594
+ "link": 23,
595
+ "slot_index": 1
596
+ }
597
+ ],
598
+ "outputs": [
599
+ {
600
+ "name": "image",
601
+ "type": "IMAGE",
602
+ "links": [
603
+ 55,
604
+ 57
605
+ ],
606
+ "shape": 3,
607
+ "slot_index": 0
608
+ },
609
+ {
610
+ "name": "mask",
611
+ "type": "MASK",
612
+ "links": null,
613
+ "shape": 3
614
+ },
615
+ {
616
+ "name": "caption",
617
+ "type": "STRING",
618
+ "links": null,
619
+ "shape": 3
620
+ },
621
+ {
622
+ "name": "data",
623
+ "type": "JSON",
624
+ "links": [
625
+ 26
626
+ ],
627
+ "shape": 3,
628
+ "slot_index": 3
629
+ }
630
+ ],
631
+ "properties": {
632
+ "Node name for S&R": "Florence2Run"
633
+ },
634
+ "widgets_values": [
635
+ "sword",
636
+ "caption_to_phrase_grounding",
637
+ true,
638
+ false,
639
+ 1024,
640
+ 3,
641
+ true,
642
+ "",
643
+ 3228786869,
644
+ "fixed"
645
+ ]
646
+ }
647
+ ],
648
+ "links": [
649
+ [
650
+ 9,
651
+ 2,
652
+ 0,
653
+ 7,
654
+ 0,
655
+ "SAM2MODEL"
656
+ ],
657
+ [
658
+ 23,
659
+ 13,
660
+ 0,
661
+ 12,
662
+ 1,
663
+ "FL2MODEL"
664
+ ],
665
+ [
666
+ 26,
667
+ 12,
668
+ 3,
669
+ 16,
670
+ 0,
671
+ "JSON"
672
+ ],
673
+ [
674
+ 28,
675
+ 11,
676
+ 0,
677
+ 12,
678
+ 0,
679
+ "IMAGE"
680
+ ],
681
+ [
682
+ 37,
683
+ 11,
684
+ 0,
685
+ 21,
686
+ 0,
687
+ "IMAGE"
688
+ ],
689
+ [
690
+ 43,
691
+ 7,
692
+ 0,
693
+ 26,
694
+ 0,
695
+ "MASK"
696
+ ],
697
+ [
698
+ 44,
699
+ 26,
700
+ 0,
701
+ 25,
702
+ 1,
703
+ "IMAGE"
704
+ ],
705
+ [
706
+ 45,
707
+ 7,
708
+ 0,
709
+ 25,
710
+ 2,
711
+ "MASK"
712
+ ],
713
+ [
714
+ 54,
715
+ 16,
716
+ 1,
717
+ 7,
718
+ 2,
719
+ "BBOX"
720
+ ],
721
+ [
722
+ 55,
723
+ 12,
724
+ 0,
725
+ 25,
726
+ 0,
727
+ "IMAGE"
728
+ ],
729
+ [
730
+ 56,
731
+ 25,
732
+ 0,
733
+ 29,
734
+ 0,
735
+ "IMAGE"
736
+ ],
737
+ [
738
+ 57,
739
+ 12,
740
+ 0,
741
+ 30,
742
+ 0,
743
+ "IMAGE"
744
+ ],
745
+ [
746
+ 58,
747
+ 21,
748
+ 0,
749
+ 7,
750
+ 1,
751
+ "IMAGE"
752
+ ]
753
+ ],
754
+ "groups": [],
755
+ "config": {},
756
+ "extra": {
757
+ "ds": {
758
+ "scale": 0.620921323059155,
759
+ "offset": [
760
+ 253.5867105157341,
761
+ 622.007731477587
762
+ ]
763
+ }
764
+ },
765
+ "version": 0.4
766
+ }
custom_nodes/comfyui-segment-anything-2/example_workflows/points_segment_video_example.json ADDED
@@ -0,0 +1,447 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "last_node_id": 114,
3
+ "last_link_id": 54,
4
+ "nodes": [
5
+ {
6
+ "id": 106,
7
+ "type": "DownloadAndLoadSAM2Model",
8
+ "pos": [
9
+ 451,
10
+ 420
11
+ ],
12
+ "size": {
13
+ "0": 315,
14
+ "1": 130
15
+ },
16
+ "flags": {},
17
+ "order": 0,
18
+ "mode": 0,
19
+ "outputs": [
20
+ {
21
+ "name": "sam2_model",
22
+ "type": "SAM2MODEL",
23
+ "links": [
24
+ 40
25
+ ],
26
+ "shape": 3
27
+ }
28
+ ],
29
+ "properties": {
30
+ "Node name for S&R": "DownloadAndLoadSAM2Model"
31
+ },
32
+ "widgets_values": [
33
+ "sam2_hiera_base_plus.safetensors",
34
+ "video",
35
+ "cuda",
36
+ "bf16"
37
+ ]
38
+ },
39
+ {
40
+ "id": 112,
41
+ "type": "ShowText|pysssss",
42
+ "pos": [
43
+ 1421,
44
+ -359
45
+ ],
46
+ "size": {
47
+ "0": 315,
48
+ "1": 100
49
+ },
50
+ "flags": {},
51
+ "order": 4,
52
+ "mode": 0,
53
+ "inputs": [
54
+ {
55
+ "name": "text",
56
+ "type": "STRING",
57
+ "link": 53,
58
+ "widget": {
59
+ "name": "text"
60
+ }
61
+ }
62
+ ],
63
+ "outputs": [
64
+ {
65
+ "name": "STRING",
66
+ "type": "STRING",
67
+ "links": null,
68
+ "shape": 6
69
+ }
70
+ ],
71
+ "properties": {
72
+ "Node name for S&R": "ShowText|pysssss"
73
+ },
74
+ "widgets_values": [
75
+ "",
76
+ "[{\"x\": 620, \"y\": 359}, {\"x\": 621, \"y\": 246}]"
77
+ ]
78
+ },
79
+ {
80
+ "id": 102,
81
+ "type": "VHS_LoadVideo",
82
+ "pos": [
83
+ 14,
84
+ -59
85
+ ],
86
+ "size": [
87
+ 363.24957275390625,
88
+ 619.2495727539062
89
+ ],
90
+ "flags": {},
91
+ "order": 1,
92
+ "mode": 0,
93
+ "inputs": [
94
+ {
95
+ "name": "meta_batch",
96
+ "type": "VHS_BatchManager",
97
+ "link": null
98
+ },
99
+ {
100
+ "name": "vae",
101
+ "type": "VAE",
102
+ "link": null
103
+ }
104
+ ],
105
+ "outputs": [
106
+ {
107
+ "name": "IMAGE",
108
+ "type": "IMAGE",
109
+ "links": [
110
+ 41,
111
+ 43,
112
+ 52
113
+ ],
114
+ "shape": 3,
115
+ "slot_index": 0
116
+ },
117
+ {
118
+ "name": "frame_count",
119
+ "type": "INT",
120
+ "links": null,
121
+ "shape": 3
122
+ },
123
+ {
124
+ "name": "audio",
125
+ "type": "VHS_AUDIO",
126
+ "links": null,
127
+ "shape": 3
128
+ },
129
+ {
130
+ "name": "video_info",
131
+ "type": "VHS_VIDEOINFO",
132
+ "links": null,
133
+ "shape": 3
134
+ }
135
+ ],
136
+ "properties": {
137
+ "Node name for S&R": "VHS_LoadVideo"
138
+ },
139
+ "widgets_values": {
140
+ "video": "ballerina_davinci.mp4",
141
+ "force_rate": 0,
142
+ "force_size": "Disabled",
143
+ "custom_width": 512,
144
+ "custom_height": 512,
145
+ "frame_load_cap": 16,
146
+ "skip_first_frames": 0,
147
+ "select_every_nth": 3,
148
+ "choose video to upload": "image",
149
+ "videopreview": {
150
+ "hidden": false,
151
+ "paused": false,
152
+ "params": {
153
+ "frame_load_cap": 16,
154
+ "skip_first_frames": 0,
155
+ "force_rate": 0,
156
+ "filename": "ballerina_davinci.mp4",
157
+ "type": "input",
158
+ "format": "video/mp4",
159
+ "select_every_nth": 3
160
+ }
161
+ }
162
+ }
163
+ },
164
+ {
165
+ "id": 113,
166
+ "type": "Note",
167
+ "pos": [
168
+ 56,
169
+ -415
170
+ ],
171
+ "size": [
172
+ 309.10654388427724,
173
+ 177.01340377807628
174
+ ],
175
+ "flags": {},
176
+ "order": 2,
177
+ "mode": 0,
178
+ "properties": {
179
+ "text": ""
180
+ },
181
+ "widgets_values": [
182
+ "To get the image for the points editor, first create a canvas, then either input image/video (first frame is taken), or copy/paste an image while the node is selected, or drag&drop an image.\n\nWARNING: the image WILL BE SAVED to the node in compressed format, including when saving the workflow!\n\nClick the ? on the node for more information"
183
+ ],
184
+ "color": "#432",
185
+ "bgcolor": "#653"
186
+ },
187
+ {
188
+ "id": 107,
189
+ "type": "PreviewAnimation",
190
+ "pos": [
191
+ 1340,
192
+ 32
193
+ ],
194
+ "size": {
195
+ "0": 514.92431640625,
196
+ "1": 577.3973999023438
197
+ },
198
+ "flags": {},
199
+ "order": 6,
200
+ "mode": 0,
201
+ "inputs": [
202
+ {
203
+ "name": "images",
204
+ "type": "IMAGE",
205
+ "link": 43
206
+ },
207
+ {
208
+ "name": "masks",
209
+ "type": "MASK",
210
+ "link": 42,
211
+ "slot_index": 1
212
+ }
213
+ ],
214
+ "title": "Preview Animation 16x768x768",
215
+ "properties": {
216
+ "Node name for S&R": "PreviewAnimation"
217
+ },
218
+ "widgets_values": [
219
+ 16,
220
+ null
221
+ ]
222
+ },
223
+ {
224
+ "id": 105,
225
+ "type": "Sam2Segmentation",
226
+ "pos": [
227
+ 859,
228
+ 409
229
+ ],
230
+ "size": {
231
+ "0": 315,
232
+ "1": 170
233
+ },
234
+ "flags": {},
235
+ "order": 5,
236
+ "mode": 0,
237
+ "inputs": [
238
+ {
239
+ "name": "sam2_model",
240
+ "type": "SAM2MODEL",
241
+ "link": 40,
242
+ "slot_index": 0
243
+ },
244
+ {
245
+ "name": "image",
246
+ "type": "IMAGE",
247
+ "link": 41,
248
+ "slot_index": 1
249
+ },
250
+ {
251
+ "name": "bboxes",
252
+ "type": "BBOX",
253
+ "link": null
254
+ },
255
+ {
256
+ "name": "coordinates_positive",
257
+ "type": "STRING",
258
+ "link": 54,
259
+ "widget": {
260
+ "name": "coordinates_positive"
261
+ },
262
+ "slot_index": 3
263
+ },
264
+ {
265
+ "name": "coordinates_negative",
266
+ "type": "STRING",
267
+ "link": null,
268
+ "widget": {
269
+ "name": "coordinates_negative"
270
+ }
271
+ }
272
+ ],
273
+ "outputs": [
274
+ {
275
+ "name": "mask",
276
+ "type": "MASK",
277
+ "links": [
278
+ 42
279
+ ],
280
+ "shape": 3,
281
+ "slot_index": 0
282
+ }
283
+ ],
284
+ "properties": {
285
+ "Node name for S&R": "Sam2Segmentation"
286
+ },
287
+ "widgets_values": [
288
+ true,
289
+ "",
290
+ "",
291
+ false
292
+ ]
293
+ },
294
+ {
295
+ "id": 114,
296
+ "type": "PointsEditor",
297
+ "pos": [
298
+ 432,
299
+ -735
300
+ ],
301
+ "size": [
302
+ 813,
303
+ 1068
304
+ ],
305
+ "flags": {},
306
+ "order": 3,
307
+ "mode": 0,
308
+ "inputs": [
309
+ {
310
+ "name": "bg_image",
311
+ "type": "IMAGE",
312
+ "link": 52
313
+ }
314
+ ],
315
+ "outputs": [
316
+ {
317
+ "name": "positive_coords",
318
+ "type": "STRING",
319
+ "links": [
320
+ 53,
321
+ 54
322
+ ],
323
+ "shape": 3,
324
+ "slot_index": 0
325
+ },
326
+ {
327
+ "name": "negative_coords",
328
+ "type": "STRING",
329
+ "links": null,
330
+ "shape": 3
331
+ },
332
+ {
333
+ "name": "bbox",
334
+ "type": "BBOX",
335
+ "links": null,
336
+ "shape": 3,
337
+ "slot_index": 2
338
+ },
339
+ {
340
+ "name": "bbox_mask",
341
+ "type": "MASK",
342
+ "links": null,
343
+ "shape": 3
344
+ },
345
+ {
346
+ "name": "cropped_image",
347
+ "type": "IMAGE",
348
+ "links": null,
349
+ "shape": 3
350
+ }
351
+ ],
352
+ "properties": {
353
+ "Node name for S&R": "PointsEditor",
354
+ "imgData": {
355
+ "name": "bg_image",
356
+ "base64": [
357
+ "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAMAAwADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDE+wysPkuCPwqGaDUrYbluUIzjkV6Xf6dZx2MrJbRqQvBC1xeoIBbnjuK8urGVN2Z2wkp7FOCTUyo+dGq0suoDrEp+hrf8M6VbX8LmdSdoGADiuh/4Rmx/hEg/4FThRnNcyFKpFOzOEWe8HW2P4Gni6mH3rd/yrtj4Yte0ko/4FSf8IxB2mkqvq9Qn2sTjBc+sbj6rR58R6j8xXYnw1EP+WzH8KzbrTlgl8tSH/CpdGp2H7SJz/m25PO38qM2p/u10aeHpJ4w4WIA9jTW8KSkfch/Op9jU7D549znttt2IpcIBxJW1J4UmCEiOM49GrnL+y2QSbMq6g9DUSUofEik1LYtD2kpPm/viqOmWnmWitI7lj1O6r/2BOgkcfjU8w7CbpB0YUedKOwP40v8AZx7TOKhlsrpVPlzAn3FF2KxMLuUfwfrTheyf8865XUZvEVmS0cSPGOcjmt6zlaezilcYZlBI96puSVwsXINYAu0ieFwWOM7eK6QRRsM7FP4VzEQHmp9a6qP7i/Su3Cyck7mNZWGG3iP/ACyT8qqXtvEkDERqD6gVpYqpqH/Hq1b1EuVmcdznGFRNxUxqpdbsLtPOa8nqdSJBSg01elPoGKKkUcU2Ntw6VIBSEGKMU7FJQAmKMUtApjExS4paMUAJijFLRQA3FIRT6awzQAzFGKZczpbQGV+gp0biWJXXowyKQFpB/o7fSsFlFtLJNLLiMnPPQVvr/wAe5+lY2qWkd1p0sUgO0jsaufQI9QhvbWVcpOjD2Iqbzof76/nWd4Z0KGS3Kw2+9gecmuoj8MTkcWaj6tRyt/ChtpbsyvOt/wC8v50efa9yDW4vha4P/LvCPq1Sjwpc/wDPOAfjT9nU/lJ54dznTNZdwPypDLYn+Fa6Gfw1LBC0jiHCjPFYkkKD+AflUS5ofEhpqWxXL2HeNfypN2n/APPNa2NJ0J9TieSMxKFOPmFaY8Hzd2t/++TVxp1JK6QnKKdmcqH0/wD55L+VKH0/P+qX8q60eD37vB/3xTx4PPeSH/v3V+xrdieeHc5Dfp//ADyX8qTzdP8A+eS/lXYjwgP+ekX/AH7pf+ESH/PWP/v3R7Ct2D2kDjPPsP8Ankv5UC4sQeI1H4V2o8JJ/wA9U/790o8JRj/lqv8A37FHsK3YPaQ7nGfab
TtGv5Uhu7Uf8sh+Vdv/AMIpH/z2H/fAqjq+hJY2LTCTcR224pSoVUrsFODdjkze2vaIf98003sB/wCWI/75qVx6Cun0TQLW90+OeUvub0NY0lKo7RNJuMFdnJfbYD0hH5Uv2uI9If0r0EeFLD1k/wC+qd/with38z/vquj6rVMvbQPO/tKf88T+VNkugilhbsfYLXpA8L6eO0n/AH1WZr2j2ljY+ZCrBt2OWqZYepFczGqkW7Hn1tqy3dw8H2WWMr3ZcCuitubQfSqrIuw8CrloP9E/CsYu7NZIgApQKCDg461WsBdYk+0nJ3fL9KZJbxS4pcUtIQ3FGKdS4oAZijFOoxTGNxRinYpM89KAExRinUlADcUhFPpKAI2FQTD5T9KsNUE33DSGiCzHyN9as4qvZ/cb61aoGb2l/wDHqKvYqlpY/wBGFXsV61L4Ecc/iY01lapzKgrWIrJ1L/XL9Kiv8A6fxFBuKwbrXyl41rbQNLKvX0Fbz9TWBbxgatcnHJxXnN2OlK5LH9uuBmeQRg/wpUy2UI5cbj/tc1oWtt5pOegrYsILWCUtKm7jgkZxWsKcp6kymomFHYoQClvkeoWrY0uVYjL9mIT1IrsBbi4gUwEKh9BS6jHt0yRfRa2eHstWZ+1OHmhUwsNoHFcv4dlla9uYnbdGrfKD2rr5h+7b6VyHh/8A5CN5/vmuO+jOhI9h1Fc2E3+6a4LUV/0Z/wAK9BvRmym/3TXBagP9Ekrqxm6MMP1Oi8FDMcuf7orsAK4/wUfklH+yK7EV0Yb+GjKt8YmKMU6iugzIZYy6FQ20+tZckEFq4aUltx6mtaQkKSBk+lY9xBPOdzjAzwDUsRqQlGjUoQVxxipMVkWxubXtuj9K0YLmOcfKcN3BouBI/wBxvpXnOoDPn/U16M/3D9K87vh8031NcWM2R00N2RaFGrpCrDK7ua7qLTLFyB5KZx6Vw+hcKn+9XUGWSKRXViD9aMIk07hXbui5d6RYpESI9p9jWDPYoFLAmtt5muIcs/4VQmGUauidOLWxjGbTOau1zBIPY1Tsxi2UVoXQ/dyD2NUbMfuBXmM7ehPH/rF+tdTD/q1+lcuo+cfWuog/1K/SuvBvVmNfoS4qpqA/0VquVVvhm1auyp8LMY7nNmql0G3xkHAB5q4RVW74UGvJZ1oVOlPqOP7tSAgED1qQHITv27Tj1qYCkAp4oEIRUYkRnKhgWHUelTVAltHHM0qjDN1PrQASReYByRg54p/SnYpGUMMGgBaXFIq4FOpjExSU6kPWgBtIadTTSAY6LImGAIPY0YAGAMCpSP3YNRnpTAsoP9FNUblc2zj2q/GP9ENU5hmBh7U6nQIdS14BGXmHpXoDZRCwUtjsK4LwEMXE/wBa9BUjFd2F/hnPX+MjV8puIK/WnKQwypBHqKc6B0Knoagtbd7feC2VJ4rpMSO/GbKX/dNeezcE16Lej/RJf90153cDDt9TXBjN0dVDZnUeDB/oM3+/XUYrmPBv/HjN/v11FdWG/hIxrfGxMUtLRW5kJikxS0UAJilxRRQAYrI8SLnRpa16y/EAzo830rOr8DKh8SPOXHFd94ZX/iSQiuDYcGu+8Mc6LF7V52C/iM68T8JsgUtFFeqcQmKw/FIzpf8AwKt2sbxMudJf2NZVl+7kXT+JHBsP3Z+lW7Lm1qqw/dn6VbsObWvIp7ndLYgxzTgKQdTTsHIpkigUh4p1BFAhBzS1DMZl2+SqnnnPYVMOnNMYUUE7Rk9KQEEZFIBaTFLRTATFGKWigBtNNOpDQAw1BN901YNQTfdakNEFn/qz9as1Wsv9W31q1QM3tL/49hV+qOl/8ewq/Xq0n7iOSe7GGsnUf9cv0rXIrK1IfvV+lTX+AIfEZ7dTWLAv/Eyn/Ctth1rHg/5CU1eZI6om9YD92a0IYWmcImMn1qjYj5DWnao7SgRnDetehQ+BHNU+Jm/ZQtBapG2Mj0pmpD/QJv8Adqa2V0hAkbLZ61HqX
/HhL/u1tP4TNbo4eb/Vt9K4/wAP/wDIRvP9812U3+rb6GuO0Af8TG8/3zXk9Gd6PZroZtZR/smuCvv+PWQV304zbyD/AGTXB3w/0eX6V2Yzoc2H6m54KP8ArP8AdFdlXF+Cz87j/Z/rXYuWEbbeWxxW+F/hkVvjJAc0tZ1k14Zj54Ow+varkoDJgkgVuZEh9arSrJMpVQVyetENxF9zfyPWie8SJflILUmIFQW8OGbOaqtB5b+fnHfFS/a45IcvgEdqzLu+MoKLwv8AOkBqpeRSqcMM1wt+P3s+PU1sfMemc+1Y90DvkB61yYv4UdGH3ZBopxt/3q6eYcD6Vy2k8YH+1XdfYEe2V9/O3pSwmzHiOhlRuVbHrTpfuNTWXa+PQ05+Vaux7HOtzn7kcPVCzH7k/WtG5H36oWn+qYe9eTLc7lsSDhh9a6i2/wBQn0rmMc109r/x7p9K6cHuzKsT1XvB/oz/AEqwKhuxmBx7V2z+FmEdzmT1NVbsfu6tEcmq14P3NeSzsW4yH7gqdFBOarwH5KfGsouCxceVj7uO9SDLQpw6VEd+8YPy96lHSgQtFFMZiGACk+9ADjxSZBpcZqKOAI7Nk5brmgCYClxSClpgJimsQoyTxT6inhE0TRt0YYNAxFkWQZVgR7GnGqdjp6WCssbsVbnB7Vc7UMB7D90KhPSrDD/RxVY9KGBciH+h1VkH7pvpVuH/AI8qrP8Acb6U6vQIFrwOMXU31rsb4zKitECcHtXI+DAVupvrXXzXqwf60YBrtwr/AHZz1/iCyvGk+WUbSPWrjOMjDD6VjSut7OqxttHqKmtdNlguvNaYsvoa6EzAuXn/AB6S/wC7XnlwPnb616JeD/RZB/smvO7n77fWuLGbo66HU6jwaf8AQ5h/t11Fcr4NP+jzD/arqhXVhv4SMa3xsWiiitzIQ0UUUAFFFFABWdrgzpE/0rRqjrIzpU4/2aip8DKh8SPNyOK7rwqc6Mn1rh26V2/hQ/8AEnA9DXmYL+IdmI+A3aKKK9Y4QrL8QDOkS1qVna4M6RN9Kir8DLh8SPPiP3f4Vb04f6MwqqfuVa07/UNXiw3O+WxAPvGn1H/GfrTwaogU5zS0gIJpR1oEGKMUtIM5oAXGaTGKWg0AJSUtQpIzOwKkAdD60xktFLSUAJimnin0hFAEIO7tio5fun6VORUUo+Q0DRWsvuN9atYqrY8o31q5SDqbulj/AEYVexVLS/8Aj2FXq9Sl8COWe401lakP3y/StY1l6j/rl+lTW+AcPiM9h1rGgH/EznrbbvWNCP8AiZz150jqib1j/q619PDG5XAPuaybIfJWxp0xiuVUKTuOPpXfQ+FHLU+Jm/GnlrgknnNQakP9Al/3atVV1L/jwm/3a2n8Jmt0cTMP3bfQ1x3h/wD5CV4P9s12U3+rb6GuN8P/APIUvf8AeNeS9md6PaJBmJx7GuDvP9TKPrXeuPkb6Vwl6vyzD6124zZHNh+pp+DD++I/2a7cVwvg44uQP9k13QrXCfwyK/xi01kDKVPQ06lrpMjLurIKC8Y5rOZJ92MciujIzWIbl4b4pMuFJwpqGJkKQyzZ4wRVeWJon2sOa2ZSYk3qM1RmEl3grGeKBFjT7VPI3kAlq5XURi7mH+0a347mWz+VgQPQ1gXz+ZcyPj7xzXHi/hR0UNyjpnDN/vV0kdzcMVTecdK5vTuJH/3q6MqVMZUcnGKnB9S8QXJbKVU3nnvVZvukVtGV0tB5i84rHk5zxXccyMO5HzPWdaD5XHvWndD52rOth9/615U/iZ2rYk710tnzbJ9K5rvXS2XNsn0rfB/EzOtsWKiuR+4f6VPUVxzC30rvnsYLc5dh8xqtd/6k1akGGNVroZhNeQzsRWtuUq4tU7X7lXR0qRseBmnYpFFOoJIIY5VkcySbgTwMdBU2KdijFMBKKa2cjFOFIAopcUUAJRS0lMBCKaRT6a1Ax7f8e1VT0NXGH+jVTbpTY0X7cf6DVdx8h
q1a82RqBh8poqbIUNyx4VkEM87HoK1by7+3sFA2hay/Cyq97OjdCa6f7LawThtoBbgV14VXgYV/iM/TbaQ3IOc7TXS1XggjiJKDBNWK64qxgQ3Qzbv/ALprzq4H71x7mvRrjmBx7GvPLlf3z/U1xY3odOH6nQeDT+7nH+1XWCuR8HcG4HvXXiujC/wkZVvjYUUUV0GQlFLTQwJwCM0ALRRS0AJVPUxnTrgf7Bq5VbUBmwn/ANw1MvhY47o81eu08J86X/wKuMfvXY+ETnT3Ho1eXg/4p21/gOixRRRXrHCJVHVxnS5/92r9U9UGdNnH+yamfwsqO6POz92rOm/6p6rH7pqxpv8AqnrxIbnoS2Kx/wBY31psXm+a+8rs/hApx/1r/WnqoBz61ZA5VCkkDrTqBS0hBRSYpaACmmnVDI7q6BU3Ank+lAElNVSM806kyM4oAWigcUUAJSHpSgg0UAMyDUcv+rb6VLtxUcw/dt9KBopaf9xv96rtUbFN6MCT97tV/FA3ubmlf8ewq/VDS/8Aj3rQr06XwI5ZbiVlaiP3y/StY1l6j/rl+lKt8A4fEZ7d6yIR/wATSf6VssOtY8X/ACFZh7CvNkdMTdsfuGtS2mMEm8JuI9qzLIfIa29L/wBc/wDu16GH+BHNU+JmxaTGe3WRl2k9qZqP/HhN/u0+13eT8wwc0zUf+PCb/drefwma3RxM/wDqm+hrjvD/ABq94D/eNdjLyjVx+hf8hq9/3jXkPZnej2c8qfpXDXo+aYe5rr/7SssH/SY+PeuRvGVpJWU5Uk4NduM1SOXD7steEOL0fQ13grz/AMKtsv1ycdRXfhh6itMI/cFX+IcKWm7h6il3D1FdRgIay7+HzLmMtgKD1rVz71k6srsUCVLA0GCrDnGQBVJdUhjyuwjFJaz3GVSRQVq3LBD5TNtXOOam4GTdSC/b92uMd6529QxzMp61uwzMLllRKyNTU/amLDk1y4r4Dah8RlWXEz/Wu7soYmgjdsFgK4W1H+kPj1rrNPtbkorB/k9KjB7s0xHQ2JJY5I2XNYsgxmtl7Vfs5bocVjSdTXczmRi3f+tas23HL/WtO8/1jVnQffk+teVU+JnbHYcRzXR6f/x6pXOnqa6LTjm0StsH8bIrbFuo5h+5f6VKKZIMxt9K9CWxzrc5eQfOfrVa4GYjVqT/AFjfWq84/dn6V5EjsRRs+Vq8o4qjZ9CKvr0qRslUUtIKdQSGKSlpKAGOpYYBxTx0pADk07FACUtGKKAEopaSgBvOTmmtxTzTXGRQMmYZtM1SarxH+h1SboaqQRNCz5szUJ6GpbDm0b61Gw4NOp8KCO5P4VP/ABNZVrr72BPLLlcsOlcd4XONak5rvjsPUiuvCfAc+I+IwlhvvPRwzBcjvW72FQT7TCwVgD2qKznxDiVxuHcmupOxiWJeYm+lee3gxcyf7xrv3uIdrfvU6etcHfY+1SY5GTXHjNkdFDqa/g4/vbgV2NcX4RcLdThiBkd67HzE/vL+dbYV/u0RW+IfRWbO0kc/mJKCvpmriXMRQFpEB7810XRiTVGsKI5dRyarXeoRwRhkZHOcYzSw6jbPGGaeNT6bqLodi3ilqqdQs/8An5j/AO+qQ6lZY/4+ov8AvqndCLVV73mym/3DUf8AaliP+XuL/vqoLrVrBraRRdRklT0NTJqzKSdzgX711vhA/wCiSj/ark36nHrXQ+GNQtLSGVbidIyWyNxxXlYR2q6nbW+A7Cis/wDtzTP+f6H/AL6o/tzTP+f2H/vqvW5kcNmaFVr9d1jMP9g1WOvaYP8Al8j/ADqvc69pj28iLdISVIAFTKSs9RxTucUw6ip9N/1cg96gPJJqbTf+Wn1rxIfEehLYrH/Xv9amHSoWP+kOPepl6VZAqg7jk8U6kBBJHeloEFFQsJvtAIK+Vjkd6moAKQilpKAEqtLZLLcpMXcFOgB4q1RTASlooosAmKKOtBp2AQ1FLzGfpUh60x/uH6UrD
KOn/dk/3qvVQ048zD/aq/SGzc0r/j3rRArP0r/j3rSxxXp0vgRyy3GGsvUR+9X6Vq45rN1IfvF+lFb4GOHxGcw61jxf8haf6CtphyaxY/8AkLz/AEFeZI6Ym7ZfcNalnci2lLlcgisW3hMnSRl+lXFs/WWQ/jXdQb5Ec9Re8dVbXS3EXmfd+pqPUJYzZSjeuSvHNUrPRYXtwXeU5/2zTrrR7WK1d1D7gMgljXRK/KZq1zmX+4a4/Qx/xOr3/ersnHymuP0Mf8Tu9+teQ9mdyPWBpNiP+XWP8q5W9jWK4lRRhQeBXc4rjNTXbfTD3rvxcUoo5aD1KOjwfabvyd7Jluq9a7BPD6KB/plzx/tVyegf8hUD/ar0alhEnF3HXbTMn+wk/wCfu4/76o/sQDpeXH/fVa1QyLN5gZCNvcV18qMOZmcdHkXkX8/51DJYSGQL9ukz71flW7kmCj5V9RUdzYOV3KxLUcqFdkC6RdYyt8/5U46PdlSDfufwq7CJrdF3tuXv7VdByMijlQXOaGmTiYqtyQfXFZOoRSQ3LJI+9h3rtXhUuG71yevJt1BvcCubFRXs7mtF+8YMJK3T4ro7S41XywIkTYehJrnI/wDj9YV1kF+I7ZY8fMKwwiu2a19EWNmriEljH09ayJDfknmOtxdTZ4SvlnPrWc2Sea7nG/U50zDnFzvPmbfwqrD/AKx61b0fN+FZcX+ukrzaqtNo6oO6HEfMa6DTP+PNa589TW/pf/HmK0wn8RirbF6mP9w/Sn01x8h+lek9jmRzEvEjfWq8/MZqzN/rW+tQS/cP0ryJbnWjOtOrD3rQXpWda/ff1zV23d3BLpt54qFsUyyKWkHWloJIjMvneUM78Z6U9QR1OaXaM5xz60tABRRiloASilooASkp1JQA2kbpS5pG6UDJzzZ1RbpWhjNjms9ulVLoETQ07m1b60xhwafpn/Hs/wBaaepqqnwoUd2R6Fa/atXkiLsnGcqcGuvGgRjk3M5/4HXM+GeNfk+ld3Lnymx1xXVhIpwMa794yG0SIDInlOP9qo7bTbS63BjJuX/aNS6d9q89/MJKZ71qhFXJVQM108ifQxuzLbQbLBJRunXca4+8RYrmRF+6pIFeiMPlNefah/x+zf7xrkxkUkrG9B6sn8PWcV3eyLJuwBng4rqhoVmP+ev/AH8Nc74V41CT/drtavCxTgTWfvGYdEsu/mfjIaVdEsT0Rj/wM0uo2lxPIrQtgAcjNNsbW8t3+ZgUPUZro5UY3YyfTdOt13PHgH3NOg07S5wdkKt+Jq/cCFo8SgYPrUcFjFDJ5iE0cq7BdjBounj/AJdUpRpFgP8Al0i/75q9RV8q7BdlP+yrD/n0h/75qG502yW3kItYgdp5CitKoLr/AI9pP901MkrAm7nmsgwWHvXQeF7K1u4pTPbxyENxuXNc/N95vqa6bwYRsuB/tV5WGX707qvwG8NI04dLK3/79inDS7EdLSD/AL4FW6K9flXY4bsriwsx0tYf++BUVzZWwtpcQRD5T0QVdqOcZt5B/smlJKw02eakdak00/6z60xhgt9TTtO6yfWvDj8R6D2K7f8AH0496lDEEALkHrULf8fkn1qyoqyGOA5zS0YooEJRS0lABSUtJQAUUoGa0rLTt3zy9OwranSc3ZCbSKcFpLORtHHqa6Cx0i1RAZcO3+1WNq3iKw0WMguDIOig1w198RJ2lJjO1B2FdsIU6fS7Js5eR6PrGpadpabWRBmuQvfFun7sKVwfSuL1TxONah8tnIk6A1y86yRthifY1TlzByqK7nrtrq9rej93IM/WrbEFDj0rxm3v57ZwUcjHvXZaH4mM2Ipm5xjnvXPUordFRkmdNp/35v8AerQrN01tzykdCc1pVxGjN3SR/o9aVZ2k/wDHvWlXp0vgRyT3Y0iszU/9YlahrM1P76UVvgY4bmeRzWMg/wCJvN9BW0etYyf8heb6CvMkdKNuyHWtBe1ULLoa0YMeYm7pn
mu+h8COep8Rv2p8y1BXjimXgcWEm/rtqzb7DENmMe1Q6j/x4y/7tdMvhZitzi5OAa5DQhnW736110n3TXI6F/yG73/erxn1PQR7KOtcdq4/4mEv1rrRKgP31/OuT1Yg6hIQQRmvRxfwHJQ+IpaHxrA/369GrzfSTt1kf7wr0dTwDUYP4WViN0RmNpJAX6DoKW4nEMZY1VvdRFs4RVLMaW3aS4y0yYHYV2HOLZXrXLEGMjHepXvokl8s5zUqqka8AAVl3Etu9x8ynjvQI0ZJgy4jwSe1V1mlgOJehqBby2tz8inNK9y14oRIyM9zRcCd1lcB4pCa5zWRJ9qHmjBxXQ2XmRyeVIPpWT4kXFzGfUVz4n+GzWj8Ry6/8f5rpLWxknCuD8tc23F8K7PTHYW4AGeK5sHuzbEbI0EtoooCNozjmsiUDewHTNXo2l85ll4XtVOYASHHTNeicqMm+HzCsmP/AF8la9+ORWSgxcNXmV/jZ10/hHN1re0o/wCij61hMPmrc0nm2/GnhP4gVvhNCmt9006kb7pr02cyOYuB+/f61BIPkNWbr/j4f61Xf7pryJ7s60Zdr/r3+taSdKzLb/j5f61pp0qFsVIlFLSCloJCikDZJGKWgAooooAKKKKACkpaMUANprdKcaa3SgZbA/0E1nN92tJR/oBrOb7tVLYIl7S/9RJ9aO5o0v8A1Un1pT941U/hQR3YvhvjX2HqK74jIxXAaAyx68Wdgox1Ndx9ttv+eyf99V14T4DDEfEVpmktZ12DKN2xV8cgGoDd2rdZY/zo+22w6zx/99CuowsTnpXnmoD/AE6b/eNd0dQtAD+/j/76FcPflXvJWU5BbgiuLGbI6KG7Lnhc41Rh6rXbiuF8OukWqkuwUbeprtBd25/5bJ/31WmE/hk1/iEuyywMUbBHeotPu2uAysOV71K1xbspBlQg/wC1UUUtnb52SxjPX5q6TEtvGsgwwyKcAFAA6Cq32+1H/LxH/wB9Uh1OzHW5j/76pqwi3S1ROr2A/wCXqP8AOm/2zp//AD9R/nRdBYv1Fcf8e0n+6aqf23p3/P0tMl1rT2idftAOQR0NJtWGlqcFKMs31NdH4N6zj3rnpcGRiOmTitPw9qkGmtL5yyHd02rmvJwztV1O6rrA7wUVhjxRZnpDcn6RGj/hJ7bta3Z/7ZGvW549zi5WbdNk5if6Gsb/AISSLtZXh/7ZGg+IUZSPsF2OOpjpOcbBys5CQfvH/wB4/wA6bp5/eSj3p8h3O56ZJNR2P+tk+teIviO/oVn/AOP5/rV1egqk5/09/rV1OlWDHUYp1FBA2kp2KSgBtBoo71SQy7YQK77n6CqfifXzptk0cOfMI4I7Vfgby4Gb0Ga828U6xvlYZ9RXowSjCyMt3qclqWoXN1MzyuxJPc1mPISeTUk8xkYmoK1ijOUgDEHI4NacEy3UXlS/eHQ1l09CV5FE4XQQnyskmjMchX0pbeZopVZTyDTHkaQ5Y00daaWmom1fQ9N8MXhuIzk109cP4LYnIruB0ry60bSOlO6N7Sf+PetIVm6R/wAe9aWK7qPwI557sKy9T/1iVqVm6mPnSit8DHDczz1rFT/kLzfQVtt1rFUf8Tib/dFeZI6Ym3Z960IGjWTMgytZFvFK5OyTaKtrBMBgzk/hXdQfuI56i946nTmQ237v7pPek1JgLCXntWfY6Ys1sCbuZSeoVqfd6TFFau/nTMVGfmfNdEm+VmSSuc5J9w/SuQ0If8Tm9/3jXYScKa5DQv8AkNXv+8a8h9TvR6jHpelsMru4/wBs1z2oRJDeOkf3R0rZXIrI1If6UT6ivRxUUoaHHRd5GZbMyamSn3uCK61G11kBUwgdsmuQt+NWHvivQY7qFI03N2rHCK9zSu9jImGqb8ytDupBcaqOBLFitx2t7lOCDVSCGIO27Bwa6+V9znuVYzrMoODEwoa01ReWSP8AOtL7akTiONdx9qvg74/mGMjmmo+YrnLi1vpyd
ojJHvVuK21eMYUwr9avoYLOdiWxup1zfRGI+W/ze1Pl8wuZrx60WBEkGRWXqSX25TeMrHttrTW5lB+8TVXVizRxlqxrx/ds0pP3kc3N8t4p74rpLD+0/KVoY49mOMmucuOLpK7fTJB9iVQOQK5cIryZvXfuoqTS6psO5Ih+NZrpqJJO+MV0IjlnDbxgVRlj8tiK9Bw8zlUjCuY7wAGR0I9hVFeLk/Sty+H7sVidLuvOrq0zqpu8R7fera0j/j3P1rGb79bGkH9w31ow38UKvwmlQelFHavUOY5q7/4+X+tV2+6as3n/AB9P9arsODXk1N2da2Mi3/4+3+taadKzID/p0g96006VmtipEopaQUtBItJS01lDYp2AWigUUgCiiloASilNJQAlNbpTjTTQMuJ/x4Gs1ulaKf8AHi1Z56VT2QRLulcpJSt940mk9JRTmGJG+tXP4EEd2R6TBHca55cq5XBOK69dH07dtMC5NcnoR/4qMD/ZNd8EGQcciunCJOGpjiHaRSGjaeP+XZKR9K05MZt4xmr7tsUt6DNZSy/2kxH3AhrqsjnuyRtIsCpxbJXI3kSxXUiIMKDgCu8C7Y8egriNRGL2X61yYtJRR0UHdsj0i3iuNVWOVdy4ziuuGjWA6W61yuif8hhPpXcjnvTwduQVf4ij/ZdiP+XdKjOn2KtgwJiprixLymTziufSq6WDyZzMWWuppGFyZdNsCMiCP8qlXT7IqMW0WP8AdFQ2ttJBKVZ8qe1aAUIp2ijQVyAWFoOlvF/3yKd9jth/ywj/AO+RUI1FRIVdCuOtW1dWAIIOadkBH9lgH/LGP/vkUyWCLyX/AHadP7oqzTJv9S/0oa0GjzicASv9TW74RAZ5wQDg1h3IxM/+8a2/CB/fXA+leVh/4x21PgOtCj0pcCilr1ziEwKY4G0/SpDTW+6fpSewHnc/E8n+8f51BY/66T61YuOLiUf7Z/nUFj/rpfrXh/aPQWxTk/5CD/Wr8fQVny/8hFvrWhH0FUDJKKKKCQ7U2nU09aYDaB1oNNJ2gk9qaC1ya7vre2snEsoUsvA9a8h1zLTM2cgkmtbxDePdXhYMdqHgZrBuLrzFw/JruhLmQThyGM45NNqeQKSSKjC8103ORoRVzVkQnZmlgjXOWIqeaeJY9qkE1Dk29DVQSV2Z5GCRRQTk5oHWtTE7nwUDubFd3XEeClxuPrXbjpXlV3751x2N7SP+Pc1p9qzNH/1FaldtH4Ec892JWZqX30rTrM1L76UVvgY4blButYqj/icz/wC6K2z1rGH/ACF5v90V5kjpibVl0arw7VRsujVoRqXYKoyTXoUPgRz1PiNnT7by1WQN1HIqXUP+PKb/AHaNPjkjhKy9c8Uuof8AHjN/u1vL4TJbnGy/dP0rj9C/5DV7/vGuxk+6a47Q/wDkNXv+8a8dnfE9UEMasvOR3rG11ES8XZ0K1sR7QOeTWRrSETIcY4r1MT/DZw0fiOfQ7dUT8K65kJhVs9RXHvxqUZru7S3E9omTjiuXCPVm+IWiJLO0BQOW69qZJbZn2o/WknL2oCo5xUMDv5oc54Oa7rnIatvZxwfN1b1qC7vSuUT86bNfErhfzqiMyPg9TVCZE7s7ZJJoFa0OlIVDO1LLaW0LDccUwK9iiZJcc9s1X19Rsj21duJofK2xYz7Vj30kkkahug6VjX/hs0pfGjnrsYuEPvXZ6S4S0DkcYrjr0Ylj+tdVpV3FHbrHIQMjvXFhPjZ0Yj4Ua32yIjrWbcuHkJFWGtfMYtGflNVJo2jbBr0mchRvR+7rCb/j7H0rfu/9XWDJ/wAfa152JXvnXR+Ee4+atfR/9U31rJf71a2j/wCrf61OH/ijqfAalB6UUV6jOY5u9/4+n+tVj0q1fD/S3+tVj0ryqnxM6o7GBIsjXMyRPsc8Bh2rVs45I7dFkkMjgcse9Z3TUZPrWtH0FZLYuRKKMjOO9Ao2jOe9BIHPajNLUJhJnEu9uBjb2pgT0
YoFFIAxRRRQAUlLSUAIaaacaQ0DLMfNo1UD0q9D/wAebVSPereyBFzSOsop7/6xvrTNJ+/JT3/1rVU/gQl8TGaHx4lX3U16DmvPtH48Sx+4Nd+/+rP0rpwfwMxxHxIRpI8EFh71SW2jjcywt0OSBWdHaz3EzlSQAe9XbKGaCUo6koe9dZzmijrJFuWuJ1Mf6dL9a7bAVcAYFcXqwxqEv1rkxnwI6KG7K+mIz6mio+xiOtdbDpsi3Ale4dsds1ymknGsQ/jXfAcUYNe4wr7le7haaHahwaZaQtbwYc5NXKqXcrxbcLlc8muto5yjcfaJrkCMkL61at7jy28qV8sfWra7SgIArMvI2a5Xah69fWltqIuXMkMY+YDn2rNkE7yrJAWA7VsCNWRQ6g08KAMAACnYY2MsY13fexzRL/qm+lHljeHycill/wBU30psEeeXQ/0iT/eNa/hH/j6uB7VkXX+vk/3jWl4XmSG7m3nGRXk0P4x3VPgOzpajimSYZQ5qSvXOEDTT0pTQaAPPLsYu5h/tmq1lxPL9at3v/H7OP9s1TtP+PiWvCfxnorYqTf8AIRb61oR9BWfN/wAhE1oRdB9KoTJKKKWgQlNNO7U00wGmo5RmNh6g1IaVBlwDTW4XtqefXdi63Lo6nknmua1KDypSBXsepWMD2bybAHx1ryrXLdklJNdtNcrsOrP2iuc41N5FSOMMRTK60cbFyaKSloAAM1IqEkUiLkitC3tS7ris5zsa0oczOx8Jp5Uec5zXXKwPSue0O18hFXuVya6CNAvSvMm7u501Ek7I6DR/9RWrWVo//HvWsOlehR+BHHPdiGszU/vpWnWXqf8ArEorfAxw3KJ61jL/AMheb/dFbDdax1/5C03+6K8yR0xNqy6NWtYukdyrOePWsmx/irSiheZtqDJr0MP8COap8TOgCidFdXI7gimah/x5S/7tFjG0VqEfqKTUP+PKX/drol8LM47nHy/cNcdoXOt3v1rsZfuGuO0L/kN3v1rxmd6PVrXyxGXfGRWRrUvmuh24A4FbEVqXiDZGPSsrXGQ7FXqvWvVxP8NnDR+NHMTcahEa7S0897dBHnAFcZccXsNdrpt7HDaKjHBIrjwnxM3xHwoayuz7XPPvTzMEXYAM+tR3EvnTZjzU9tp8sjq78Cu85AgjRhl+pqZrNfvKcVZltECjHGKiZiV2ocmmIri7mgbZuyBUM8xmOTU32OVzk8fWo5bdouvNUBBgjBI4puoMhtkC9auQhZ/kbgVDq1pHBbgoayrfAy6fxI5W++8h966Gws1uLUMTggVz9/8AdU+9bFlcyRQKEPFcOF/iM6a/wGtb3bWuYyMgcVHPN5zlsYqOMNKckUMApwK9I5Cvdf6o1gS8Xa1v3P8AqjWDPxdIa8/E/EdVH4SSTrWpox4krMl7Vo6OfncVnQ/ioup8Br0UUV6pynPah/x9tVU9Kt6kP9Laqh6V5VX4mdUdjEP/ACEpPrWrH0FZT8ao9akXQVkWyalpBUMkDPMr+a6hf4R0NMknOe1FAFLSAKKKKACiiigApKWkxQAhpDTqaaYyxB/x6vVI96u2/Ns4qme9U9kJFvSv9bIKkk/1zVFpX+uepZOJmqp/AgXxEOlHHiWL6GvQsgLk9K870848SQe+a9D2hlwRwRXTg/hZjiN0VZbtEUmEBj6Cm2160spR4yp7VHcmO0ZSF4JqzHPAwBBGTXWc5OelcVq//IRkrtSciuM1kY1GSuXGfAb0PiK2l/8AIYgrvx0rz7TD/wATi3+tehDpSwXwMeI3CmuoZSCBTqK7TnM1JHt59sh+Q1ocMAaRo1Y5IBNOApJAQXDSoFMYB55qYZIGetLiimAUyX/Vt9KfTJP9W30pMEef3P8Ar5P941peFQGvpgRn5azboYuJB/tGtLwof9Pm/wB2vJo/xkd1T4GdgoA6DFLRS165wiUHpRRQB5/f/wDH/P8A75qna/8AH1JV7UhjUZx/tmqFtxdSV4c/j
Z6MfhKdx/yEjWjF90fSs24P/EyNaUfQUxS3JKKKKBCGmmnGmmgQhoXhxQaQHDVS3GSai+3T3+lcHqtmLqI46gcV2uqP/oDfSuWlI2GumU7NFQjoed3Vu8TkEdKqY5rq76JHJyKxJ7QZytdEKqZlOi+hQFPVSTUn2dga19K0lrmQbhxVyqJIzUHfUbpWlPdSDAOK66z0FQyMRwK0NN0xYIwqqAK2EjCrgCuOpO50RfLsULddl3sHYVpLWekT/wBoFwflxgitAVzMJO7N3R/9Sa1h0rI0b/UmtevSo/AjlnuxKzNT/wBbHWpWXqY/ex0VvgY4blButY6/8hab/dFbLDJrGX/kLTf7orzJHTE2bDqa2bF9k+cE8dqxrDvWvaS+TOHIJ9hXoYf4Ec9X4mblvJ5sW7aRzjBqLUP+PKX/AHalt5VmiDqMCotQ/wCPKX/drefwsyjucfL9xvpXHaD/AMhq9/3jXYy/6tvpXHaD/wAhm9/3jXj9zvR63axywwli2QR0rF1mNgFZlxmta1nYSKhPy1V8QjMMZ969fEL92zgpfGjj7n/j6iNdPBEDbIxPNczecTRH3robfcbdDzjFcGEfvs6cR8KNSERW+2Ruas/2vGOAprNUllCnmrsWnqoEkh4616Jxk6zS3X3flWrMMKRLnOT6mqM1wkShYSPwqus8r/KGPNOwi9cXhDbI/wAarNHPKNznioZEeM5YEU/7W+zaaYEe1gflz+FVrt5GjwxJA9a1YLm3SP5vvd6q30kU1udi4IqKnwsqHxI5a/GIc+9b2k2yT6erE81h6h/x7tW94d2m2XeeMV52F/iHXW+A0y0aW+wAZqg3JrQuVi2/LjPtVAjmvTZxoguf9Uawbr/XR/Wt+4H7k1g3YxJGfeuDFL3kdVHYfJ2rQ0b/AFz/AErPl+6KvaMf9Ib6VjR/io0n8DNujFFKK9ZnIc9qY/0o1T7Vf1UYujVCvLrfGzqjsYcpI1lhjjHWtWLoKzbkf8TUH1rSj6CsS2TClpueacKCRaKKKYBRRRSAKOlFLigBAc0UuKSgBKQ06mmgZPb/AOoeqZ71ctv9U9U26mreyEi1pf8Ar3qaX/XNUGmH/SWHtU83+uaql8CBfEVrHjxFb16BIzJCWQZIHArz+048Q25969D/AOWf4V0YPZmeI3RlKJr6MmUbcdqZaWweZkY4KdKa9zLFcsoxtzViO1kZhNG+Ca6zlNEAAAelcdrX/IReuxGQBnrjmuR10f8AEwb6VzYz+Gb0PiM/T+NXtv8Aer0MdK88suNTt/8Aer0IdB9KWC+FjxG6FooortOcKKKKACiiigBDTHIaNsHtUh5qJgFRgB2oYHA3n/HzJ/vGr/hX/kIzf7tUb3/j6k/3jV7wr/yE5f8AdryKX8Zep3T/AIZ2dHaiivXOEj/eCXqNnpUnaoLhnjwy42jrUkbiSMN0oA4bVf8AkJz/AO9Wdbf8fb1o6x/yFZ/96s23/wCPp68Sp/EZ6EPhKVx/yEzWmnQVl3P/ACE600+6KEEtyUUHpTaCaBATTTS000wA0lBNMJpgQ6k2bNhXOTj90a6G8+a3IrEuk2x/hTm9Tansczcr8xrNkUg1szpkmqEkdaQkW0VEjBcZrsfD0KFgcCuXWPBzXV+HD82KqUjKUdDqFQADFOIpR0oPSudsyKBDmWQIQG7ZqxAJBEolIL45IqGP/j5erVIbNvRv9Ua16yNG/wBWa2AOK9Oh8COafxCVman/AK2OtQjFZWqf62Oit8DHDcpHrWKv/IWm/wB0VsnrWMP+QtN/uivMkdMTasO9acDvHIGQZPpWZYd61bWURTq7DgV6GH+A56u7NmwkeWEl0CEHGKNQ/wCPKX/dqaCRZohInQ1FqH/HlL/u1vP4WZrc42X/AFbfSuO0H/kM3v8AvGuzk/1bfSuM0L/kNXv+8a8fudyPT1BIyOtVdUEptwXztzxmtKwCh/mI6d6j8QFDZKFI617Ff+Gzz6fxI4u++/Ef9
quksXU2SLnk1zeof8sz/tV0WlKn2NJG5IrzsN8Z11vhNS2gRAHk7djUt3dK0eyM1WmuRKgCjFQgZr00cTG85p8UpjcMBnFATJqZrYCPdu5piEnujPjIxRFCJFJLYqHbRyB1pgJINrEZzio3f9yVqQ80x4mMZIU4qJ/Cyo7mHfD/AEZqv6S3+iR84qleDMD/AErS8P2f2uz+9jbXmYb+IddX4DoYbeBrcN1465qhKoVyByK0IbNYYSpck1QlXDnFeocaK04zEawL7hoz710MozGawL8cKf8AarixS1R00dhZPuCrmkf8fJ9xVN/9WtWtJP8Apf4Vz0/4iNZfAzepRSU6vWOQwdW/4+R9KzHcqRxnJrU1cf6Qv0rNNeXW+NnTDYyLv/kIr9K0YugrOvP+Qgv0rQi6CsTRkwp4popRQSLSYO7OaWlIoASikFLQAU6m0uTQAtNprybMZGcnFOFMAppp1NNICe15R6qsOTVq1+49Vn6mtHsgRNpvFyfpVib/AFzVX0//AI+j9KsTf6405fAC+IqW/GvWx969EX7grzqLjW7U/wC1XoqfdH0rowezM8R0Mp7R5rxgQQvrVuIC0Xa7cHpSTXRMxji++KRIGuRun4IPAFdljlLnVc1ymvr/AKfn1WusAATA6CuW8QjF4v0rmxa/dm9D4jIteNRt/wDfr0Jfuj6V55Adt7Cx6BhXex3ULIMODxWeCejKxG6J6XtVSC9SaZosYYVaZgoyTgCu85goqv8AaUMgCuuO9WKACiiigApj/dP0p5qAxBSzAnkdKQHC33/H3L/vGrnhX/kKSf7tU7//AI/Jf941b8LHGqv/ALleVT/jfM7pfwztaKKK9Y4RGAYYPSkVQowBgU6koA4TWTjVZ/rWZD/x9NWlrX/IXn+orMiP+ktXiVP4jPQh8KKV0capWkn3RWZd/wDIUHvWmnShBIkzRSUUxBTSaesbyHCKT9KnXTbp+iAD3NXGnKWyFdFImm9a0v7HkHMsqIO/NSD7BYjdv8x62WHl9rQXOuhTXTZJIN74Ve2awdQtMZVJFNauqatJcDYp2RjsO9c5NICfvGnNQ2SNIc3UyriB4ydwqiyA9q2WkPQnI96qyQxydtp9qz5V0NeZlBY81r6VJ9nmBzVEwshyRketSo+3FS7g7M7eGVZEBBqWuYstSMRAJ4roYJ1mQEGoaMWrEC8Xb1aFVBxeN9KsgYNSDN3Rf9W1bA6Vj6L/AKtq2B0r06HwI5Z/EB6VlaoP3kda1ZWqf6yOnX+Bjh8RQJ5rGH/IWm/3RWwfvGscf8hWb6CvLkdMTZsT1rXs0SScK/Ssix71qW675lXOMmvQw/wHNV+Jm/bokUQVOgqPUD/oUv8Au063hWCPapJ5zzUeoH/Qpf8AdrefwshbnJP9xq4zQOdavf8AeNdk/wBw1x3h/wD5Dd9/vGvH7ncj00VWv+bY5qyKr3v/AB6tXsVV7jPPh8SOZ1H/AFafWtjT5GFkoHSsjUP9QPrWxpCGWzAHavNw/wDEOyr8BoWgVpArHrW1DYxodx5rKtoNs438Y5rSi1GNp/Kx7A16SOJkz2cTdBg1B9gbdy3FXXLbfl60qbtvzYz7VQilJp4K/J1pkemnPzmtOop92w7DhqYFSbTlCZQciq/nokDROhDY71sxEmMbuvemzQxvG2UHT0oa0BHAXQ/dyCrnhuZ47Zgh71XvFw8q+5qXw4flkHvXlUP4p21P4ZstPIHyzEjNOldHwVqO4U7eKjiBKcivUOIST/Vt9KwtQ+6v+9W84+Q1h6gPk+hrjxXQ6KJG/wDqhVrSz/pg+lVTzEtWdMOLwVyw/iI2l8LOgFOpgp9euchi6yP3qmslgexrZ1kcoaxm4rzK/wAbOmHwmVd/8fyVei+6KpXv/H6lXIvuisDR7FgGlUkjkYpBThQSL3p1IKWgBMc0xm244zmpO1NoAKKKKAIp0d1Ajfac9cZqQUtFMApppc5pDSAmtOj/AEqu45NWLTq/0
qB/vt9at7IESWH/AB9j6VZn/wBcarWXF4PpVq4H741T+APtFEHbq9qf9sV6NHzGPpXmsriPULd2OAGya7WPXrERqPMJOPQ1thGlczxCbsTpbK9+0gbkdRTruC481JIG5B5X1FU4dXso5ZH3N8x/umpjr1p6SH6Ia7uZHNZmmM4Getcz4iGLlD7Vpf29b9oZz9IzWJrN6t5MhWORcD+MYrnxUk6bNaKfMZsX/H1H/vCu5ggjEattHSuDVwkyMegbNdZFr1ksagu+cf3TWODaSdzSutjVEUaMXCgN3NV5pUvUeCKT5u5FV21uydSpZ8Ef3DVe31PT7XOxXJPohrt5kc1mXP7LAhADfvB/FV23jaKFUZtxHes0+ILfPEM5+iGnHXI/4bW5P/AKaaCzNWisk6238On3R/4DTf7anP3dLuT+FHMg5WbFMbofpWV/bF320q4/EimnVb89NKlA92FHMgszmNQ/4/pf941a8Ln/AInEg/2KpXbtJdSM6bGJ5X0pdHuZ7bU2a3tzM5X7oOK8qD/ffM7Zfwz0KisIaprB6aTj6yCn/b9aPTS0/wC/or1edHFys2qSsU3uudtOhH1lpv2nXif+PS2Uf79HOh8pga2MaxP9RWVFxct9K0NSa4fUJGuVVZc8hTkVnp/x8sa8ep8bO6Hwoz7w/wDE1StVPu1k3p/4msfvWqpwKSCRIOauwWyBPMlPyjmqcXLiqPiHUWghWFGwSOcV0UkkuZkO7dkWrzxXBp7mOCMNjuKrDxdDdfKzyQt6g8Vxkjljknk1CSDVe3l0N/YROyuri5ZfNSbzV9jWY+pPyGJzWRa6hNauMMSndauSBLuPzY+G7ihvm2Fy8ujFlvWYd6qtPzULEglT1FNJrJs0SRMZc0m/NQ5NOBoBolDlfu8j0NKDFJ1G00wUxuOadybFqNEjOc5rRg1RYMfrWHvPrTdxz1p3DlvudfbzrczrMh+UjmtAVy2k3nkShW+6etdWY8RrIp3RnoalxvqjKSs7Gzov3XrZHSsTQzkPW4Old1D4Ecs/iCsrVf8AWR1q1l6t9+KnX+BhT+Izm6msdf8AkKy/StdutZAH/E1k+leZI6kbNkeWrRjco4ZTyOlY9tDLLIRHJsHfirwsp8f8fTflXdh2+Q56i946TT5ZZYWMhzzxRqRH2GX6Vm2OlzSxEm/nXB6LS3mkyQWzSG9nkwPuseDW82+V6EK1zDf7hrj/AA//AMhu+/3jXYv9w1x3h/8A5Dd9/vGvJ7naj0D+1bIf8t1qvdarayQlFckn0FbQsbQdLeL/AL5FR3MEKW77YkHHZa9Wam09Thjy3OQu2Eludvb2q/pOoR2sO1gST6VBdKPJce1WNECm2b5QTmvOo39pZHXU+A0f7RjkO4K/5VJHeRpIHCMSPanoAR90VagUBwSvFekoy7nE2uw4a2f4baU/hUi6vMellMfwq8LmBV4x+VNju8twnHtVWl3FdFYapcHpYy0kt7fMvyWZH1Iq7JcjZ8o5qNJGdDkgEU7PuK6Ky3uo7Bi0GfdxTXvdTIK+RGOO7Ui3MiS4LcZq6Yo5F8zPOPWk0+47nIXSybpDIAGJ5xVbRlvHkkW1KjB53Vfvh++kHvUXhc4u7gCvMgv31jtb/d3Lrw6qR800Q+gqH7PqB/5elH0Wt6S23IW31RK4NejyeZx85mtZXpHzXp/Baz7q3kijy0zPz3roT0NY+pf6k/WubERSRtSdyv8A8sRU+nHF4tVx/wAe4qew/wCPxK5Y/GjZ/CzoxTqbTq9Y5DJ1n7q1ikZrb1kfu1PvWLXn4he+dEPhMm+/4/Uq7F90VQvyfty+mKvQ/dFc5qWRTxTRS0iRaXJqCW4jhKhzjccCpqAFpKQZ74paACilHSloAbRTqilQshAO0+tACgYoNN3BSqk8n9acaYEtp99vpUUn32+tS2n+tP0qOb/Wt9aroHUdZ/8AH4v0q1c/62qln/x+pVy6/wBcap/AH2jMnAOoWwPILCvQ7
eGJYExGvT0rz2f/AJCFt/vivRLf/UJ9BW2C6mdfZCefAshjwAfpVZ79EkK+XkDvTZbbzbliGqVLWOLmUg56ZrvOUsxSrNEHUcVz3iQYli4rokREXCAAe1c/4kHzRVz4n+Ezaj8SOdXBnjB6bhXeW8MYgT5F6elcGv8Ar4/94V31uf8AR0+lYYPqaV+hKEX+6PyqCWRIXwUAHrikuJvIZZC3yjqKaZIbxCA3NdxyssoVZQVwQafiqRmFmioVLD1FSrewlQS+PrQguWaKRWV1DKQQe4paoAxSY4paQ0WGcHqYA1Gf/eNSeG/+Q23+4aj1T/kJT/71SeGv+Q43+5XkQ/jfM7JfwztsUAYpaWvXOISkp1J3oGcRrn/IXm/CshP+Phq19d/5C834VkL/AK8141X+Izuh8KM2+/5C0VagPFZV9/yF4a1AeKSGyWJsOKw/EKEzK3bFa4PzcVV1y3P2VXPWto6waCOkjjphiqzE1Zm61VY1COoQMRVm1ufLlHP1qrupGXK7l4YVadiWjXu496CVfxqnmrenzi4tzG3UcVUkTy5GU9jTnrqTHsFOWminCsymSCmnoacRxUbVRIw00GhjTQaBlmJ9pGK6/QtQ3Rm3kOUbgD0ri1bBrU0u4KzL9acXZkTjdHo2iLsaVevNbfasLQm3bj61uiu2j8Jwz+IKy9W+/FWrWTq/346Vb4GFP4jObrWQP+QtJ9K1z1rIX/kLSf7tebI6kbemj52rUrM077xrUFd+H+A56nxGrYD/AEc4OCTTb8v/AGbIHxux2p2n/wCq/GjU/wDjxkrefwMyW5yUv3DXH6B/yGr0+5rrpfuN9K4/QP8AkMXn+8a8dnej2EPbqnB5qhcnMT49KQCiTmJvpXtPY85bnM3P+rf8aseHdphcNUE4+Vx9af4aZcSBuleVRdqqO6p8DNzjtVyCZFXay/jUGI1fOcipfLEn+r5r1keeX1tYnj3A1X84QkrjI9aj/fRLjkCkjgabJzQBZtmSSQ56mkuomjJZScGqhBifg8irkE5mG2Tp60AZzZzSiV1XAY4qxdKgf5aq02BmXPLtVTw+5S8nI9auXA+dqoaHxezD3ryo/wAY7n/DZ0pmkIxk1Gc96VHAcbhxU07RsAU616ZxFcjisjUh+5NbBrJ1L/VNXLifhNqO5SXm3FTWP/H2lQp/x7ipbP8A4+0+tcUfjR0v4WdLTqQDgUteucZm6wP3APvWFW/qw/0X6Vz5OK8/EfGdFP4TK1Ef6Uhq5D90fSqupD9/GatQ/dH0rnRr0LIp1NHWnUmSIUViCQDinUUUAIc54pabk+lOFAC5pabTqACkJHSlpCM0AMMaswYgEjoaD0pQMd6D0pgSWn+uP0qOb/Wt9aktf9cfpTJ/9c31qugdQtDi9Srl0P31UrX/AI/E+tXrv/Wim/4bF9ozLji/tj/tivQowTaLt67a88u+Lu3/AN4V6Lan/RYz/s1vgt2RiNkZsbTwTEkEgmtB4kuYhn/9VDSRSkoCCwqvvktnGTlCa7zlJ7aGSDcrNuXtWN4l6xfjXQBgy5Fc/wCJOsX41hiP4bNaXxo5wf65P94V3tsf9HT6CuBJ/eJ/vCu9tf8Aj3j/AN0VzYLqa1+hJLCkyFXGRVCWz8ohoCQRV55lRgpPJpEj2uWLZB7V3nKyKKEzoPO6ipzbRlNuOKjuJzDHleWp1rcefHkjDDrTQD4YRAm1SSPepaa7rGhZjgChJFkQMhyDTAdSZpaSgDhNV/5CU/8AvU7w4ca59UNJqv8AyEZv96jw9xrqe6mvIj/G+Z3P+GdyKWiivXOEKKKKAOG17/kLzfhWQP8AXk1r69/yGJvwrIH+vP0rxqv8RnfD4UZl8c6tDWmOlZd//wAhSGteGMyyLGByTSSb0RTLFnbb282ThF55rI1a/FzM8KH5F6Vd8VXp0yO0sojtMuSxHoK5jf8ANnPWuuf7uPIKnHm95mfdLtc1RatW9jz8w71luMHFc50oizTlb
FMamk4UmqBlzTZNt4wHQ1Yvcefkd6zdMJa4Z+wq9O++U+lU9EQtxAakWo1qZBzUFMeRxVdjzVojC1Rkb5jTJEc0wUjHNANBSJAat2cm2ZTnvVIHFTwH94uPWkDR6n4cbdGT7V0I6VzXhb/UH6V0o6V3UfhPOqr3haydX+/FWtWTrH34vxorfAxU/iM0nmslf+QrIfatY9ayl/5Ccn0rzJHUjc07q1aYrM077zVpivRw/wABz1fiNSwP7s0mqH/QJTTLGVFUqTgk07VT/wAS962n8LMo7nJyj5G+lchoHGr3n+8a7CX7h+lcfoP/ACF7z/eNeOzvR6n5kbIAq4NMc/IRUa8HNPJ3Cvaex5xzs45kH1qPw+QDKD2NT3A/eSVU0M4mmHua8mGlVHdLWDOjjVWkxnArVtFihbO7JNY6bjwvJqxCGWQbsjmvVOA1ruVPL46mqCyMvQ4q+1skqA5qo8QVioOTVCIjljk9aVgyJkd6HBQ0pmJXawyKYFbJzzS4z0pWweRSqdpzjIoAzLkfvTWfovGpSitK85mJrL0rP9ruo6k15W1b5ncv4Z0bDLYFIBg1ZtY1hnJlIINJcsjSnZ0r1DhIT0rK1AZiatXtWXqH3G+lc2J+A3pbmdH/AMewqW04uk+tQxH/AEYVLbHFyn1rz18SOnozqB0FLSL0p1ewcZR1QZs2rna6XURmzeubrgxK943p7GXqf+sjqzAfkWq+qfejNTQfcFcpt0LYpwpgp4oJFpq7u9PAzRigBKKjjjKFssWye/apKYDh0oNJmlPSkA3dzinA1Xa2VrgTEtuHQZ4qcUAKelNPSnHpTT0oAdb/AOvpJ/8AXNRbn9+KJ/8AWtVrYOo234vIz71eu/8AWj6VQh4ukPvV+6/1g+lN/AxfaMq84uIT/tCvQrb5rJB6rXnl9xLCf9oV6FZf8eUX+6K1wfxMmvsjOZWtpuT706WdpgB6UXv+vNLZTxxtiQde9egchetGZoRuHSsnxIPlirbl3CLdEATXOazM8qJuUgg96xxH8NmtL40YLffX6iu9tf8Aj1j/AN2uCfqPqK7uyObOL/dFc2C3ZtiNkNvBiPcFyR09qo/2jNt2EDPrWwQCMGq0ltFncVAr0DkZHbSo6AOQT6GkujJEwaEY9SKiEKzSYiPA6n0rVijVEAY5PvQIzJLx5Lcq0Z3HirenZ+yjK7eat7E/uio5ZoocBmAoGSUh6VDFcpMxC5qU0wOI1X/kJTfWm6F/yHE/3TT9X/5CUv1qPQzjXY/oa8lfxvmd3/Ls7vtRR2pa9Y4QpKWkoA4XX/8AkMTfhWT/AMt/wrW17/kMTfhWV/y2rxqv8RnfD4UZOoH/AImsP4V2WkWPlR/a5RjI+UH+dcsbc3PiK0j/AIScn6Cu2v7hYodi4AAwMV04aF/efQmq+iOU8YWR1KETQ/62LlR61xsE+8bWG114INdjc3Z3n0rFvLK3u5PNX5JPUd6dX3nc1p6KxTmG6EVkTLgmtiWJootrHOO9ZUw61hY2RTbrTWQuhA71Kw5oFUgYtsiwRbR1Peng5NRKSTipkWm2Sh4NWI+oqNFFToO9SDFc4Umsxmy5q3dy7VwKop8zVQh5pKl2cU0ipZSG5rU0i386cHsDWYoywrqNDg2wliOTUsJOyOx8Njb5i10g6Vz3h0fPJXRCu7D/AAI8+r8YlZGs/fi/GtisbWuHh/GnW+Bip/EZx61lD/kJyfStTvWWP+Qm/wBK8yR1I29O+81agrL077zVqCvRw/wHNV+IsxW7zREoMtUl8k8elkTEE+1T6cf3R+tV9UuGe3ljKEAHr61rU+FmcfiRzUv3D9K5DQP+Qtef7xrr5fuH6VyPh8f8TW9/3zXkM70emU4UAYp4Fe2ecc9cjE0gqlov/H9MPc1oXgxcvWbpHGpzD3rx3pV+Z37wOlhcxPkAGpnlMhBIAp1t5LMRJTZNgchOleujz2Pjmk6bjUhJHOeagRGb7oNScjrTEP3BvvU+K
Dz3IU4FMSFpfuilBkt2ODg0wCa3aJsdfpUtuYwpDjB96Yty28M/zUTSLIcqMUgMm+x9pbb0rGsCV1ckVr3f+uNY9r/yGgvqa8uelX5ndH4DpQ7SH1pR1qY2ckS7xzURzmvUOIXqKzL8fI30rTHSs6+Hyt9K58T8BrS3MiH/AFFSQHFwn1qODmEj3p8JxMv1rzeqOvozqk+4PpT6ZH/q1+lPr2UcZXvhm0k+lcx3rqLwf6LJ9K5Y9TXFil7yNqWxm6t0jPvUtv8A6sVFq33EPvUdvcPvSPyjgjlu1cfU26GotPFRL0pYpUlLBGB2nBxQSTClqMOpbaCMjtTqYA2e1FFFIApc0nBpR1oAWiimHd5nbbTQDz0pp6UDrQelIBYP9cKJ/wDXGiH/AFwon/1rVa2DqMi/4+U+tX7r/WL9KoRcXCfWr13/AKwU/sMX2jLv/vxH/ar0CyYCwiJ/uivP9Q6x/wC9Xe2wJ06PH9wVrg/iZNfZFK6cSTEjpTGtnEe8YxRJG6cspH1pv2hwu3PFegchds79Qgil4I4Bql4hAKRsPWn21oZzvOAuaZr42wRgdjWNf+GzSl8SOZlPT613GnHNjF/uiuGl+7+NdrphzYRf7tcuC3ZvX2RdDruxnmiRPMjZfWs+5zHMJF61etrgSoMkbu9egchQ+axbOM5qX7RJcj93kVelgSZcMKZHClsp20wII47gK25iD2qBrGaVjvlqy12rISnX3qE+fKu9Hxj0pMCxaWxt1OX3ZqyTzVWKOWSLEpIPbFTIhjQKSTjuaEBx+tf8hKSoNG/5DsP0NT67xqclVtIONct/xry9q3zO7emd92paQUtescIUlLRQBwmvf8hib8Kyh/ra1te/5DE34Vlf8ta8er8bO+HwoLNQNeRz2Q1d1C4JZhms6OTy9WjPsRVm/wCWyK6KMv3dhNe8ZUzbjVSSPJ4JFWXU5qBuKTNkU7gHy/mrJn61tXQygrHnFZ9TRbFNhzSCnP1pBQDGqOasIKhX7wqdKCSdRTi2BTB0pGPFICtMPMaljixTu9Sr0qrgIVwKhYVYPIqPYSakaFt4Sziuu0+Hy7cViWFv8w4rpol2xgVMmRNm54eHzSV0ArA0DrJXQCu/D/AjiqfEJWNrf+sh/GtqsXW/9ZD+NOt8DFT+Ize9ZSn/AImcn0FahrJT/kKSfQV5kjqRvab1atSsrTurVpivRw/wI56vxFq3ujBxtBBNWNVw1gxFUEQu4CjJq3ePIdOcSJgitZ/CzOPxI5qX7h+lcj4f/wCQre/75rrpf9W30rkvD3/IUvf9815DO5HqAFKKBzTwMV7ZwHP34xdvWXpY/wCJtKPetbUhi8PuKydP+XWZK8if8X5nZH4DpAMNjNPpq4ZxngVPIqDATmvVRwMfBc+SMbcih5PMJYVDsIGSKBVCLENy0RytPaQ3Dgn9KqipUfAxQBYlttkYYGoAKkQSS8ckUhUqcUAZV6MTViRts1lT71uX4/ffhWGR/wATeP615VbSp8zvp/CdfFfSSxhNuajZSGwasW1mqR+YG5x0qKUkvzXqR2OEjrPvxgH6Vo9qo34+U/SscQvcNaXxGHB/q2+tOjP71frTYfuuPc0q/fH1ry+x2HWxHMSn2qSooOYE+lTV7MdjiZDcjNu/0rkz94/WuumH7l/pXIvw7fWuTFbo1pdTO1b/AFKn3pbQfugaTVR/o4+tLaf6pa4up0dC4vNLHGkedigZ5OKRaeKCBQoDZA5qC9hmntzHBMYXP8YGcVZFFMCK3iaKFUZy5A5Y9TUtFFIBMUo60Uo60ALSZ9qDSUwCg9KKKQBFxMKWb/Wmkj/1opZ/9aatbB1I0/16fWr13/rFqgv+uT61euz86fSn9hh9ozr/AO6n1r0DT+bCH/cFef3/APql+td/phzp0H+4K1wfxMivsiDU2Cqq+tVrW2SfJLVa1SIsiv6UyytVdQ+8g9cCvQOQuxQLBGFXpWR4iGLeP61u44rE8Rj/AEeP6
1nX/hs0p/EjlJPuV2ekkHT4/pXGSfdNdNpVvNNaxssxRfQVxYT4mb1/hLt4G3jA4p6WBkRW8wqfarpTEecbiKdCxeMEqV9q9E5BDIlvGokf2yad8k8WQdymleNJUKsAQaIokhQIgwBTAjSzjQEAcGpFiVBhRipKKdgG4pCKfTDQM4vXv+Qm9U9KONbt/rVzXv8AkKSVR0w41m3+teQ/43zO5fwz0IUtRFHYgiTA9MVBLBMLhZInP+0Ca9c4Cy8ix9acCGGQeDTGQOMMM05QFXAoA4jXxjWJvwrJ/wCWla2v/wDIYl/Csn/lpXj1fjZ3w+FFaXjUIjWrJH5sWO9ZM3/H9FWupxiqpSsEjNkhwSGFQNADyBW20STLgjmqkliynKHIrpcewKfcwL2PatYU9dJqUZReRg1zdx1Nc73OiOxTfrTRSv1popobFBAep0YZ61WJAbmp41jbq1OxNyfeAKYzg1IsMHUv+tSbbZf4hRyiuVgMmngYqRpIRwCKZ5ieoqWh3FAqzDFuIqssiZ61p2cZk5VSfwqWBfsYvmHFa4GKpWa7ZCD1q+BxUMze5taAOZK3sVhaB1lrer0cN/DRyVPiCsTW/wDWQj2NbdYmuf62H6GnW+Bip/EZhrJT/kKSfQVrVkp/yFJfoK8yR1o3dO+8a0gazNP+8a0ga9HD/wANHLV+Is2kyRTq78CtLUyG0uQjoRmq1jawzxgvyfSrOqALpkoHAC1tP4GZrdHHSH9230Ncn4e/5Ct7/vGurk/1bfSuS8O/8hS9P+2a8g70eqCnimrTxXtHAYWqjF2P92saz41pq3NYH+kIfVawoPl1mvJq6VWdkNYHTqvSrEUTtyozinRxeYgC9akE0loCjKOa9SOxwMrSMxOD2ptOLGRy3rSFcVQh4hYruFNxg80olZRgHim5JpgXYbkRpjbzTM7iSagFWIxxRYDL1IYlH0rBl41OIj1roNUGJFPtXP3HGoQn3rysR/EO6j8J00UkowN5xUx560yGIsFIqaRQuOa9KOxxtDKpXw+Sre5R/EPzqneyJs+8v51nX+Bl0/iMKH7zj3pRw340kX+tk+tHc15LO1HW2hzbp9KmqCyP+ip9KsYr2YbI4nuRy8xt9K5GXiZ/rXYOPkP0rkJ+LiQe9c2K6GtLqZ+pjNt+NMtD+6WpNR/49WqKzP7la4ep0dC4DTYrpJJmhGdy9cihRjvUiKAcgcmgglpaQUtACE4paKTpQAtA60UUAKelRRI6bt7lsnjjpUuainnjt0DSHAzimgJKD0pAcjIoJoAI/wDWrTp/9Z+FNj/1gp1x/rTTWwdSEHEq/Wr911X6Vn5/eL9av3J+59Kf2GD3Rn3/APqQfeu+0o502D/cFcDf/wCorqdL1iKPT4V8mZiFA+VM1rg2lN3JrL3TZvo2ktyFGTVXToJUyzkr7VGdcT/nzuj/ANs6YddA6WVx/wB8V6HMjl5WaIjkE24nK1l+Iv8Aj1T60p1yUjK6dcH8KzNW1Ge6iVZLOWEZzlqyrSXs2XTT5kYz/dNdXpSO+koEODiuTc/Ka3NL1G9jtFWGweZR0YECuPCtKZvXV4nQ6eLhVYTnIHQ1ZnuIrdcyMFBrHXUdVYfLpTD6uKhuf7Wu8b9PUY6Zkr0eZHJY6GNleMMpyDyDT656KXXYowiWtuFHTL1MJteI/wBXaj/gVPmCxt0Vi518j/l0H50bNfP/AC2tB/wE0c3kOxtUhrFMGvn/AJfLZfohpv2PXD11CEfSOlzeQcvmYviD/kKP7iqGn/8AIXt/96ptViuIbxluZRK/94DFUoUklv4EilMblsBx2rypP978ztXwHpI6UtYK6Nen72sXP4AVJ/Yc/wDFql2fxFeqm+xxNLubVIelYp0Fj11G7P8AwKmnQB3v7v8A77p8z7BZdzE18g6vKR7Vlfx1d1O3FrfSRB2fH8Tnk1RP368qr8bO2HwoqT/8f0X1rXHSsifi9i+ta46VKBjlPNWNj7M4qt71s
2q74BkZrqoO90Zz0OO1te9clccE123iJQrOBXD3B5NRUVpHVTfulNjzSKaRutIp5pFMsRhCRuGauxW9sw+YY/GqMe3cMmtCFEI+8KszbJls7Q9j+dBs7L+6fzp4jTH3qPJQ/wAVArkLW1kOiU3yrYdIxUxt1/v03yEHVqQ7hEkW4YQflXRWUYWHoB+FYcJgRxucda6GJg0KlehFZz0QpMjh/wCPpqujpVCE/wClsParoNYCZuaB1lreFYPh/ky1vCvTw/8ADRy1PiFrD1z/AFsH0NblYmu/6yD6GnX+Bip/EZdZKHOqTfQVq55rIjP/ABNJ/oK8uR1o3dO+81aNZFlDJO7BJSmPStAadJ3upD+Fehh2+RaHPUS5jW0kgXTZIGVqTWLxfIkgVWJxyccCs600sPIQ91KvoQcVYvNPvkt3CXQeIDkOOcfWtp3cXoZq3MYDcgiuFu9QXw3qc+InmaU7go4x+Ndw7CNSzHAHUmvP/E2tQXsnkWa72H3pMcV5kFdnYrntS1IKyPL1Y/8ALa3X/gJNH2fVD1vY1/3Ur1ufyOLl8xNaH7yI+xrnRxq4Na95a3KMpnuTL6cYrEnBGpKAeo615dd/vLnXT+E7CKR4SkgYY7gmp726ilVcMAfrWFHpwdQXmlPtmpv7Kg/vSf8AfVehGUrbHG1G+5ZE6qfvgD61fSSzaDLXCBvc1lJpNqWG/fj/AHqt/wBkaeq5Ckn61d59hWj3I3vLVWx56n6VGdQt16MT9BViOxtI2yIFNWngtfLykag+mKPfF7pnDVYu0UrfRakGruOEspj+FW4nSI4ZARSuVZsqMD0otLuF12Mm8up5ype1aMe5rEvMm8iAOCT1ro777q1zl4f9Og+tebidJnbR+E6SLRbgxIxvpMEdAaG0odHuZmP+9WmTI1qgAONvWqy8HrmvQjBWORydyoNIt+5kP1ao7jS7ZIiwU5Hqa0x0qG6/1LVNSEeV6FRk7o5iL5ZpF9DQ3DGkT/j8lpz/AHjXls7EdTp5zZxn2q1VPSzmwSruK9en8COKW7GnkGuQuhi6k+tdga5C+GL6Qe9YYr4UaUtzPv8Am0f6VWsT+5WrN9zayf7tUtObMFcD3OjoaIPNSLUHJU7TzTrcSLGBI25vUUCLIpaaOlOoEHJFMjV1zubdk0+igApaSigBaayhh8wB+tLRQA1WyOBS0tIaABP9YKdcnElMQ/vB9aW5/wBZVLYOpET8y/Wr9z0Ss5jyv1q/Of3afSnf3WPqU7//AI9jXZaBg6PBx/DXG3nNq30rrNBuFj0eDd6VphP4jIr/AAmvIQsZY9hWU07FjV24uI2hIU5JrNNemcho20u9cHqKz/EP/HmPrU9ocTCoPEH/AB5j61lW/hsun8SOUf7prsPDn/IKj+tce5+U11/h1gukISeBmuHB/GdNf4TbWn1BbzpOCUzxwc1NmvTOIrXtwbaIOozzT451eFXJAyPWo71A8DAjNZUME02NoO0H14pCN8c0tNjXagHoKdVDCkxS0UAcT4l41M/Ssyx/5Ctr/vitXxP/AMhP/gNZNkcapa/79ePP+N8zuj/DPRl6UuKRegp1ewcQmKaafSUAcLrx/wCJvN+FZZ+8K09e/wCQvL+FZhI3CvIq/Gzuh8KKl1xdwn3rXHQVi3rbbqA+9bCnipQ2Prc04ZthWGDmt3Subb8a6cN8ZlU2OZ8ULtLmvPrk8mu98XzBWkXIzXnlxJk0qnxs6aXwkLHmkHUUhpyikimKI1ZgSSDVmO3GPlmYVUJ+epkY07k2LYgkA4uaQwzdrg0xSccU7c3vSuFkNMM3/Pc1G0UneYmpSze9NOTRdhZD7WMLKCSSc967C0ObZfpXIQ8MM102nXCNGEzyRgVEk2KWxOhxeH6VeB4rOw0eoFWBGRxV5TxWNraEM6Dw/wBZa3xXPeHustdAK9PD/wANHNU+IWsTXfvw/Q1t1ia99+D8aK/wMVP4jJHU1kRf8hSf6CtfvWRH/wAhS
b6CvLZ1o6DRxmV/pWyBWPo3+uf6VtgV6WG/ho5avxE8ccX2Vmccg8EVY3NJYNvGDtpbZR5PI4NUvEOrRaJos95Jj5Vwq+p7Ct5OyM0rux5Z4x1dg50y2bDNzKR2HpXDXUqwIYo+v8Rq/eXTyvNdSnMkpLH8aw5GLHJripxR37I+mulLSU9Qa7zzzM1Qfcrm7gf8TRPpXT6spCoTXM3fGoxn2rycT/EOyj8J0sCqyrkgcVc8jK5Xmk07T1nt1lYnp2rYht0hTaK9KnrFHHL4mYoQhsHip/JCkDd1rSltEkBOMGqsenFZMs3y1pckhFs7NhRxSS27x9RxWsoWNMdAKYzxOnJBFAjFKUuMCrcrR4IUCq5FAyjffdWubvR/p1v9a6W/H7sGubvv+PuD/ery8V8Z20PhO9tnVrFeR90VR/iNV7e2lSBZPMO0jpmp0zivRh8KORrVjxUdwMwtUwFRzj90fpSqfCxx3RyfTUJBTn++aR+NSf6Usn+sryGdyOl0Y5sF9q0fwrN0Q5sgPetPFetR+BHHP4mNIrkNTG3UZPrXYYrkdZGNSf3rLFfAXS3Mu8ObWT/dNZmkuHgODnBrTuRut5FHUqawfDtpcWcc4n/ibKjNcGh0G+tSr2qsAxlUg4UdqsA0hEoNOqq11GlykBzvYZHHFWRQIdRSZozQAxxkjmnVl6jdaglxHDZW28N96RvuitFN2wbsbsc4p2AkoqrcXQgkjTYzFzgbR0qwDSAdSGikzQMAQrrn1ouD+8qrdRvKY9r7drBjUkj5bNO+gCMen1rQl5iSs1jwK0HOYENNbMOpXuR/ozfSun0CNJNFi3dhXMT/APHu30rf0FWfSI8MR9K0wn8Qit8JekTa5A6CoyKlZCvBOfemFa9Q5BFYqwI61DrO82ALnvUo4YU3Wzu05Tisq38Nl0/iRyr/AHTXX+G1V9JQMMjJrjm6V2Hhc/8AErH1NcOE/iHTX+E24okhBCKADUF7efZUBC5Jq0KhntknHzdq9Q4inJcyzWm9VxnrUVnqMUUflv1z1q/MqQ2rcfKq1k6YkDyOXwW7A1LuBvRyLIgdTkGnVGjIFwpAA7CnBgehBqgHGiiigDjPFA/4mQ/3ax7Q41K1/wB8Vs+KP+Qiv+7WJAcX9sf9sV49T+M/U7ofAekL90fSnioPNWOLc5wAMk1z+oeNNPsXaMZkYelerzJLU5FFydkdPTa4xfiHaE/NbsKuReONNlHRlPoaXtYdy/Yz7GX4g41mUfSs3vmp9U1G3vdRedGwrAVFFJan70tefODlNtHTG6ihBDHK671Bwe9dBFZwFAdg6VnwvpYwWfP/AAKtNNT01FAEg/Ot6NLl+KxlUbew9bCD/nmKuwxxW8ZxhVHJqidasE580fnWPrniCB7No7d8sRjitnyRV0RGMpOzOW8XalHc6hKImyoOOtcg5y1X7pGZ2J781SKVxOV3c9GMeVWGU9egpuDmpFHbtVIlkDuofk05LlBViO2iZssAa0raC3UD5V/KrtczcrGWLo4+WNj+FL9on7W7/wDfJrpIlhAGAv5VaVk7EU+UnnOQ866P/Lu//fJpyrfSHAt3/EV2G6P1FRvJEAcsv50cqFzs5yHTr6Q/PhB71vabZrbbSWLMO5qCW/tYOZJkX8azpvFVnBny2Mh9hT5Q5jtZFjljVuN69DUIOK4+21y71KYLGCiV2FoqzRqA3OOtZzhzPQNjoPDpyZa6EVg6DA8JkDjr0NbwrqoJqCTOap8QtYev/ehP1rbrF8Q9ID9aK/wMKfxGRmsmL/kKTfQVqA1lRf8AIUm+grymdaOk0QZmf6VtgVi6J/rX+lbgr08N/DRy1fjLtucQivJPiDrrarrA02Jv9Htj8+Ohau+8Sayui6BLPn94w2oPc14jJIyRyTynMspLEnvRWl9lGlCGvMUr+UM+0dFrMdqmmfJJ9arj5mqYKyNZM+p44CwyelWkiVe1PAp4FdRwGLrqYhjb3rjr4YvYjXba+
v8AoaH/AGq4nUT/AKRCfevLxa987KHwnfaKc6claVcxYzXUNujwjKEdKvC6v5ANsWPfFd9L4Ecs/iZsM6opLHAFJHIsq7kIIrKQXNxmOViPrV20tjbA5fIPatCCxLH5kZXOKzJbd4u+RV55mb5Y8Z96Z93/AI+HHNMRmUoq/M9sI/lI5qmkTOflGRQNFHUP9RmuZv8A/j6t/wDerrNUgeO0LMMDNclqB/ewn/arzMX8Z2UPhO0V1NhEvfApiiorNd1rG3XirATFehT+FHNLdijpTJRmNvpUmKRx8p+lOS0Ejj5+NUYe1Ok+9Re/LrB+lEv3q8aXU7kdBoJzasPQ1r1i+Hj+7kX0Nbderh/4aOWp8TExXI+IPl1L6rXX1yHiTjUl/wB2oxXwDpfEYU053FMHA71GjYNOuYTNGVVip9RTY7QqBlya846CwripA4qJYB6mniAepoAlBUnNP3r61D5A9T+dHkD1P50XAm8xfWjzV9ag+zIfX86Pskfp+tIRN5qeoo85P7wqD7HF/d/Wj7JF/cFAExnj7sKb9pi/vrUf2WH/AJ5ik+yxf881/KgCU3MX99fzpDdRf89F/OmfZof+ea/lS/Z4h/Av5UxjWvIR1kFVJtWtI5kjaUBnOFGKu+TH/cX8qQwRkgmNSR04o9QAnIrQJ/0dKoEYFXQf9HWqWzAjm/492+ldF4b50hPrXPScwt9K6DwzzpC/U1rhP4hFb4TTkFREVaKZpphJr0zkIoDErfvMe2ara/g2HHSrEtis33iR9KqaxB5OlkAkgeprOqvcZUPiRyjHg11vhVs6aR6Ma5BuhrrfCf8Ax4P/AL1efhP4h01vgOiFLTQaaUJfduP0r1jjK2pzCK1IxktxWRaSRRIxb71bF+nmWrjGSBkVh29lJOT2xUtAWbeUzuYwxXPer8EMlq+5nyneqEdhNDJuHar8crXYMLgr2zQgLcd1DKcJICfSpqyl0dknDpMQAc9K1e1MDjfFJ/4mCf7tYcJ/0y3P+2K2vFZxfx/7tYMbYu4P+ug/nXkVl++Z30/gO31oSyaRKsOd+3gV41O0nnPvB3Z5zXuiqCgz0IrGv/CWlahKZJIAjnqU4zXfVpOdrGNCsqd7nkAkNSpIa9SXwJoq/wDLFj/wKn/8IVoy9Lf8yax+rTOn63A8wEzDuaeJm9TUmt2yWWs3NvENqI2FHtVJSx4UE/SsGmnY2TTVy4Jj6mpBMcdTSW2lahdY8u2fB7kYrZtvCdywDTyhP9lRmqVOb2RLnFbsyfN9zUcnzLXUp4XtUHzNIx+uKc3h60CkAPn3NU6MyfbQOFnizniqMkWK6q/03yGIUEisK6iC5rKzTsbJpq5llcGo5H2jippAxPApnkMeSKtESKnmTk/KcUu26b/luRV5IB6VKIfar5jPlTMw2t24/wCP2QfjUR0+9PTUJPzNbax+1P8AKHpT9pIlwRzzaXfHpqEh+rGmHSr3+K8Yj/eJromhNRFSOop+1kSqcTAGkMWy8xPrxVqDTIEI3AsfetBlpg61LqSZSppGhZskQAVQB7Vv2kxBBQ1y0T4rTs7oxsPSo5i3E9D0nU+Ap/GuijkWRQwNcFZyCVRJGcMK6jSrvcNjde9ddOfQ5KkTarC8RHAg/GtztWB4kOBB+NOv8DIp/EZINZUR/wCJnN9BWkG4rNiU/wBpTHBwQOcV5jR1pnTaFzK/0reHWsHQv9Y9auoG4j0+draJpZtp2IvUmvQw+lM5qmszzbx1qx1LWls42zBb9R6muI1OcbtgPArqD4X8RSGWWTTZjLISxJI/xrHuPA/ieRyf7Lk5/wBpf8azScpXZ0qSjGyOXd9xpUFdB/wgXiXvpj/99r/jUqeBfEQxusNv1kX/ABrXYi92fSKiniuePjPw+g51OD/vqoH+IPhyPrqCH6VvzLucnJLsamvL/wAS8+xrg9SODEfQ1raj8QvD1xbmIXJOfQVh/wDCWeHGP7xi+OmVrhxFPnldM6aV4qzR3
/h7EmmITzWwAK4Cy+I3h60gESGQKOgC1Z/4WloQ6ecf+A11U5KMUmzGdOTd0jtXVsfLjNV2W6IIGMVyB+KmiDokx/CmH4raR2hl/MVXPEn2U+x1q2k27JbBp72Dy/fc8Vxh+LGldoJPzFMPxZ08dLZ/++hS9pEPZT7HbjTYwuCSasJCkYworzt/i3adrXP/AAOo/wDhbVv/AM+f/j9P2kR+xn2O411f+JY/sRXB3/34f96mXPxUtbiIxvY5U/7VUD8Q7Ekf8S1DjpkiuSvBVJXTN6alBbHoWmgtYRfSrWK84X4nKi4SyUD/AHqY3xPmPS0jH1auiFSMYpEOjJu9j0nFIw+U15mfidddrWH/AL6NRN8Tb0jAggH4mm6sRexkdDqQK6uOD0pJT8wrkZfHU8z7nhiJ/GoW8ZzE/wCqj/OvPlSbbsdCTPT/AA8fmmH0rerxi28f3tmSYY4QT1zk1Mfifq/YQf8AfNdlGXJBJmU6Um7o9hrj/FHGpJ/uVxbfE7WezQD/AIDWbd+ONQvZRJOYmYDHSlWlzxsghTlF3Z1wOakWuDbxXd+qflTT4rvf7y/lXL7GRrY9AFOrzv8A4Sq//vL+VIfFOof3l/Kj2LCx6NmlzXmx8UaiejgfhTf+Em1L/npT9iwsel0ZrzP/AISXUv8AnrR/wkmpH/lpR7EfKemZpM15p/wkepf89aadf1E/8t6PYisemZ96K8x/tzUP+e5o/tzUP+fg/nR7ELHp1ISPUV5idc1D/n5P50n9t35/5eW/Oj2QWPTtw9RSbh6ivMDrF9/z8t+dNOr3n/Py/wCdHsWPlPT2II4I/OrSkG3AyPzryX+17z/n5k/Ol/te9/5+pPzpqkw5T10Rb4iu5c47mt7QjHaacsU00YbOfvV4GdWvD/y9S/8AfVMOpXZ63Mv/AH2a0pw5HdEyhzK1z6UF7ZjrcR/99Cl/tCxHW6i/76FfNP8AaFz/AM/Ev/fRpPt05/5bSf8AfRro9q+xn9XXc+lTqunjrdw/99CszWdSsZ7Fo47qIsf9qvns3s3/AD0f/vqk+1zf89X/ADqZVHJWGqCWtz1smIjmeMf8CrodA1SwsbNkmvIgxbP3q8E+0y/89G/Oj7Q/99vzrCnFU5XRpKnzKzZ9Jf8ACR6SP+X2L/vqmnxPo4/5fov++q+bvPf++3503zm/vH866PbSM/q8e59Ht4p0YjBvYv8AvqmjxRoidL2EfjXzl5rep/OkMjf3j+dHtZB9XifRh8W6IOt9F+dNHjTw/Fz9sjH0r5z8xv7x/OkLse5/Ol7WQfV4n0YfHvh5Tj7YtIfH2gdrwV857j6mp7OGS6ukiTOSaftZB7CJ7bqN5Hr90stj86AY3UkPh67eaKQuihWDc1T8O2sunWiDHat19QlVOtT7GEnzS3JcpR92JvecsagMw4qtNrEEIPc1zFzqMjZ+Y1lTXTvnJq5VkhRoX3Opn8TBM7QKpSeJpCeDXMPKW71HurB4iRuqETVeS1ubpriaGNpGPJIrTtZrVfuQouPRRXMqxB61chcgjmnGs7hKmdbHdLtyoFRtqpjOClZ1pN2JouSNxrd1Ha6MORXNIatC33hg019QtyOtYhI71XnbA4NT7aRXs0WNQuIZCeBWBcxQv2FFzI+T8xrNllfPU1zTndnTCNkPe2iz2pv2eMVVeV89ajMzetRzFWZe8mOmmJc1S85vWlE7etPmDlZaMYphXHSohMaUS5ouKxIBTXiDDijfmnBqCbFOSIrVcjmtVkDrVKaEp2oGiAcGpo3wagpynBqWUbenX7W8i8/LXb6dcJKqyxtz3ArzSKQg1v6RqTW065PyE81dOfKyKkLo7bxFrtzouj/bYLcTbThgT09683v/AIk3l7t32sQ29MGvUJbeHWdEmgOGSZCK+e761ksr+e1kGGicqfwrtqK6OWnbVW1OnPjm77Qxioz41vychYh+FcrRzWHs4m1zrE8d6tEcxSIh9hTz8Q9f7XY/75Fcj
RVJW2E7PodU/wAQPELf8v2Pogqs/jbxA/XUpPwAFc7zRTsKyRsv4q1uT72pT/8AfVQSa9qcv37+c/8AAzWWVNN2miyHclEp9TThKaYIqeIqNBq4byaN7U4R07YMUrodmR729aXzH9TUgQU7YKV0OzIt7+ppQz+pqXbTgo9qLofKQZf1o+f1NWNopdopcw+UgBf1pfnqfC0uF9aXMPlIMP60fPVjC0fLRcdit8/rSfvfU1Z+WkJFFxcpBtkP8RoEcnqanzRRdhyoh8p/U0eS/rVnmlpczHyIq+S5/io8hvWrVGKOZhyIqeQfWj7OfWreKSnzsORFfyjS+VU/FGRSuHKiDy6NntUpYetNLii7CyIyp7Ckw3anlx60m8etGoaDdr0bH9ad5i0vmCndishnlv60eU3rT/MFHmD1ouw5UMMTHvSeSfWpPMHrSeYvrRdhaIzyj60eUad5q+tJ5qjvT1FaI0xGjyqUzD1FIZl9aNRe6IY6TZQZl9aTzl9aeoe6O8ul8vFN89aPtC0tQvEdsNL5dIJxSidaLMd4h5ZpRGaTz19ad56etLUfuhsNHlmjzk9aPOT1o1D3Q8ul2UnnJ60nnp60WYPlHbKTZTftCetH2lPWizFeI7ZSbKZ9qSkN0nvTsxc0e48pXa+A9G+0XX2h1+UdMiuHF0hYDB5OK9s8L2qWmjQ7Rglcn61cU76kTkraG1IEjTAwABWTcz5zg1au5eCKyZnzRUlYiEbkEz5NVXPapZDzULDmuSTudKRGRQBT8UoWoGCjNWI6jVamRapMTLcEm0jmrErFhmqSjFXRGTFn2raDbVjCW5Rkkxnmqkk3qakulZSay5ZCpqZNo0STHyuDnNUJVBpzymoXeovctKxA61CQc1YLA0zAqSrkO00bTUuKAtA7keKXFSFaaRQIQGnBqZRmmmKxYR8VIyrIvvVQNipkkqkybFOeIxNnHFQ5rWdVmTBrLljMbkHpSaKTBTzV+B+lUE61Zi4xUMqx3nhjVvKcW8h+U9K5b4n6B9l1FNVhTEU4w+OzVNYy7GVlPI6V2mp26eIfB1xCwDSCMlfYiu2hPnhys5Ki5Jcx4KFp4UVA8jxuyEYKnBBpn2h6fKx88S1tFG2qvnvTTK570+Vh7SJaK00iqplf1pS7HvT5WS5osU3I9arkt602nyi5y0J1p32gelOEcA7j86fmADqKhtGiv3I/tA9DR9o9jT/Ntx3FJ59uO4pfId/MT7QT0U0ec56IacLiDsad9oiHej5Cv/eI/Nl/u0nmzelS/aY6T7THRr2C6/mGb5jRump/2mL0o+1x+ho17BdfzDd09O3zCj7bGOxo+3R+lHvdgvHuHnS+lHnSf3TSfbo/7po+3xj+A0WfYOZdxfPk/umjzpP7v6U37fH/AHDR/aCnpGaVn2Dmj/MO86U/w/pS+ZN/dpn289o6Pt7f88xRZ9h88e5KGnp2Z6g+3ydkFH22Y/wj8qOV9g549yxm4pCLiq5vLgDO3j/dpv22c+n5U+Vh7SPdlry7k96Typ/79Vvtk/r+lIbu4Pc/lRyyDnj5lryZv79Hkyf36qG5uD/F+lAluH6Fj9BRysXPEteQ/wDfo8g92qAJdnoJP++TThBeN/yzlP8AwE0reYcy7Evkf7VIbcf3qb9jvj0hm/74NOGnag3S3nP/AAA0fMLrsJ5A/vUeQP71SDSdSPS1n/75NNl0vUYULyW8qKOpYUadwv5DfJH96jyR/eqtslz1/Wl8uT+9+tVbzFzeRZ8hf71Hkr/eqr5b/wB6jy2/vGly+Yc3kWvJX+9SeSn96nQaNqNxGJIbaR0PRgKsL4b1dv8AlylpNruF/IqeTH/eo8iM/wAVXx4W1k/8uUn5inDwjrJ/5dD+LClzR/mC/kZvkR/3qPIh/v8A61qjwZrJ/wCXdR9XFSDwRrBHMSD/AIGKOeP8wfIyPJg/vj86XyoP74pl5p01hctBOoDr1wc1B5dUlfqK/kW/Lh/vj86Tbb/3x
+dVdlJsFHL5j5vItlbf++Pzo2W/98fnVaOBppFjjUs7HAA71pr4a1VsYsZP0pOy3Y+Z9ipsg/vj86bsg/vj860l8Kauelk35injwjrB/wCXTH/AhS5o9wu+xkmOD++Pzo2Q/wB/9a2R4N1c/wDLuo+rinDwTq5/5ZRj/toKOePcXyMTy4P74/OjZb/89P1rcbwVqiKWZYuP9usSW1MTtG64ZTg0KaezHbyE8u3/AL9Gy2/vGozEB2o2Cq+YvkWbdbb7RHk5+YV7hpMijTIsH+GvCYVAmQ/7Qr2fRW36dGB/dqo6MUtUXJ33MaoSdauSoRVOQc1jU3LgVmHNR4qZlpu2udmqI8CnKtO2GpFSpBsESpVUDtQBinDpRczbFHUVpfdtxWfEu5xVi6l2xbc810UdE2Zy10M66cEmsuUAk1ank5NUZH560pM1SsVpIxzVWRCOlWnaoXapsiyqwakGambmmgCpsMaDUlJgUZFIBaYadmmmgYw02nGmmgBCeaQSEGjNMNMRbSTBzT5YllQnvVJHwatxSYNWiWijsKNg1Yj6irM1uJE3r1qCNecGokrFRdzStD0rtvDFx9+A8hx3ribUYxXR6NN5F7E3bPNaUJcsjOqro8r8X2X2DxVfwYwPNLAexrDr0v4taO0d/batGuY5l2OfQivNBXdLc5AorZ0J7KN5DeWv2gdhjOK6i3u9FYcabEh94xWMqii7FqDZ59S4Poa7XUr+1fEFrZwqT/FsAxVGSNFXG1c9+Kn23kWqLZzBB9DTcH0Nbcm0ZOBWZPcEsVXpVxnzdCZU1HqV/L96PL96lpK0uRyoZsFGwU+kouDSNm00GOaOJzMw34zxXUQeBtPdVLTTHI9QKz9OP+hwH2Fdxa/6pPpXBOrO+5tyRS2MJfAelY5acn/fqVfAekdxMf8AtpXSpUgrP2s+4cqOaXwJo2OY5T/20NOPgbRsf6qT/vuumFKelL2k+4WR4fqdmlrqNxAg+VHIGapFBWxr4xrl3/10NZTCvRg20RJEeBSYFPxRVE2NDQ7SG71ARzJuXaeK7yw8M6U8IZ7RSc9TXF+GuNVH+6a9M07m3/GuSvJqWjNIpWIE8OaSvAsYf++amXQdMUYFlB/3wK0lHFOArm5pdyrIoLo9gOlpD/3wKkGm2Y6WsQ/4AKuCildhYwtdsLZdJnKwRrheyivMmjAPQV6t4g/5A8/+7Xl5Fb0W9S0tCuUFNK1MVppFdFwsQEV0/gmJXvbjcoOFHWubYV0vguUR6hKpBywAGKVR+4yLHeJAg/gX8qlEKjsKeq96fiuEBnlj2o2VJijFICPyx7VkeI0A0eY47Vt1jeJf+QNNTW40eXlaaVqZlppHFd1yrEJFNIqXFNI5ppk2PUPDi/8AEjtv92tkKPSsbww27Q7f2GK2xXDLdgG0UoWlFFSIMUhWnUh6UAeXeKhu1yU1gla2vEL79ZnP+1iskjNdtP4UVYiIppFSkUwitLiaL2g/8hy0/wB+vXEWvItD41u0P/TQV6+vQVzV90IdtpdopRS1zgNwKXFLRQBHIuUI9q8n1hAuqXA/2zXrbfdNeVa2P+Jvcf71a0dy46mSVphWpyKYRXVcGhsS5mQf7Qr17w4GFqiH0rySI7JUb0Oa9L0HXrZlij3KrYx1ppkSWh1c0fy1mTJg1qrMsi5yOapXKjrmipaxEW1oZ5Xmm7amKikAFcTZtcjCU8LTsUuKm4mxMUYpaMUhE0J25NU7ybrzU7NtWsu6kJJrqTtGwRV2VppM1Ud+aWRzUDPWdzVIRmqMmlJpDRcdhrU2nE0lACUZoxRQAhNJmg03NAwJ4phNKTTDQAUdaSjNMQ1uKfHJg4NJ1qM/KaYGpbz4IU9KsS2w/wBZGOKyYpe1a9ncfwscg1Ss9CWraodAMEVr2jbXB9KpPAAd69KtWxwRUJcrE3dHW3mmQeKPDMthNyxX5T3Ddq+etQsZtMv5rK4UrLCxVhXv+g3ZhuApPBrl/i14WEsSa
/ax8qNs4XuPWvRi+aNzjatKx5ro+MyZrRlkCisfT2Zd+O9aUMRlcbj9a46i9651w+EmtojzKw5PSic8VZf5VwOlZd9ciJSM/NULV6F7Io31xj5FPJpulWRuroMw+RDk+9RW9vJe3QjXkseT6V2Vlp6WsCxoMY7+tbTlyRstzFLmdziaKu3emXdiQLmBoyeme9VdlbXRlYZikxUmz3pdi/8APQCi4+VnT6Yf9Ah+ld3ZHdbxn1FefaNcRP5NsMyNnoor0O3Xaij0FefU0ka9C4hp+cVEnWpayEOBpc0yloEeReIlxrt3n++ayCM16dq/hS1v55LkM6yNyQOleeXcItZnjMMmFJGSMV3U6iasgaKW2jFTeYnaJqTzR/zyrbmZNkXtBOzU1Psa9I0WUywMMcKeteZafLci6X7Pb73PGK9T0iKWLTolnRUlxllXsa5MRvctbGkKcKaKcK5gFpTSUUAZfiD/AJA830rzRkr1uaFJoykihlPUGuY1rwyskBexQCXPToK1pzUdC4tLQ4crTCtS3tlfWcmycKh+tU2EuP8AWqPxrpWvUbfkSMpra8KnZq6571z+xmIBnFdt4f8ACzJ5N810xJ5C7cUqjSjZshnbL0p1NUYFP7VxMQhIUZJAHqaUEMMjBB9KjnhS4hMb52nriljjWKNY0GFUYAoEPrH8Roz6RKEUk+grYprqHUg9DQhp2PIpIyDyKiKkdq6fWfDOoNdSzWzp5R5C55rlZYJ42KvMAR1FdkWpdTS4MtM2moij95xVrTtHutUm2QPnH3j2FXoldsm/keieEz/xJYlzyM1visXw/pTaTZeS0u8k5zW0K4pWb0JY6q8E8ks0qNEUVDgMe9T9qWpAWkb7tLQelAHlGsqTqlwSP4zWaVxXpmr+Hba+ikZECzMOGHrXn9/4evbHLTE7P73auqnOLVmXfsUcUwhfUU1rcDq9IluruEViWJwAK3Vu5N32LuksiatasWAAkFevxnKKfauA0bwW4uIri5kwFIbaDXfooVQo6CuWtJN6CJBS0wqSRzinisACiiigBG+6a8t1sf8AE2uP96vU653WfDEF+Xljby5iOvatINJ6lRdjzskCoyRVvU9EudMl23D9ehHes0wgf8tDXWrMbb7EpYetWLGTF9BhujjvVBLfzZVjRiWY4AFd7YeCLOOGGSWWUzYDNzxn0pTlGK1JuzrYJHWNcHjFSM7N1pscYRFUdAMCpMVyObYiEijFSEU3bUBcbRS4pKACiilQbmxVRV2BDOcLWPcPya1r07QRWFcPya2npoaQK8jVXLU52qAmszQfuo3UzNFMB2aXrSCnCgQYo207FLimBERUZqZhUTCgZGaaacaaaAG0lLSUxDhSOuRQKfjIpiK4O01cgl5HNVnU0iNg0thnUWFyrDy3rQMJibI5BrmLacqR6+tdTplyt0nlSfexWsLS0e5nL3dS9ZttcH0rsrdItW0mS0uFDI67WB71yCwmCUA/cPSuh0ecxyKCeDXVSutGctXXU8R1XQn0HWLqxfOFf5D6r2p0EXlpjuepr0/4j6ELmCPVIly0XD4HavNHYRoWJwK5q8XGR00pc0StezrDGSTXNzStPKSeSe1WNQuzcSnB+UHir3h/TftVz58g/doePc04pQjzMU3zOxq6Hpf2W38yQfvH5+ladlcfabiaNV4iO3PrV1I8DgU+GGOLOxFXccnA6muSU7ttjWmxsXun295GUniVx2yOlcH4h8NrpyefC3yE8r6V6WV45rmPF2P7KI4zmuq7TMovoeYupFJDC00yRr1Y4FSuPaptO41CAkcBxW3NoNo7vQNGh02HIO+VuSxFdJHwKpWxUopU5FXUrzpNt3ZZMtSE8VEtPOdpx1qRCoSRyKkqNCdoz1p9AAao39lFd2zxuinI4JHQ1eqC4lSKMs7AD3NIaPJNRha2u5YuPlYjgVnMW9a2dXcS6hMy8gseaymQ56V6EHoEkd14O06Eact0yBpWY4J7V16DF
c54RkX+xo49w3AnIro1rjq3cmHQkFLSClqBCmgUhIHWlBoAWkIpaKAOc8V6fDcaXJKyjegyprzKSLaTxXo3ibVEMMtkmd/euElhPrXTRbS1L5boNFtludYtonGVLjIr1+JAihQMAcCvJtLl+w6hFcEZCHOK9S068S/tFnQEBvWlXd2TayLmKMc5opc1ziCiiigQUGmKX3nP3e1PoAjcZHNebeL7GO31HdGMBxkj3r0O+uktLZppM7R6V5z4gu/7RvDLH93GBmtaV+a5aWhzZU5613vgiFRYySdy+M1xPktntXV+FdTjs1NtJ/E2Qa3rax0FyneL0qSo0OQD60/NcYhwpaapzS5oAWigHNFAAayNehWTSpwQPu1r1zHiTWI4opLNQTIRz7U0tRpannUi4OKt6PGG1WAN03VDIh3dKW3ke3uElT7ynNdzd0VY9ehACgD0qasbQdRbUbISFSCvBrZFcLVmSxscm/PHQ4qUUwADpTqQhaKKKACkNLSE4oA5bxrAr6YrkDKtxXnLIRXd+LNUinjNnH8zA/MfSuMZDmuqi2kXbQseHIRJr1sG5+bNeroMivJ9Mmay1GK4x9016rZzLcW6Sr0YZqK+ruTaxZFLSjpRWAhCKaaeaYaAGHrSUp6000gA1NbJuJPpUBNaNvHstixHWt6EbyFN2Ri6o+GYVz87cmtfU3zI3NYUzcmqnubQ2IXNRZpzGoyazNB1LmmA06mIeKcKYKetADxS4pBT6BDCKiYVMaYwoArsKYRUrCozTGMNJTsUmKBAKkWmAU9aaBiuuRVdhtareMioZFpsELC/Na1lcNG4ZTgisRDg1egfDDmknZg1c9E0u4i1GDY/+sFaUEZgkAriNOumglWRTzXe2M6X0CtxuFehSkp+pxVY8pq3MCahpcsDjO5SK+efE2+xupbPGCrEfhX0RattXbXjHxV0V7bXEukQlJxjj1qq8LpS7E0JWbiefWtrJeXKRRjJJ59q76zsfsVmI4VBYDp0yaqaBpK2kAdxmRutb6JivNrVOZ2Wx0pWGIpCDOM96ihuYppJI0bLIcNVlhxWfa2KWk00in/WNn6VjoM7IrxXAeKLa/jmlkYs1tuyMnpXoNYPitQdGlrtkY03ZnlcsoB6UkDNJOiIBknApZVGafYAC/h/3xV9DV3uei6NavaWapJIXY8k1sLVS3H7sfSra157d2DJVNLJKkMZd2AUdSaRabNEk8TRuMq3WkIljdXUMpyCMgipKhiRYowijCrwBUoNAC1l63YPf2eyNsMDke9alNbkUXtqNbnkl6k1tcPFIvzKcGqJkfP3a6HxEg/tWbisNl9q7ISurltXO18I2QFoLpyd7HGO1dWprB8L8aRHit4Vyzd5EMkFLTBT6kRHLCsuN2eDnrUo4FJTPPj83yt3z4zigCWikzSKxJIK4xQBzniawiNo9wEHmZ+9XAyoc969P15d2ly/SvN5h8xrakzWOqLGgW6z6xDHIoZc5INeoRRpEgRFCqOgArzXw7xrUX416WnSlVepEkPooqtHeRzSyxRnLxnBBrMkfD53mSeZjbn5afM7RxMyIXYDhR3pITIUzLgN6CpTQBFAzvErSKFY9QO1SUUUgIbiFLiMxyKGU9Qa8+8R2MdrelYV2qe1ejGuH8VD/SxVwdpFwOQZGz1Ndh4Q0iJoPtkw3uThc9BXLuPmrvPCnGkIPc1tVk+UclY6BRgUiljIw24UdDnrThThXMZgBS0UUAApaSlFABiuf8QaRBPbS3AGJQM5roapap/yD5v92mnZlLc8nlVgTTbWLzbuKNujMAasTj52+tNtPlvoW9HFdd9DSSPVLOCO3gSONAqgcACrQqvbHdEp9RVgVxmTCnUwuqkAkDNOFAhaWkFLQAUhGRS0GgDh/FunxwEXES4LH5veuOZnya9C8XgGyH1FcC69a6KT0NVsTaSpl1OFW5G7oa9Vt1CRhQMDFeX6IP8Aibw/WvUYvuiprbkSLA6UUg6UtYkiG
m040xqAGmmmg000DQ5BucD3rVuf3VmB7VnWib7lR71c1N9sWM9BXZh1aLZnPWSRyeoPmRqx5TWletlzWXIeayludUdiFjTKVutNqRi04Gm0tAEgqRaiU1ItAEgp4pgp1AgNManGo26UARv0qKpWqM0xjaTFONAoATFPUUmKeBQIcBTZFqVRQwyKoRRIw1TRNg0yVcGkQ1LKNqzl6V1eiX5t5gM/K1cTbPtxW7Zy5A5rWnOzMakU0eoQsGCyDoao+JdGi1rS9jKC6Hch9DVfQr9Z4BG5+YVvxnK4NeqrTicDvCR5I1kyHyiSm04OKkkKxJuY4A710viDTRDcmVBw3JrnZUDjDDODnmvFrU3TlY7Yy5lcgLZXPrVO4uUjyCwBA6Zq1KwVTXK6zGgZplYq56kGoirspHqOKw/FQ/4k8tbxFYXinjR5K7ZrQwp/EjyqUcmltTsuYmPZhT5RyaiXhwfQ0uh0M9RtW3QofUCri9ay9Lk8ywhYf3RWolcL3JZMKWkFRXHneWPJxuz3pCLApRTVBCjPWnjpQAtITxS0h6UmB534hGdUlrDYc1veIB/xM5frWG/Wuqn8KN+h3PhVw2lqvoa6AdK5XwjLm2kj7qa6pawnpJmUtxqS7iw2nipweKbikXduOcYqSSSoSqLOGx8zcZqaoppEhXzJCAo70AOVX87O75MdKlxSIQVBHIPNOoAzta/5Bsv0rzaYcmvSNbONMl+lecyjrWtM2hsTaG2zWYD74r02M8V5ZYv5epQt/tivUIGzGp9RRV3ImT0xYUR2ZVAZup9aeKi8yT7V5flnZtzvz39KzIJsUU13CAZzz6U7PFIQlFMLFlO0c9s0se7YN+N3fFACmuI8U/8AH2BXbmuH8UHN9j2qobmtPc5lutdv4SbOm49GIriGrsPBz5tpU9Grap8JUtjrRS00dKdXOYhRmimtkj5TzQBBeQSzqixTGPDZJHcVaQbVAznA60nanCmAtUNWbGnTf7tXqzNcbbpsv0pFR3PN5h8xqGP5J0PowqzKM1Wbhs11LY2aPUtPffaRH1UVcFZWhyeZpkLe1aimuZmDKl/aNdeUFJG1gcg1dUYAopRQIUUtIKWkAUUUUAcz4u/48vxrg3Fd34t/48vxrhWrelsbR2LGjHbqsJ969PiPyivLNOfZqELf7VeoQNmNT7VNXciRaBpc1GGp2ayIFJpjUpNMJoAQ0wnFOJphNAzQ0tPmaTtUOqyZDVds08u0z6jNY+pyfKa9CK5aSRktZnO3LZc1nSHmrtwetUH61ys60RE5NJQaO9Awp1NpRSAeKkWo1p4oAlU06mLTqAFNRtT6Y1AiNqjNSGoz1pjEoHWilXrQA8Cngc00CpFFAhyilIpyinFeKoRTlWq44arkq8VTYYapY0Wom6Vq2cuCBmsaM8ir0D4I9aEwaujrdOuTDKrKfeu6sbkTxKwrzSzmyAa6vRb7YwjJ4Nejh6nQ4a0NLm9q1sLi2JxkgVwWqoLSN3IPyivRg29MdiK4zxRbiON2bpjNPF07xuTQlZ2PP/7TLRM82EOT8voKw5hJqtwSCRbjv61ZurZtQuwXUrFGfxaqep6gtshtrYgHGCR2rghHXTc6/U9lxWB4s40d/rXREVy/jOXZpqr/AHmrpmtDmpfEjzWUcmoMc1Zk61ARzWaOtnceGLgS6cFzyhxXRIa4TwteCK9aBjxIOPrXdIa5aitIhk608CmL0p4rMQuOKSNCmcnOacc44oHIoAdSN0paZIQqkntSY0efa+f+JnL9axWHJrU1aQSX8rDoWrMauqGxv0NzwrNsvmjJwGFd2vSvMdMuPs2oRP8A7XNelwuHQMOQRms6q1Mp7k4PFKMU0U4ACsiBaZNCk8eyQZX0p4paABAFUAdBTqTFGaAMvXnC6bJzXnsvU12viecLarHnljXFSc5rWmbwVokCtsmVvQg16fp0ols4n9VFeXP1rvfC92J9NVSfmTirqLS5EzohS01TTq5zMQjNL
RRQAYoxRRQAhrg/Er51Fh6V3bnCk151rUvm6jKR64q4bmtNamQ9dF4Qm23MseeozXPPWj4fn8jVo8nhuK2lrEqWx6Opp4qJTwKkBrmMBaRV2jilpaAEXOOacKKKAFNYniOTZp7Dua2q5fxVPwkQPvTW5cFqchJ3qs3WrT1WcV0o2Z3XhaXfpirn7pxXQKa4zwhdYaWAn3FdkprnmrMwktSQUopopwqSR1FFFABSGlpDxQByni+T9yqDua4pq6TxTciW+8tei9a5tq3prQ6FsNjbZKrehzXpmnzCWzjcHORXmJ612/he8E1kYifmSiqtLkTR0oNOzUYNOrnMh1NNGaQ0wENNHLge9Kxp1um+dR704q7SG9jUkby7VV9q57UXyDW3evjCiufvz1rvqvQiktTFnPNUnq3OeaqNXKdRGaTvSmgUCCkp2KbQMcvSpBUYFSCkA9afUY607NACk8VGaeelRmgBhpppxpppgJTl602nKKAJBUqio1qVRQIkUU/HFNUcVLjiqEVpVzVCVcNWnIvFUZgc0mNEaGrcLciqa1YjPNSUzYtJdrYrespyrAg8iuYhfkVs2knTmtqcrGFSJ6Dpt0JoAM81U8Q2K3lkcjPFZulXflSgE8GuncCWAj1FepBqpCzOGS5ZHgXiK9NpctbQ8MOCR2rkpDliSc5613vxB0j7FefaFH3jzXF2On3GqXYgt1yT1Y9FHqa4+XkbudV7rQ9+IxXB+OLoNPFAD90ZIrvZWCxMx6AZrx3Xb2S81SeTPG4gUqm1iKCu7ma5yagPWlO496YV9SazR0skgnNvcJKpwVOa9QsLlbq1jnU5DgGvKCoFdl4Nvy0b2jn7vK/SorRvG5B2a1IKiQ1IK5BEg6UUgpaQBVLVLgQWMjZ7VeNcx4qvNkKwDq3JppXdiorU5GdyzknvzVdjTpHJPAqElq60jZi7ipyD0r0Lw9ei705MnLLwa85Oa3vCl8be/wDJY/LJ/OlUjdGctT0IGng8VEpzUtcpmLSimMSOlPFAC0jHAJpao6tc/ZrCR++KBpXOU8QXYuLsqrcLxWBI2BT5WaRyxY5JqBkPrXRFWR0bKxExJrX8N6mbG9CO2I34NZLJUWCDkcGtGk0ZtXPYY23AEdKlFYfhu8N5pMTMcuvymtoVyNWZkOoqreG5/di225LfMW9KtDpSEFFFFAytfyiG0kc9hXm1w/mSs3qc12fie78my8sHlziuFeTnpWtNG0NiN6SKQxTq46qc0jMT2qMsa2Q2eo6bci6s4pQeoGav1xfhHUTve0c+612S9K5pKzMWrMeKWkpoY7sY49akRJRSUtACE4BNcHrlx59+/ovFdpfS+TaSP7V57O5kkZj3OaqO5tSXUqPUDjrU7VC9bo0ZZ0a7NpqcTk/KTg16VGwZQw6EZryYkg5HavRfD959r0yJifmAwaioupjNG0KcKYvSnisTMdRSUtABUF1KIbd5D0AqesHxPd+TYeWDhnOKFqOKuzir6Y3F1JKTncapkVK1RkV0rQ6bEJrU8O3v2TUlUn5H4NZjCkRykiuOqnNU1dWIaPWFIIBHQ06qemzefYxSeqirlcjMWFIaWm4Z22qCSegFC1Cw01d02Iu7P2FT2egXV0waXMUfv1Na89pDYWwjiH4110KEr80jOc1sjnrx/wB7isO+PWta6bM5NZN7VVHc0pox5utVHq3PVRqwNyM0q0nelQUCHEcUwjmpSOKjIOaGAop4qMU9aQx4606min0ANPSmGpGqM0AMNNNK1NoAWnLTKetMCVamWoUqZaBEqipQOKjUVMo4piZE61TnStFl4qnMvWgEZ3Q1PGeaicYY05DzUll+I1p20nSsiFqvwNg1UWRJHQ28nQg9K7LTbjzrdeeQK4O2k4FdJo10VbZng134edmcVaOhV8b6INTs1x68n0rjLLT10yQw2kfJ79zXrcyrLCQwyCK4LxLcx+H7WW4ihMkr9MLwPc0sVTb1QUZ20NXWZvs+kXMnc
Ia8ZlJZyT1JzXrXitiugXGPSvJmXmsqr1NsOvdICKYRzUxFMIqUbNELDir+g3JtdXgbOAW2n8apEUQMVuYyOoYU3qrEs9djOQDUoNVrdt0KHuVFWBXAxEimnUwU+kID0rg/E0pl1IjsorvG+6a891vJ1OX61cPiNKa1Mgio2FTkc0wrXQmaNFcin28hhuo5B1Vgacy1GRiquTY9VtJBLbxuP4lBq0prJ0OTfpVuf9mtQGuR7mDHModcGnAYGKSlpALWB4okxZBR3Nb9c74oGYU+tNblw+I45hz0qNqsMtQsK3TOhohYVEy81O4qJhVpkM6vwVNxPCexyK7JTXB+DiRqEo/2a7pTxXPU3MZbklLTQaWoJFpD0paRulAHFeK5i94kY6KK5xhW54h51J/YVisK2hsdMVoQsKjYVMwqMitEDRb0aUwarAw7tg16ahyoPrXldmCL2H/fFepQf6pM+lZVdzKZNmjGRRTI3di25cAHisjMkUbRinU3NLmgDK1+XZp7D14rh2712fiJc2R+tca/WridNP4SuwwaherDCoHFaxZbK7da63wZMTHNFngHIrlHrpfBoPnTntRU+EykdqtSColNPBrmMB9LTQaWgBa43xZKWu0jHQDNdjXEeJgf7R/CqjuaU1qYBFRkVKajIroN2QNUZqZhUZqkQz0Hw1IZNHiz1HFbNYXhUY0df941umuWW7MXuBrp9BjtTb5WNfM7k9a5fNXtMvTZ3atn5W4IrTDzUZ6mc4uUdDtjwvFYmpyZLc9K1RMJIt69K5/VJsIx7mvWm9LnLBanOTNukY1nXnSrZJJJ9ap3dedLU7omTNVRqtzVUbpWZsR96elMqRBTQiRhxUVTMPlqIihghuKeKaKcKkY8CnU1aeKBCEZFRNxU/aoW60DITSU5utNpgFPWmU5etAEydanTrUCdanj60CJ0FTKKhSrCCmhMCvFVZVzV0jiq8q0CMqZec1GvWrU6VVA5qWaLYsRnmrsTdKz1NXIm7UITNi2fpzW3YS7JV5rnLd+RWxavyDXRSlqc80drHJujHPasbxNawy+HdQ3gZMLHJ7GrlnNujFUfE9tc3+gXNraYM0oCjJxxnmu+TvE5ErSKniC3NzotygGTtyPwryORdrEGvbpIw6lDyCMGvMfE2hTafctLGhaBjkEDpXJWi9zpw818LOZYdajNLJKqnBOKrvcx+tZpM6G0hWNFohlvYUHOXFQed5jhEBZicACus8O6C8c4urpcEfcX096c2oLUzunsdlbjbEi+gAqypquh4qZTXCBKDTwaiBp2aQmOY5FcLr8Jj1Fz6813PasPXtMN1GJox86jp61UXZl03ZnGEUwrUsgMTEOMEetQmRPUVujdtDGFQsOae88Y/iFaek6TJqMysylYOpY96vZXZm5I7HQU8vSLcH+7WoKrwIsUaxqMKowKnBrlb1MXuPpwNMzQDSEO53Z7Vk+IIDNZkj+HmtcVHNGssZRhkEYoKi7O550VqJl5rpb7QHXc8LAjrtNcncX0UMjI2dynBFbQ97Y6OeNr3HEVERVZ9Sj7KTWtoenvrG5zmOJTjPrWnK4q7M3Ui9jX8IQH7RNNjjGK7IGqGn2cVjbiKIcCroNc8ndmb1ZLmlzTAaXNSSPzSE8UmaKAOK8RwlL/AHkcMOtYbd69A1PTkvotp+8OhridS0+fT2PmL8nZh0rSD6HRCaaM9qjNNacU+zilv7pbeEDc3c9q3sDmi5o1s1zqkKgcBtxr0lOFA9KxtF0RNLQsW8yVurf4Vsg1hN3ZjJ3JM0UzNLurMkfS0wNS5oAq6lB9os3QdcVwc8ZjkKsMEHFejHkVh6noiXRaVCVfHbvVJm1OXLozjm6VA1MurtLeZ4myWU4OKqNqCdga3jBlupHuTPXaeFbQwWBlYYMhzWB4f05dTbz5T+7Rvu+tdzEAiBVGAO1RUf2TKUk9iyDTwahVqeGrEglBpc1GGpd1AiTNcp4otWLLOo4xg105aqt3E
lxCY3GQaaKi7O55w1Rsa0dX02WwctjMR6H0rFacCuiOuqN3JEjGmdSMVEZwa3fDumfbp1nk/wBWhz9ap6K7IcjrtCtzbaVCjfeIya0siolwowOgpd1cjMr6j80hb3phcetN3j1pWGdHpeoloDCetVdVkyMUzSo9qmQ9+lQ375Jr0oyfslcwSXPoZlU7rkVcA+WqVzWD2OhbmVNVQ9KuTDmqhrM1Iz1qSMc0zFSp1poRKeVqu3WrRHy1XcU2JDKcKTFKKgoeOtSCo161IDQA4jioHFWgMrVeUYpiK7dabTm602gYU8U2nCgCVKsIKgjqwlAiZKsp0qugq0g4poTHY4qGRatbeKikWmyTMmXg1QYYataVcg1mzLg1LNIiLViNulVlqdOlIZoQt0rYtX44rDibkVp2j8gVcHqZTR09hNjg1ols1iWz7dprXjbcorvpy0scc0FRyxpKhV1DD0IzUlIapmSPIfGOjLp+omQRqI5SSoFcnIqg8LXpHxFI32o+tedyAZrDZ2OtO8bssaJC02qwhcDBySRXpcWABXnegEJq0XvkV6DG3FcuI+IqJcU8VKDVZWqVWrnKLCmng1ApqQGkBJmkPIozRQI5rxRpaz2geKL5wcllrgXiA43NXq+otiymPop/lXlsv3jXTQk7WKsmiCOFDKvBOSK9RsF2WkQ27flGRXmUZxIp9CDXpdnKHtom9VFOu27E2saCnvUm6oFbtT81zDJQ1OBqu0ixqWY4A709JAyhgcg96CSYGlpgNOzQAjqGXB715v4osIrLUP3S8OMn616QTXB+MCDfp/u1pRfvFHJng9BXovhy1FrpseGJLjca88Yc16FoVyJdNh56DFb1/hJRvKakBqsj1IGrkGT5qC5mljjBhj8xicYzT93FAIFAMkRyVBbg45p+aiBpc0CJM5rD8TWRu9Lcq+1k+b61sg5NUfEAWPQrlpGC5XAHrTgnfQadjypl55etnwvBGNUE8khAiG6sgrW34WMZ1URSY2yDbzXa9rDZ1D+KtMjJAkZ8f3VJqL/hLrT+C3uG+kZrV1TR7ywsBNpdnDcEfwdDj2rhbrxZqtrKYpbOOCQdVZCCKj2L7GfOmdKfFWR8mnXTf8ApP+Elum+5pNyfqMVyDeM9VboYh9EqFvFmrt/y3UfRRR7B9g5kdqNc1d/uaNJ+LYp39peIX+7paL/vPXLWdx4o1CMSQ3DCNuhOBV4aT4okGW1IL/wP/wCtUuEVu0O5sm68Tt0tLdf+BinI3iV3HmLahe/z1xOrnWtLmWO5vZG3DIKucV0Hg7Tr69zqF3cymCPkBnODTdNcvMrAm27GTrlqbfUXEsaB2+YkVlELn7o/KtnxFdreatK6fcX5R+FY+KuOxpodbo+oQ6fpiHyXbd/cXNXD4mA+7YXJ/wCA4pfBUSXts0O9Q6noar+KrzUtBuQptY2gf7koJx+NZum29iXJX1JD4nm/h06f8aQ+Jr3+HTX/ABNcm3iu+Y/cj/KnJr2rTf6u3Lf7qGn7F9hc8e51P/CR6mfu6fj6tR/wkGsHpYxj6tXNC88QP92zk/79mqtxrGrW0hjmPluOoK80Kk32Dmidadc1xulpEPxpBqevSdIIR+NcWde1E/8ALfH4U+LWdYcbopJGHqqZ/pVewl5E+0gdTfjWLi0cXEcezGTg1yr9xXcaZJdXGieZdg+YVPJGK4mUYlce9TT3aNE7or10mkazcWVmI4bQuM8tmudwc1tR3QttMDbckCrmrqwGq3ibUscWiL9TUDeI9YbokQFYD62WPMX61GdXY9I/1qfZS7BeBvNr2sMfvRD8KZHq2sSSqodCSfSsE6q/ZK3PCjS6lrsUZT5V+Y5FNU32BygeraWsqaVE04AkK8gVUvDnNatwRHCqjjisi55H1reporGENXcqdFNULjmr7D5aozjrXNI6EZcwqoRV6YdapuOazNRmKfGOaQCpEHShCZNj5aruKtgfJVeQVTEiDFLSkUgqChy9aeKaK
cKALCDIqCUVZh5BqGdcZq+hJTYUzHNSMKYakoTvTxSAU8CgCRBVhBUCCrKCgRNGKtRiq8Yq3EOlNEskC8VHItWQKY61TRNzOkXrWfOla0i9apTx9ahmiM8CpVpCuDT1FSUTxcEVo2zYYVnxjkVdg61UdyZHRWvzIK1LdscGsrTjkDNaY+Ug1309rnFU3sWKp6jqEOnWzTTHAH61ZLhVLMcAck1wXjzVY5bFI7SZJG3fMAe1XN2RlCN2cz4j8QvrF5u2YjTIQVz7MT2qNprhj90Ugac+lY27nVdJWRLG8kTh0JVh0Irs9D1EyW4WeUF88ZNcNslbqwFbug2MbFpt5kkQ8LnAFZ1UnHUEd2rZqZWrItZLrcTNsA7Bea0UbPNcTRZbU1IDVZWqYNSAmBoLADJqMNUVyN8DLu2kjrQBka7rMUFu8SHc7AjjtXCO+STiptUa5gvZI2Ifng1nmaY/wV2U6dkHNYlyc8Cut8OajJJEY5iMD7tccpuG6KK0NPtJXnV5rpYUBz15NVOKa1Fc9IRwR1qQPWdbTRtEvlsWUcZqyJK42gJ5AsiFWGQeop8QVECqMAcAVAHqRXqRlgGnhhVcOKUuACSaAsUNX1dbKIqnMh6CuEvru4u5S8o3NVjX3nl1KQwyDZ61kGG7brIK6acElce3QHDn+HFaOlarPp7hWOYj2rMNrP3lFNNnIfvTVs0mrMnXsek6dqcV9FvjPfBrQVq5fw1HFBZYjbc2fmNdAsnFcc0k7IC2Gp273qqJKXzKgZZ30eZVXzax9Y1oW4+z2/zXD8ADtTSu7IRtHVII5Su7LKMkDtXKeI9Wl1GQIrYiU/d9alQxWGkzebKDdSqTk9c1yDR3bdZq6acUFrPYnZTToi0Th1bawOQRVX7PN3mo+zv3lrXTuPXsepeFfFZuIUsLo7puiN61ifEyNWWxm2jdllJ/CszwRbKuu+YW3MqHHtW38RI/M0m2kx92bn8RVqXQxlG0jzOnxIZJURerECt9vC7roX9oead+zfsx2qjoFsJ9TRm+7H81PnVm10Fyu6PQNNhW2s4o1GAqitEEVQicACrCyV5j1dzoMLxVp7XogaNNxVsHHpT5dSNrpAsLfEfy4NbTsMEt0ArgNYjkudRkkhlIQngVtTd9GCutURPE+STg1CY39qaLO47zUv2KXvNXRddw17FqwubnT7pLi3lKOpz1616RfSReJfCznAZzGcj0YV5d9ibvKa7PwTOsEE9puLMzbqTkl1InG6vY47w9pn2zUx5gHlxHLZ7+1ekRbEQBVAA9q5LSl+zazqCDgCU8fia6RJeOtYYiTlIIRsi6TXC+MocXccoH3lrsvMyKxPEVi1/agxjLrzioovlnccloeenrXpngaNR4dLEDmVjXmbAqxB6jivRPDkoh8GSktt5fB967MR8BjD4itrniWQyPb2igIMgv6/SuWMjFiSOtMdJy5+fNN8qU9WpQhGK0NrvsPy3pWjpV4IrpEuFVoieQwzWX5D/3qURMD941TSaDV9D1OCxsJoldLaEgjrsFTjT7L/n1h/74FZHhW4Mmlqh/gOOtb2a4pXTsBB9hte0Ef/fIrU0q1hikLJGqk8ZAxVQcmtazXEe6taCblcmb0C8bkCs+cZFXJiXlqtOtdM2TEpOPlqjOOtaTrxVKdawkbIyZhzVRhzV+ZeaqMvNZGqIcVIgpMVIi800BYUZSq8g5q4i/u6ryLzVPYlFQikxUhGDSYqChtOAoxSgUAW7YZzTLlcVLZrk0+7jrRLQi+pksKZip5FwaixUFiAU9RTQKlUUAPUc1YQc1Ei81YRaBE0Yq3EORUEa1ciWqSIZKq0OnFTItKyVpYzuZsiVUlTI6VqyJnNVJI6ykjSLMh48HpQq81ceLmmeVg9Kg0uMReatQjBqNYzViNcGmhM2tNySAK1ZHCD5jg1R0YfNXO+PNcm0rUbWOIcMhY8+9d0HaNzjkrysdLrDkaPdkHH7pv5V4fKHZjmRuvrXtm
tH/AIk15/1yNeJOfmNVPcVLYi8s/wB40eWf7xp+aM1JpoM8setdH4XwGmX8a54mtvw0+LuQeq1nUV4sa3Oq3YYCrKPxVEt89TJJxXEzQvK9Sh6pLIPWpQ9SIth653xXdzRWsYglKMTzitnzB61yviibc8SZ96umveQHLuJ5GJeUknvSeW/981IWpN1d5Azy3/56GtTQIVOrxb/m+tZ26tHQnxq8NTK/Kw0O8n2x4AGKhWYU3UJNu2qIuOetcVrlo1VlqVZKy1nz3qdJvelYDRElVtRlxYy4OPlqMT1W1GbNm4z2oS1GtzkWUlvvE0wxk/xGpSaTIrpNLEJhz/EaabcdyfzqfIpCad2JpHQaAFisSAOhrWEtYujvizfnvVwTVhNXZmzQ873pfO96zjPtHWsi/wBbOTBa/NIeC3pSUG9hXSNLVdbFsPIgO+dv/HayI2iso2ubh90zcknrVAyJZIZpW3ytzz3qlBHcaxeAHOzPPoBXRGmreQnK3qaj3P8AaCiQg4HTNR+SvpV6e2S0CRJ0AqvkVN+xtFaakPkr6Unkp6CptwpCRRcqyNnwkBHrAwMZQ1seOznQPpIKxPDLBdYT/dNbPjY50Fh/tiqjuc9T4iJZQ/hgL62+P0rk/DZ2vK3uK3LWbd4fUH/nj/Suc0WURpJz/FSS92QvtI7OKXjrVlJPesOG6HHNXY7jJrBxZZpSvmBwfQ1yLoAxGO9dJJKfIbr0rm3bLGnFGkBuwelBQelGaUmrNBhQVt+FsLfyD/ZrFJrW8MP/AMTRh6rSexE9iAny/EGo+8ma1I5Mgc1k3h2+Ir0Aclh0q5EW9D+VKauZLY0kkpJXxGzHsCagTf8A3W/Ki5L/AGaX5T909qhR1Bnn81zD9okPkg/Me9dPa3SyeECEXYPMIxXGPy7fWunsWx4VA9ZT/Ou2rBcqMabvIpGkxRuFJuqToEIptOzTSKBM7Dwg/wDo8i+9dVniuP8ACDcTD3rrAa5qi94TJ4hukArcRdkH4VlWEfmTZ9K3JV2xYx2rpw8fduY1HrYzlG5iainX5quRpweKhmTLVpJaBFlFl4qnMnWtVo+OlVZIuOlYyRpFmDOmDVRl5rXuIuTxVF4/asWjZMp7akRKk8v2p6oQelCGyeOPMdVplxWulsy2wLKRmqE6c9K0krIhPUznWmFasstRlazNCLbTlWn7acFoAs2C5lqzfxYXNN01P3+KvalEfLzit4q8TGTtI5px81Qkc1alGDUB61izVDAKkWminCkBOozirMa1DEM1dhiZugNNIlsfGtXIhikitnParcdqR1rSMWZuSJIlBqyLdWHWmxw47VX1C6W0iLMwH41vFaamLZYe0jxyRUDWMTdCM15/rPi6Uu0dvIc+oqDw3rN/c6wkctyxU9ieKfKuwanoEmmp2qnJZbeldGkHmRg5BOOagksic8iolST2KVRnOGLbTlHNbDaZnq+KE0uIEF5M/jWfsZF+1iTaOCOccVwvxSI/tSy/65n+dejQmCBcK6gfWvM/iZMs+pWhjIbCEHH1rdK0bGN7yudrrd3CdHuwsikmJu9eLu3zGu31F2Gnz8n7hrgmbmlGfPqNLlJN9G+od9Luq7C5iQvWpoMu3UPqtY+6rOnS+Xeq3saU1eLGpanZGbMoANWU3npWDDPJLOojGTXTW0TbBu61xONja4ipIehqVYpD/F+lWki9qmWKp5QuUDbuR981yfiVDFdopOeK78RcVwfjP5NSjHqlaUo+8Js54tSbhURak3V2cplzE24Vd0qTZqcB/wBqszdU9m+y8ib0apktGNSO61WZjt288Vmp57Hp+tStP9odQOeK0ba1Y4OK40rGhTjjuD2FWFt7lh94CtWKz9qvw2BP8NVy3G2c+LG6b/ltj6Cq99p88do8jTMQBnBrto9MPpWf4ltPs+gXUmOi1SpsXOjzMyYPWk8z3qoZKb5vPWteQrnLnmCguMVU8w0eYaOUOc6XRQ8lrIEGTmtSKwuG7
VV8Fr5yzDrgiu9trHcB8tZundkSmcVeeH7+7h8uGUR56moLfwPexrgOmT1NeowacuBxV+LT1HatI02lYz5+p5D/AMK5vLmbfNc8egFb1j4Ha0jCo4H0WvS0sVHapRaLjpVundWZHtLO6PDPFFg+l3yRO27cuRWD5ldh8VP3OvQKO8X9a4Eyms/ZpaI6Y1NC75lHmVSEp/yaPNP+TRyFc50Hh6TGsxc9c103ie2mu9IaKFC7kggCuM0GbGtW3u2K9Xt7bzSOMiptqZzfU4/S9Gun0lYJEKMVxg1JpHgGe3VhLIjknOdtejWunqAPlrUhtFGOK1jTMXM4q18HKoGQn/fNacPhVF/u/wDfNdakAHaphEPSrVGJLqM4698OhbCcgjhCfu145JJiRhxwTX0ZqEeNOuP+ubfyr5onl/fyf7x/nWdSmlsbUZ3vcs+YPal8ziqXm+9J53vWfIdHMW2krV8LvnWlX1U1zrTe9a/hGXPiO3HrkUcmhMpaHY2/hu6m1m6udo2SEbc1vQeGpSOSo/Cuks4AEHFaSQj0rZUE9zkdVnKL4abHLj8qjufDJeB038MCOldn5VRyRCn7CAvaSPELn4X3CysVvPlJyPkqvq+ivoOiR27vvy+c4xXtM0Iz0rzn4noI9PtT0y5olFlwlqebb6N+areZTg9TymvMWN1Jmog/vS76Vh8x2fge2Ny1wA2NuK7iPSWP8dch8NDvu7tfYGvUYoT6UlSjJ3ZnObTKun6YIBlmyatzQFhVlITipBCxFdEYqKsjFyu7mX5BVarSR/NW1JbtjpVOSE56UnEakZxTIpjW4YVceL8KjKkVPIVzGTPYM3SqD2MgJGK6F6hK5NZypo0jUZgCwkJ6Vo6ZpAkuFMnIz0q+sftWjp0H7zd6U6dJXCdV2K+s2yQ2iqoHFchOOtdp4gP7gCuNnU5NOstdAovQostN281MynPSkEbHtXNynTcYEFPEdTwWzSNjGK27PS4sBpSPxqo0myZVEjP0qEtcjArT1tFt7AyP0FaQuNLsYxmWJMdywrn/ABTqtteaUyW8gfkcqa6FBRjZnM580jl5ryIk81Xa5j9az3qLvXLY60aLXS0gu+eBWfk1PbIZJlGO9TYZ2ej2qSWqySLkmtiOCNegxWRazNBAqL2FWlvZK1jUglqc0k2zUWNalCqOT0rI+2yVR1O7nFhOyuQQpIxVe2j0J5Waep+ILDTIWZpkZh2BrzDX/FE+qylUO2OsCeeWaQtI7MSe5qLdW6RFx5YnrUlvcy2solhba471X3ZpN1Owcx2vhzxTqk160EtySpXIFdb/AGndN1lb868w8PybdYi565Fd+jcda5K7aehpBJo0DeTt1kb86a1xIert+dVQ9Beue7NLIfJIxHLH86z7iJJWy6gn1Iqyz8VXdqNQK10yy2UoB6qa8/dwCRXqstlFsI2DBGK5W60CzSQkRn866qU1HcylFy2OR3ijfXRtpVsp4iFINOhzgRL+Vbe1iR7ORzu6r2nWVxdTrsBVe7EdK34dMiBBMa/lWva26pjAAHtUyrK2g1SfUfpunJbIABlu5PetyKLAqC3jwBWjEnFczdzUEjHpUwjp6LTwKQEezivPfH6eXf2zdihFejHiuZ8T2MF75bSxhinAqoStK7E1c8tLmk311L6VbKeIVqL7BCOkS/lXV7aJn7KRzYfNT2kc1xdRxQKS5PFdHBpqyvhYxj6V0ml6XFBjagB7nHNJ1VbYFTfcbpGjmCJTL80h6muigs/QVPbW3Titm0suhIrKMeZlOVkVbXTdxGRxWvDYKoHy1big29qsqgFdEYJIxc2ymtsB2rmPHMkUPhq8Qn5ih4rqb67S2iJzzXn+t3JvS6t8ynjBqak1HQqEXI8eaak82u1m02DJxCn/AHzUB0+EdIl/KpVWPY05JdzkPNb0pfMc/wALflXW/Ykz9xfyq1aaaJplREGSfSj2q7C9nLuWPhrHNNcXKvGwTg5IwK9ftbQBRxWV4
b0VLG2HyfMeprqoogB0rSKvqZzfQjjgAHSrKRAVIq+1SBa0M7jAgpJCEQk9qk6CsrVrryoWANJuyBK7PFvitd+b4iiI6CPH6158Zea9b1u0hvLgySxq5HQkZrEfTbdekKD/AIDXN7ZdUdKg+h575p9DR5zf3TXdPYw9o1/KoDaID9wflTVZdh+zl3Oa0iecatamOJmbzBwBX0Jpdt+4RmXkjmuO8GeHfOuvtMseEXlfevTIrcLjA4rSFpa2Mp3WgsMQHaraJx0oRCO1TKK0RkwC4paMU0kimIram23Trg/9Mz0+lfK15Pi6lAzw5/nX03q900dq49QRXjl9psBupGES8kk8VjUmkzelF20PP/tDf3TR58nZW/Ku0NlGOiL+VRtaKP4R+VZ+1j2NOSXc47fOekbflWv4Wa5TxJYsInK+YA2B2rWNuB2roPCenGfURIse7ZzT9onokJwa1bPWLNf3anHatBFrPtfPVQPJ/WryNN/zy/WulM5mT7aidadulx9wfnUUjSc8L+dFwRVmXrXl/wAWm2afZ/8AXQ/yr0yZpOc4rzz4gWq38UKTchDkAVE2krlxTb0PGzLR5pro/wCxbZf+WefqacNJtv8AnkKz9rE05JnN+c1HnNXTf2Vb/wDPIUv9mwD/AJZLS9rHsP2c+5tfCifdrd1Ex5MQIH417XFGPSvJvAix2eqkJEAWGPlFeuQvkDCHP0rSEkzOomtyZIx6VOsQ9Kahb/nmfzqUb/7o/OtDIPKGOlQy2kbZ4qxlsdBUUjsooA8yvvH2n2Gu3WnXMUi+S+3eBkVrWfiXR74Dy7lMnsa8/wDF1qW8Q3Unlr8zZziue8iVDlflPtWDq2OlUrnuKNaXHMcqH6GpfscWM+YmPrXhiXGown93cSL9Gp7axre3Av5gPrVKqhOkz1PxFrVnoFi8jurSY+UA1xHhfxxrd/4iSD5Tbykkgj7gFcjdte3zBrqZpSOm41NpX26xvRLZYWXaRz6UvaIfs3Y9U8TavPFbBwAea48+IZCeYxSSXGrXenytqG3ap+UisY9aznJt3NKcbKxtf8JB/wBMzSHxCe0fPvWIaSouzWyN2LXbmSTCgLWNrl74hnumWCafyO2xsVJaf62i6tLmS4Z0unVT0UdqqM2jOcUznZLHWJTmRJ2P+0+f6111hFJDpAWT7+Bms9bK5B+a6lNbqwlLAZOT603U5iFBLUx3HJqIjmrDrgmoiKyZ0IYBWrpMOZN5FZqrk1tWiGOMbeM1LB7Gwp4qVTzWehkJ+/VhBJ/fNZGdi2Kg1BS2nzqOpQ09Ec/xmkliyhyx/OmkB5G9rcbj+5k6/wB0037JcdoX/Ku+vLBNx6/nWZJYJ6H867FX8jL2Ryn2O5/55NR9juP+eZ/MV0T2SD+GozaJ/dp+2D2Rl6bb3MV/FIFAwecsK7dJ/lGXT865+DTo5ZQCpxW7DoNptGUb86xqSUty4xcSf7Qg/wCWi/nQ13COsqfnSjQrLH+qz+JpG0SxH/Lup+tZ8sStSvJqNsnWdPzqpJrFmv8Ay2U1ck0izA4t0/Ks64063HSFB+FNRiGp20sY2msS8i5NdLJHkdKy7m2yTnpVNEJnNtAWPAp6QBO1aEkQU4xUewUrlkSR81chSmInNW4lpXAtwLV6MVWhGKuIKRJKBS0Cg0XEMY8Vjaou8VrueKzLxd1T1KRzk0OCaZDZNM3Qhe5rXWyMrZYYWra24UADpWiG2U4LVUAUDite0h5FRxw81r2NrvI44pxV2Q3Yu2VvnHFbUMYUcVBBEEUYFW0rrirHPJ3JVIxUdxcLDGWJodwi5Nc5quobiUU0pz5UEY3ZT1S/M7kZ4rBm5Jq1I+41XcZrhlK7udcY2RnvHyahaGtEx57U3yM9qLlGcIM9BXYeG9E24mkXk9Kh0bRTPMJHX5R0ru7O0WJAAMYropQvqzGpPoia2hCKOKuquBTY1wMVKBXUlY5mLinDpSZoJ4pkkc0gRCa5PVrrexGa3
NTuNkZFcheSbmNc9adtDanG+pkXQyTWdIgrTmGc1TdK4jqRmvFzT7Cwa7vUiAzk81Z8rJx1rr/DGkiJftDr8x6VpTjzOwpSsjf0uxS0tkjUYwOa1kXFRxLgCrCiu9KyscbdxVFOoApaoliHpULnANSk1VuX2oaGCOc12f5GGa4G6Xc5NdZrU25mFcxKuSa4Ksrs7KasjNeOoGjq+6ZqBkrI0KTR16J4G00wae07KcynP4VxdraNc3UcKjljivXtNtUtbGKFQBtUCuigru5jWdkXYlwKnA4pqLxUgFdhzDCKgcdasGq7nrSAozjg1wfipd8oHoK72foa4jxCu6Wsa3wmtLc41ovameUBWg0Y9KjMdcdzqKflCkMQq75dJ5XtRcDofA1iPtUtwy9OBXpkA4Fcp4WthBYJxy3NdZCOBXdSVonJVd5FlelSAUxelSCtTEQ9KrzfdNWTVecfKaBo8j8TW2dSlbHU1zUkA9K7rxJBm5Y46muUliwTxXnz0kzug/dMhouaiaL2rSeOoWjqUyjOMXtVrT48XiGnNH7VNZrtukp3A376PGjmuRZa7W/XOjn6Vx7itXsTArEU3FSsKbipNSWz/wBcK1SgPas+xTM4ra2VEmSyqIxV+dMWI+lRiPkVcvk22IqqZnLc5qQdahIqzIOagI5oZqh0CbpBW1EMYFZtmmXzWsi4rNiZMg6VYQVCoqwnaixJMgp7rlaEFSEZWqRBj3UWazpYq27hMg8Vnyx0MpGTJDzUDRVpyR1A0dK4yOyh/e5roIU4FZlnH81bUScCmJibaY61a21G60WEjOmXg1l3I61szLwaybkdaCjvGhwORWddx8dK23UYrNvE4NdEloc8Wc5OvzVBtq5OMNVfHNc7N0CLzVuJagQc1biFIGWohVpOlV46sL0pNkskFITRSGpAY54qE2xkOT0q6kJbk9Km8sAdK0jDqxNmcYQowBim+VV5k5pEgLtx0q7Bcit7Yuw44retYBGoFQ21uEA4q/GtbQjYyk7kqCpc4FNA2iqGo3qwxkZ5q27akWuytquobFKKa5qWQuxJ6mpJ5WlcsTUBriqT5mdUI8qGkZpu3NSAVLHFk1CKIFhJ7VoWGnG4mAx8vepbe0LsFA611em6esEY45renTu9TOc7IlsrJYUAArTRMAUJGAKlArtSsczdxAKdQBRTEFMkfapNKTVC+n8uI80m7IEjH1W43ORXPynJJq3dzF3NUXOa8+pK7OqEbIrSCoGSrTDNIkRdgAKyRpcfptiZ5144zXeWUAihVQKydIshGgYjmuhiTAruowsjmqTuSIvFTAU1RUgFbmQUUtIaZIxqzdQk2xmtBzgGsLVJsAjNRN2RcVdnK6k+6Q1jutaV025yaoOK8+T1OyK0KjLUTJmrTLSLGWYAd6RRreF7DfeGZh93pXoluvygVzmgWogtl45NdNCMCu+iuWJyVXdlhBTj0pF6UrHitTK5G1V5D1qZjiq8hpMZTuD8prjtaG6Q111y3ymuR1U5kNYVvhNqW5z7R0zy/arTLzTdtcZ0lfy/anxw7nUepqXbU9nFuuE+tC3EzudIj2W0Y9BW7HWTp42xKPataOvRitDiluWE6VIKiWpR0qyANQSjIqftUMnSgaOJ8Q2+SzVxs8fJ4r0XWoQ6GuGu4trEVw1o63Ouk9DIdKrPHV+ReagZawNSkyU+BdsyH3qRloUYYH3p3A6K6Xdo7fSuOkFdo43aK3+7XHSL1rolsiaZVIpu2pmFNxUGpc0uPdKTW1sqjo8XylvWtXZWcmQyJI8sPrU+qjFoBT4I8yr9aTV8CNVJrWkvdbM5PVHLydaixzVyWMMeKriM7wMVMjVMu2UWFzWgq1DDHtQCrIFZiY9R0qdOtQCp0600SyylTY4qFOlTCrRLK0qZ/GqEyVqSDg1RmXmhjRnOvWoCnPSrkg5NQ7eakoktUwRWvEvFZ9svNakY4qkSxcVE4qwRxUUgpiM+ccVj3I61sz9DWRc96
llI9GcVm3nQ1pPWbedDXVM54mDcD5qrY5qzP96olWuV7nQthUXvVqPioFHNWYxUsCwlWF6VAlTqM1Ih45qeKHdyRSwQE8mryRYranT6siUiJY8DpQUqzspDHW1iLlPyyzYxVqGALip44QOcVOkeTTURNjY0q0ibaVIgBRKwRSa0tYhsr3dyIIic81yl5cmeQ5PFXNUvDJIVB4rJNctapd2R0U421Yh5pMU7FOVCTXOaAiZPSr9vB7UlvBmtiwtPMkHHFbwgRKRb0uxwN7D6V0EUYUdKit4QigY4q2Biu2MbI5ZO4gFOxS4oqibiUwmnmomoAZIwVSa53VbrJK5rXvZhHGTXJ3c3mSE1z1pWRrTjcqu2STULU9jTD1rhOhDMZNaem2nmSBiKqW8JlkA7V1en2gjQcVvRhd3InKyLltCEUDFXlWmRpgVOoruSOZiqMCnCgCloEwprd6dTGOBTEVrh9qGuX1SbJIrfvpcIa5O/k3MaxquyNqaMqU5Jqs4qxJ3qE81ws6kQFauWFv5lwvHAqELk1uaRb8hiKqCuxSdkdFYx7I1GK1Y6p26YAq8gwK9BbHG2SrQ3SgDikNMkhc1Xk4qw9VZT1pMZQuj8hrk9SOXNdReNhDXKXxzIawqvQ3pmew5pu2nnrQBXIbiBau6dHm5U4qqorT0tP34NVDcUnodfZj5V+lacdULUfKK0IxzXoLY42SrUo6VGoqQVRAHpUT9KlNRPQNGNqaBozXDahHiQ8V396u5DXF6pHhzXNXWh0Umc9IvNV3FXpF5qs61xnQVWWo9vNWGWo8UAdHEu7RG/3a46Qcmu0gH/ABJW/wB2uOmHzmumXwomnuyqRTcVMRSKvzCszU3NMj22o461ex7VDaLiBB7VZxzWbM2yezjzJnsKxdbnJmIB6V0NsBHbO59K4/UJfMmY+9brSBMVeVyp5p3dat2oEkgzzVLFaGnRnzM1m2atGiFp9OCUhFSQAqZDzUIqVaALUZqwvSqqGrKHirRDGyVUlFXHqrKOaGCKEi1FjmrLjmogvNIsnt1rRjHFUoFq+g4qkQxxHFQyVOelQSUMChP0NY10etbFx0NYt2eTUMtHo8h4NZF9KACO5rSnbCmsW4O5ya6asjngigwJOTTcVK9R1ys6EOTrViOoEqxEuaQFiME1ft4ckE1Dbx9K04U4rWEDKTJIowBVkJSxpxUwSuhIybItme1SLEAMkVOsZ61IEzgAVVhXIBHk4A5qykIUe9TRwhRk9aVhVJEtkRGBWLqt4I0Kg81qXkywxFicVxt5cG4mJ7VlWnyo0pxuyu7FmJPemUppQK4tzpADNW4IckVHDHk1owx9ABya0hEmTJraAswUCuksbURoMCqmn2m0BiOTW1Em0V2QjY5pyuSKuBTwKAKcK1MmGKQinUhoAjaoZDgE1Mx4qldyhEPNJuxSMbVrnGVBrnXbJJq7qE3mSnms9jzXBVldnVBWQhOaAu44pK0LC18xwzDis4x5nYptJF7TLPGGIrooUwAKq2sQUCtCMV6EI8qOWTuyVBipQKaop4qyRcUUtBpkjTUUpwM1IxqpdPtU0AjI1GbrXMXTZY1sahLkmsKdua46rOqmis5qOntTcVzmw6JNzgV1OmQbEHFYdhCXkHFdVaR4UCumjHqY1GaEI4q0gFQxLxVlBXUc46mtT6Y1AEMlVJT1q3J0qlN0NJjMq/bCGuXujlzXQ6i3BrnJzlzXNVZvTKpHNKBQRzTgK5rGw5RWvpK/vRWWgrZ0gfvK0pr3iJ7HUWwwoq9HVS3HyirqDiu9HKyRRUlMWn0yGBqJ+1SmonpMaKF1901yOqx/Ma6+45Brm9TjyDxWVRaGtN6nJyryaqutaE6YY1TkHNcLR1FVhUeKmamheR9aQzoUOzRT/uVx8ikkmuwn+TSSp4+WuXdR3rpnsiKb1ZRKnPSnxoS4+tSHAp8WDIv1rNmtzbhXES/Spaan3F+lSKMsB71nbUyZblib+zTtOCRXG
XVvKshyM12mpz+RYqo6muXlkLE5roqJJJBSuZaL83NbWnR/KTiqQjVzz1raso9sNY2NJPQftphWrBWo2GKCEyDGKctDUgPNIZYjNWVPFU0PNWUPFWiWStVeUdamJ4qJ6GIqOKjA5qZhTFHNIq5YhHSrqdKqxDpVtOlUiWKelV5asnpVaWhgZ0/Q1i3Z5NbU/Q1iXZ61DNEd1cyF8gdKzpBV9xVKbitZmMSlJUdOc5NIKxZqSIKuQryKqxjkVdiHSmkSzQt1rSgTNULcdK1IBwK6IGMiwiVZji7mmwx5OatAVsjNsaFzwKsxQhRkjmnQxfxGp9tWkQ2QsuKglYIpJq0w4rn9c1AQRlFPzGlJ2VykrsydZ1AyOY0PArD6053LsSe9Mrzpycnc64xshR1qZEyaYgq1ClEVcbZPDHitfT7be4Y1Qtoi7getdNZQBEHFdVOJhORbt4toFW1FMQVKBXQjFsUClFGKWmSFNNOpjHFAEMrAA1g6nc4BANad5PtU881y9/NuY81lUlZGsFczpny5NQ0rtk01eTiuB6s6VoT28RlkHpXR2UAUDis+xt9oB71u2yYArrpQsrmFSRbiTAFWFHFRRirCCuhGQ8DinCgUvamJi0hpaQ0CI26VlX0mARmtKZtq1gX0vWpk7IuKMe8kyTWVIcmr1y3JrPc81xTd2dUURN1p0a5Ipp5q1aR75BWcVdlN6GtpsGMGugt16cVnWce0AVrQjArugrI5Zu5ajHFTLUSDgVMorQgWmNUhqNqAIJKpz8A1ckqhcnCmkxowNQfqKwZetbF++WNY0nWuWodMCLFOAoxTlFYmg9BW3pC/PWOgrb0hfnrSkveMp7HTQD5RVxBVWAYAq4ldpzMkAp1NFLTJA1E9PNRvQNFOfoawb9cqa3puhrGvBlTWcy47nK3KYY1myDk1s3a/MaypR1rhmjriym4pYFDToD60r1d0m2EtwXYDC+tKKvKw27K5D4g1D7PDHCnXrXMteyt/D+ldFqzRtdMMAkcVkS7ewFbT3CnsUDPIf/1VLBM4kUkd6V6ZWVzXQ6u3cSQqR6VahGZV+tYulXW5fLJ5FbURw4NCWpjJWI9ZmVtsfcVhSIQav6m3+khqqeYrcGtZ6scFZFdQQ4retF/cisYp84xW7aDEAzWaWo57DytROtWStROKZCKjCo+hqdxULcGpZQ5TU6NVYGpUbFNMTLWcio2pA1BpkkTDmmqOacaF6ikMniHSraDiq0fWrSdKpCYHpVWbNXDVaUUMSMyfoaw7zvW9cDg1h3Y61DNUdvMwUZNZNxPuOFpbm5aVuOB6VVPU1U5XehnCPcOppyjmmipFFZlk0dXYappVyHqKtEs1LYZxWvbR7sVmWURcjHSt2FQgArpgjCbJ0XAxVmCLcdx6VHDGXPtV9F2jFbJGTYY4oxT8VHK4RCxPAqiSnqF0trbs7HnHFcBfXTXM7OSea1Nd1I3ExjU/KKwTya4q9S7sjqpQsrsSnAU0VKozWCRs2PjWrsS5OAKrovStOxtzI4OK2hEzkzQ0626MRXQQphaq2kG0DitBBXXFWRzSdxyjFSYpoFPFWSwooooEIarzPtU1O5wKzbuXCmk2NIzL+fIPNc7cvlq0b2XJIzWPM2TXJVkdUERMeas2cReQccVUUEnFblhBtUE1nCN2VJ2NG2jwBWnCvFVYE4FX4xiu2KOaRMgqdRwKiUVKOgqiR4pRSCnCmSFMNOpjHAoAqXkm1DXN3kmSea2NQk461z1y/JrGozamihOck1TbrVmU9aqmuR7nQhAMmtbT4uhrNhQs4roLKLCitKa1uRNmlbJgCtGJaqQJgCr0YrqSOdk6ipBTFp4qiWKajapKjagCCSs67OEatCQ1mXrfI1JlI5q9b5z9ay361fvGy5qg3WuOe51RG09aZUi1mMlQVvaQvOawkFdBpK8cVtS+Iiex0MPQVbWqcatgc1YVT3NdZzE46UppgBHelzTJFNRvTs0xqBoqy9KyL
scGteWsu6HBqJFxOcvF5NY8wwTW7eLyaxbgcmuOZ0wM9+tatuy2emvKeCRk1QiiMs6rjjvUfiK78q3W2Q8twR7UUlb3ipauxlSXRlkZyepzULyCoIo5ZOFUmrSWLnlzt9qNWaaIrMQaAjN0FX1tEUUOqoKloakQQk27Bx2rpbaYSxK47iuYfMrbIwTXQ2SGKBUPUCiO5NTYq6qf3o+lZLuynNdFPZ+fIHb7uKry2sQGNoqnFthGSSsZttchmAaultuYlNc89mA4KcV0FkCIFz1oQp2a0LBFROKmqNgKGZlVxVdxVtxVZxUlIipytTDSZpDLIanbqrK9P3UxMkzSr1pmacp5oEWY6tL0qpGatKeKpEseary9DUxNQSUMEZ9x901i3a5zW1cdDWRcjrUM0RqE800ilpwXNKwXGqKlUUmMU4UxIlQVo2cJlcAVRhQuwUDk10lhbCJAcc1pTjcicrF62iESACtCFC7D0qrCpcgYrXt4wijiutI52yeKMKBUtIKWrMwPArA13UBFEY0PJrVvLgQwk55rhdTujPOxJrKrPlRpTjdmfI5Zix71CacxptcG52DlGTU0YqNBVmNckAVUUSye3iMjAV01hahFHFZ+nWvRiK6GGMKBXXTjYwnImjXAAqwBUaipRWxkOFLSClpkhQTRTGNAEcr4FYl9LgGtK5kwDXO383J5rObsjSKuzNupeTWc7ZNTTvkmq/U1xSd2dSWhYtIvMkFdFbRYArNsIcAHFbcK8V0Uo2RjNlqJcCrSVBGKsLW6MSZakFRrUgpiHijNIKXNMQE1DK2FNSMcVUuXwpoGjIv5M5rDnbJrTvZM55rGnbk1y1GdEEVpG61AeTUjmmou5qwNS5ZRZINdDbR4ArMsosAVswLgV1U1ZGE2W4h0q3GKrxirKdK1RkTL0pwpq9KdTEFRsaeTUbGgCCQ9ayNQbCNWrKeCawtTfg1Eti47nPXLfOapt1qxMfmNVjXJLc6UFSLUYqVakZPH2rodK4WufiFdDpnC1tT3M57G7GTgVOpqvEeKsrXUc47mlCmlHSloJGnimNUhqNulA0V5KzLkda0pOlZ1z3qZFIwLwdaxZhk1t3neqMcKs5dugrllG7sdEWVo41tLdppOGIrFkjS8mM8qZ/ug+la1/LHLw0iiMds1gXms2dsdofcfRatpJWKV9y5hVXaoAFRMwHU1iPrpkP7tcD3rT0xHvCWY8Vm2Xay1JGYnhRmhLCSY5c4Fa0dqiDgc1LtxUsOfsUYbKOAcLzVqP7wpxFKg+bgUIlsddXUNtFl2H0rHfU43b7pxS6lbvJMWLZ9qy2hdT3qpSZcIRtqa8cySHg1sW/EQArlLfesg+tdRb5EK5qU7inGxZzTWNGaaTTIIpKrPVhzVdz1qRogaoyae5qLNIocGp4aoc0oNAFgNUimq6tUqmmItxmrCtVOM1ZU8U0SyUtxUMhp5aoJGp3Aqz/dNZNz0rUmPBrLuOallI1gtOpaaTigQUq8mmZ5q9p9sZnDN90UJXdgbsaOmWu0eYw61tRjtVaJQqgDtWlZw7juPSuqEbKxzydy9aQ4AY1oLUMYxgVMtbIzZLTXcKpJpC2BWdqF4Io25xTegkrmXrV71UGuUlfcxq1fXRmlJzWeWya4as+ZnXTjZATSCkJpRWVjQnTrWnYwb3BNZ8Cb3AFdLp9sFUcVtTjdmU3Y0rSEKorRQYFQRLtAFWF6V2JWOdslXpTxTFNPBpkscKWkpc0CENQyNgVIxqpcS7VNJjRQvZsA81zN5NknmtLUbnAPNc7cS5Y1zVZHRTiRSPkmpbVN8gqqTk1radDwCetYQV2ay0Rq20e1QK0YxiqsQwKtx12paHMyynSp1NVlNTKeKpEFhTUimoFNSA0wJs0hNM3UhbimKwO3BrPupODVqR+KyruTg1LdiktTKu3yTWXK2TVy5fk1nSNzXJNnRFETHmrFtHuaq3U1p2UfSpirscmadqmAK04hVOBcAVejFdcTBl
mOp0qulTKapEE4NOzUYNLmmA4monPFOLVE7UAV52wtc7qMmSRW1dSYBrnL18sazm9DSC1MuU8moKkkPNRZrkZ0CipVqGpVpAW4q3tP4WsGHrW7ZHit6e5lPY3Ij0q0hqjCeKtoa6TBk4NLmmA0ZoFYUmo3PFKTUTtQMjkPFZtyeDV2RuKz7huDUspGLdcmsTVLhlh8uJ8E1papN5UTsOoBrzW91W9W6dixIzXO3qdUI3J7ixuHYl52OfeqD6YQfvVftdYiuf3co2P8AoakndFGcioNLmWtltYAE9a7bS4BDZoAOSOawNFh/tK+2IMonLN2rsBGsYCjoOKai9zOcuggWlK06ik0SQlcUxpRCjOw4AzUxFVriPzIXU9xQhnP3Gtq8jZU4qt/asJPORSXNkiycHiqrWkfqBSubqMTZsZ4Z5l24PNdCpwK5HS4Rb3QbcCK6dXoM5otbqaWqMPSFqRAOeKruakZqgdqBojc1CTT2PNRMakoQmlDVGTQDSGThqmRqqq1Sq1MRdRqnVuKpI9Tq/FMksFuKhkagvULtTAilbINZ85q5K3FUZjSGbDNioy1NdqYMu4UdTTEizbRNPKFHTua6O3iWJAoFU7G1EEQ/vHqa0I+WAraEbGUpXLluhdhxW1AgRQKpWkQVAe9X0NbxRk2WVNTA1XU08tgVZA25nESEk1xuraiZHKg8Vp63f7EKqa4+WQu5JNc9af2Ub0odWKzknk03dUeaM1ynQSZp69ahBqzbL5kgFNLUTNXTbfOGPU11FrHtUVlafD044FbcYwBXZCNkc03csLUq1EtSKa0MyTtxTkzjmmipB0piY7NBNJTSaBDXbArKvptqnmtCZsA1zuqT4BqZOyLgrmNfXG5iM1kSPnNT3MpZjVJmzXBN3Z1xRNCpeUAV0lnHtQViaZFvkJNdFEMCtqUdLmdR62LSVOh4qspqZD0roRiWkNSqarKcVKpoEWAakDVXVqkDUxE26mM1N3Ux2pgMlfise7k61oTvxWLdv15qJlxRn3L5Jqi55qxM3Jqox5rkkzoQ+IbnFbVquAKyLUZcVuW4wBWlNETZfi6CrkZqnH0q0nauhGLLKGplNV1qVTVEk4bijdUeaXNACsahkbApzNVaVsA0MCjePwea5+7bk1sXj8GsG5PJrCobQKbnmos05zzUZPNc5sh4qZKrqamQ80AXYDyK3LM8VhQda2rQ4AranuZTNmI8VaRqoxNxVlGrpMS0Gpd1QhqC1Ah7NUDtQzVA7UDQyR+DWfcvwasytWdcvweaiRaOW8SXghhIz8x7Vwc8gYkmtjxRemS+aPPC1zLvXM9zsirIiuAudy8GojcTygI0hK02aQnikiWrS0IkzsvD+pQ2Fv5SAAt1NdNDexzgHPWvOLYEHg1v2Vw6ADJpcxHKdiGBFGay7a6LLg1cWTI61LCxKxqCdsQOfanF6palceVYyHvjikNGBcS89apNJz1qvLcsxzmoDMT3pG5qQz7WHPeuktpxJErVw6zEV0mkTmS25PQ0EyWhuB6UvVYPS7qRmSM1RM1IWqNjSAR2qFjSsaiZqRQFqTdUbNTN5oAsh6kR6ph6lR+aBl1Hqwr1QVqmV+KZJa31G7VGXpjPQAkjVTlPJqd24NVnNIZ//9k="
358
+ ]
359
+ }
360
+ },
361
+ "widgets_values": [
362
+ "{\"positive\":[{\"x\":620.2460000000001,\"y\":359.37000000000006},{\"x\":620.73,\"y\":245.63000000000002}],\"negative\":[{\"x\":0,\"y\":0}]}",
363
+ "[{\"x\":620.2460000000001,\"y\":359.37000000000006},{\"x\":620.73,\"y\":245.63000000000002}]",
364
+ "[{\"x\":0,\"y\":0}]",
365
+ "[{}]",
366
+ "[{}]",
367
+ "xyxy",
368
+ 768,
369
+ 768,
370
+ false,
371
+ null,
372
+ null,
373
+ null
374
+ ]
375
+ }
376
+ ],
377
+ "links": [
378
+ [
379
+ 40,
380
+ 106,
381
+ 0,
382
+ 105,
383
+ 0,
384
+ "SAM2MODEL"
385
+ ],
386
+ [
387
+ 41,
388
+ 102,
389
+ 0,
390
+ 105,
391
+ 1,
392
+ "IMAGE"
393
+ ],
394
+ [
395
+ 42,
396
+ 105,
397
+ 0,
398
+ 107,
399
+ 1,
400
+ "MASK"
401
+ ],
402
+ [
403
+ 43,
404
+ 102,
405
+ 0,
406
+ 107,
407
+ 0,
408
+ "IMAGE"
409
+ ],
410
+ [
411
+ 52,
412
+ 102,
413
+ 0,
414
+ 114,
415
+ 0,
416
+ "IMAGE"
417
+ ],
418
+ [
419
+ 53,
420
+ 114,
421
+ 0,
422
+ 112,
423
+ 0,
424
+ "STRING"
425
+ ],
426
+ [
427
+ 54,
428
+ 114,
429
+ 0,
430
+ 105,
431
+ 3,
432
+ "STRING"
433
+ ]
434
+ ],
435
+ "groups": [],
436
+ "config": {},
437
+ "extra": {
438
+ "ds": {
439
+ "scale": 0.7513148009015777,
440
+ "offset": {
441
+ "0": 226.08052057760656,
442
+ "1": 820.3321624947772
443
+ }
444
+ }
445
+ },
446
+ "version": 0.4
447
+ }
custom_nodes/comfyui-segment-anything-2/load_model.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ from .sam2.modeling.sam2_base import SAM2Base
3
+ from .sam2.modeling.backbones.image_encoder import ImageEncoder
4
+ from .sam2.modeling.backbones.hieradet import Hiera
5
+ from .sam2.modeling.backbones.image_encoder import FpnNeck
6
+ from .sam2.modeling.position_encoding import PositionEmbeddingSine
7
+ from .sam2.modeling.memory_attention import MemoryAttention, MemoryAttentionLayer
8
+ from .sam2.modeling.sam.transformer import RoPEAttention
9
+ from .sam2.modeling.memory_encoder import MemoryEncoder, MaskDownSampler, Fuser, CXBlock
10
+
11
+ from .sam2.sam2_image_predictor import SAM2ImagePredictor
12
+ from .sam2.sam2_video_predictor import SAM2VideoPredictor
13
+ from .sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
14
+ from comfy.utils import load_torch_file
15
+
16
+ def load_model(model_path, model_cfg_path, segmentor, dtype, device):
17
+ # Load the YAML configuration
18
+ with open(model_cfg_path, 'r') as file:
19
+ config = yaml.safe_load(file)
20
+
21
+ # Extract the model configuration
22
+ model_config = config['model']
23
+
24
+ # Instantiate the image encoder components
25
+ trunk_config = model_config['image_encoder']['trunk']
26
+ neck_config = model_config['image_encoder']['neck']
27
+ position_encoding_config = neck_config['position_encoding']
28
+
29
+ position_encoding = PositionEmbeddingSine(
30
+ num_pos_feats=position_encoding_config['num_pos_feats'],
31
+ normalize=position_encoding_config['normalize'],
32
+ scale=position_encoding_config['scale'],
33
+ temperature=position_encoding_config['temperature']
34
+ )
35
+
36
+ neck = FpnNeck(
37
+ position_encoding=position_encoding,
38
+ d_model=neck_config['d_model'],
39
+ backbone_channel_list=neck_config['backbone_channel_list'],
40
+ fpn_top_down_levels=neck_config['fpn_top_down_levels'],
41
+ fpn_interp_model=neck_config['fpn_interp_model']
42
+ )
43
+
44
+ keys_to_include = ['embed_dim', 'num_heads', 'global_att_blocks', 'window_pos_embed_bkg_spatial_size', 'stages']
45
+ trunk_kwargs = {key: trunk_config[key] for key in keys_to_include if key in trunk_config}
46
+ trunk = Hiera(**trunk_kwargs)
47
+
48
+ image_encoder = ImageEncoder(
49
+ scalp=model_config['image_encoder']['scalp'],
50
+ trunk=trunk,
51
+ neck=neck
52
+ )
53
+ # Instantiate the memory attention components
54
+ memory_attention_layer_config = config['model']['memory_attention']['layer']
55
+ self_attention_config = memory_attention_layer_config['self_attention']
56
+ cross_attention_config = memory_attention_layer_config['cross_attention']
57
+
58
+ self_attention = RoPEAttention(
59
+ rope_theta=self_attention_config['rope_theta'],
60
+ feat_sizes=self_attention_config['feat_sizes'],
61
+ embedding_dim=self_attention_config['embedding_dim'],
62
+ num_heads=self_attention_config['num_heads'],
63
+ downsample_rate=self_attention_config['downsample_rate'],
64
+ dropout=self_attention_config['dropout']
65
+ )
66
+
67
+ cross_attention = RoPEAttention(
68
+ rope_theta=cross_attention_config['rope_theta'],
69
+ feat_sizes=cross_attention_config['feat_sizes'],
70
+ rope_k_repeat=cross_attention_config['rope_k_repeat'],
71
+ embedding_dim=cross_attention_config['embedding_dim'],
72
+ num_heads=cross_attention_config['num_heads'],
73
+ downsample_rate=cross_attention_config['downsample_rate'],
74
+ dropout=cross_attention_config['dropout'],
75
+ kv_in_dim=cross_attention_config['kv_in_dim']
76
+ )
77
+
78
+ memory_attention_layer = MemoryAttentionLayer(
79
+ activation=memory_attention_layer_config['activation'],
80
+ dim_feedforward=memory_attention_layer_config['dim_feedforward'],
81
+ dropout=memory_attention_layer_config['dropout'],
82
+ pos_enc_at_attn=memory_attention_layer_config['pos_enc_at_attn'],
83
+ self_attention=self_attention,
84
+ d_model=memory_attention_layer_config['d_model'],
85
+ pos_enc_at_cross_attn_keys=memory_attention_layer_config['pos_enc_at_cross_attn_keys'],
86
+ pos_enc_at_cross_attn_queries=memory_attention_layer_config['pos_enc_at_cross_attn_queries'],
87
+ cross_attention=cross_attention
88
+ )
89
+
90
+ memory_attention = MemoryAttention(
91
+ d_model=config['model']['memory_attention']['d_model'],
92
+ pos_enc_at_input=config['model']['memory_attention']['pos_enc_at_input'],
93
+ layer=memory_attention_layer,
94
+ num_layers=config['model']['memory_attention']['num_layers']
95
+ )
96
+
97
+ # Instantiate the memory encoder components
98
+ memory_encoder_config = config['model']['memory_encoder']
99
+ position_encoding_mem_enc_config = memory_encoder_config['position_encoding']
100
+ mask_downsampler_config = memory_encoder_config['mask_downsampler']
101
+ fuser_layer_config = memory_encoder_config['fuser']['layer']
102
+
103
+ position_encoding_mem_enc = PositionEmbeddingSine(
104
+ num_pos_feats=position_encoding_mem_enc_config['num_pos_feats'],
105
+ normalize=position_encoding_mem_enc_config['normalize'],
106
+ scale=position_encoding_mem_enc_config['scale'],
107
+ temperature=position_encoding_mem_enc_config['temperature']
108
+ )
109
+
110
+ mask_downsampler = MaskDownSampler(
111
+ kernel_size=mask_downsampler_config['kernel_size'],
112
+ stride=mask_downsampler_config['stride'],
113
+ padding=mask_downsampler_config['padding']
114
+ )
115
+
116
+ fuser_layer = CXBlock(
117
+ dim=fuser_layer_config['dim'],
118
+ kernel_size=fuser_layer_config['kernel_size'],
119
+ padding=fuser_layer_config['padding'],
120
+ layer_scale_init_value=float(fuser_layer_config['layer_scale_init_value'])
121
+ )
122
+ fuser = Fuser(
123
+ num_layers=memory_encoder_config['fuser']['num_layers'],
124
+ layer=fuser_layer
125
+ )
126
+
127
+ memory_encoder = MemoryEncoder(
128
+ position_encoding=position_encoding_mem_enc,
129
+ mask_downsampler=mask_downsampler,
130
+ fuser=fuser,
131
+ out_dim=memory_encoder_config['out_dim']
132
+ )
133
+
134
+ sam_mask_decoder_extra_args = {
135
+ "dynamic_multimask_via_stability": True,
136
+ "dynamic_multimask_stability_delta": 0.05,
137
+ "dynamic_multimask_stability_thresh": 0.98,
138
+ }
139
+
140
def initialize_model(model_class, model_config, segmentor, image_encoder, memory_attention, memory_encoder, sam_mask_decoder_extra_args, dtype, device):
    """Instantiate a SAM2 model class from a parsed YAML config section.

    Args:
        model_class: Class to instantiate (e.g. SAM2Base or SAM2VideoPredictor);
            must accept the keyword arguments listed below.
        model_config: Mapping of scalar hyperparameters (the ``model`` section
            of the SAM2 YAML config); every key accessed here must be present.
        segmentor: One of 'single_image', 'video', 'automaskgenerator'; only
            used to decide memory-encoder mask binarization.
        image_encoder: Pre-built image encoder submodule.
        memory_attention: Pre-built memory attention submodule.
        memory_encoder: Pre-built memory encoder submodule.
        sam_mask_decoder_extra_args: Extra kwargs forwarded to the mask decoder.
        dtype: torch dtype the model is cast to.
        device: torch device the model is moved to.

    Returns:
        The instantiated model, cast to ``dtype``, moved to ``device`` and set
        to eval mode.
    """
    return model_class(
        image_encoder=image_encoder,
        memory_attention=memory_attention,
        memory_encoder=memory_encoder,
        sam_mask_decoder_extra_args=sam_mask_decoder_extra_args,
        num_maskmem=model_config['num_maskmem'],
        image_size=model_config['image_size'],
        sigmoid_scale_for_mem_enc=model_config['sigmoid_scale_for_mem_enc'],
        sigmoid_bias_for_mem_enc=model_config['sigmoid_bias_for_mem_enc'],
        use_mask_input_as_output_without_sam=model_config['use_mask_input_as_output_without_sam'],
        directly_add_no_mem_embed=model_config['directly_add_no_mem_embed'],
        use_high_res_features_in_sam=model_config['use_high_res_features_in_sam'],
        multimask_output_in_sam=model_config['multimask_output_in_sam'],
        iou_prediction_use_sigmoid=model_config['iou_prediction_use_sigmoid'],
        use_obj_ptrs_in_encoder=model_config['use_obj_ptrs_in_encoder'],
        add_tpos_enc_to_obj_ptrs=model_config['add_tpos_enc_to_obj_ptrs'],
        only_obj_ptrs_in_the_past_for_eval=model_config['only_obj_ptrs_in_the_past_for_eval'],
        pred_obj_scores=model_config['pred_obj_scores'],
        pred_obj_scores_mlp=model_config['pred_obj_scores_mlp'],
        fixed_no_obj_ptr=model_config['fixed_no_obj_ptr'],
        multimask_output_for_tracking=model_config['multimask_output_for_tracking'],
        use_multimask_token_for_obj_ptr=model_config['use_multimask_token_for_obj_ptr'],
        compile_image_encoder=model_config['compile_image_encoder'],
        multimask_min_pt_num=model_config['multimask_min_pt_num'],
        multimask_max_pt_num=model_config['multimask_max_pt_num'],
        use_mlp_for_obj_ptr_proj=model_config['use_mlp_for_obj_ptr_proj'],
        proj_tpos_enc_in_obj_ptrs=model_config['proj_tpos_enc_in_obj_ptrs'],
        no_obj_embed_spatial=model_config['no_obj_embed_spatial'],
        use_signed_tpos_enc_to_obj_ptrs=model_config['use_signed_tpos_enc_to_obj_ptrs'],
        # Point-prompt mask binarization only makes sense when tracking across
        # frames (idiom fix: was `True if segmentor == 'video' else False`).
        binarize_mask_from_pts_for_mem_enc=(segmentor == 'video'),
    ).to(dtype).to(device).eval()
172
+
173
+ # Load the state dictionary
174
+ sd = load_torch_file(model_path)
175
+
176
+ # Initialize model based on segmentor type
177
+ if segmentor == 'single_image':
178
+ model_class = SAM2Base
179
+ model = initialize_model(model_class, model_config, segmentor, image_encoder, memory_attention, memory_encoder, sam_mask_decoder_extra_args, dtype, device)
180
+ model.load_state_dict(sd)
181
+ model = SAM2ImagePredictor(model)
182
+ elif segmentor == 'video':
183
+ model_class = SAM2VideoPredictor
184
+ model = initialize_model(model_class, model_config, segmentor, image_encoder, memory_attention, memory_encoder, sam_mask_decoder_extra_args, dtype, device)
185
+ model.load_state_dict(sd)
186
+ elif segmentor == 'automaskgenerator':
187
+ model_class = SAM2Base
188
+ model = initialize_model(model_class, model_config, segmentor, image_encoder, memory_attention, memory_encoder, sam_mask_decoder_extra_args, dtype, device)
189
+ model.load_state_dict(sd)
190
+ model = SAM2AutomaticMaskGenerator(model)
191
+ else:
192
+ raise ValueError(f"Segmentor {segmentor} not supported")
193
+
194
+ return model
custom_nodes/comfyui-segment-anything-2/nodes.py ADDED
@@ -0,0 +1,771 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch.functional import F
3
+ import os
4
+ import numpy as np
5
+ import json
6
+ import random
7
+
8
+ from tqdm import tqdm
9
+ from contextlib import nullcontext
10
+
11
+ from .load_model import load_model
12
+
13
+ import comfy.model_management as mm
14
+ from comfy.utils import ProgressBar, common_upscale
15
+ import folder_paths
16
+
17
+ script_directory = os.path.dirname(os.path.abspath(__file__))
18
+
19
class DownloadAndLoadSAM2Model:
    """ComfyUI loader node: fetches a SAM2 checkpoint from the
    'Kijai/sam2-safetensors' Hugging Face repo (if missing from the local
    ``models/sam2`` directory) and builds the requested predictor type."""

    @classmethod
    def INPUT_TYPES(s):
        checkpoints = [
            'sam2_hiera_base_plus.safetensors',
            'sam2_hiera_large.safetensors',
            'sam2_hiera_small.safetensors',
            'sam2_hiera_tiny.safetensors',
            'sam2.1_hiera_base_plus.safetensors',
            'sam2.1_hiera_large.safetensors',
            'sam2.1_hiera_small.safetensors',
            'sam2.1_hiera_tiny.safetensors',
        ]
        return {
            "required": {
                "model": (checkpoints,),
                "segmentor": (['single_image', 'video', 'automaskgenerator'],),
                "device": (['cuda', 'cpu', 'mps'],),
                "precision": (['fp16', 'bf16', 'fp32'], {"default": 'fp16'}),
            },
        }

    RETURN_TYPES = ("SAM2MODEL",)
    RETURN_NAMES = ("sam2_model",)
    FUNCTION = "loadmodel"
    CATEGORY = "SAM2"

    # Model-size keyword -> config yaml name, per SAM2 version.
    _CONFIGS = {
        "2.0": {
            "base": "sam2_hiera_b+.yaml",
            "large": "sam2_hiera_l.yaml",
            "small": "sam2_hiera_s.yaml",
            "tiny": "sam2_hiera_t.yaml",
        },
        "2.1": {
            "base": "sam2.1_hiera_b+.yaml",
            "large": "sam2.1_hiera_l.yaml",
            "small": "sam2.1_hiera_s.yaml",
            "tiny": "sam2.1_hiera_t.yaml",
        },
    }

    def loadmodel(self, model, segmentor, device, precision):
        """Download (if necessary) and load the selected SAM2 checkpoint.

        Returns:
            One-tuple holding a SAM2MODEL dict with keys
            ``model``, ``dtype``, ``device``, ``segmentor``, ``version``.

        Raises:
            ValueError: if a half precision is requested on CPU.
        """
        if precision != 'fp32' and device == 'cpu':
            raise ValueError("fp16 and bf16 are not supported on cpu")

        if device == "cuda" and torch.cuda.get_device_properties(0).major >= 8:
            # turn on tfloat32 for Ampere GPUs (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
            torch.backends.cuda.matmul.allow_tf32 = True
            torch.backends.cudnn.allow_tf32 = True

        dtype = {"fp16": torch.float16, "bf16": torch.bfloat16, "fp32": torch.float32}[precision]
        device = torch.device(device)

        download_path = os.path.join(folder_paths.models_dir, "sam2")
        # The 2.1 checkpoints are published with an explicit "-fp16" suffix
        # for the reduced-precision variants.
        if "2.1" in model and precision != 'fp32':
            stem, ext = model.rsplit('.', 1)
            model = f"{stem}-fp16.{ext}"
        model_path = os.path.join(download_path, model)
        print("model_path: ", model_path)

        if not os.path.exists(model_path):
            print(f"Downloading SAM2 model to: {model_path}")
            from huggingface_hub import snapshot_download
            snapshot_download(
                repo_id="Kijai/sam2-safetensors",
                allow_patterns=[f"*{model}*"],
                local_dir=download_path,
                local_dir_use_symlinks=False,
            )

        version = "2.1" if "2.1" in model else "2.0"

        # Pick the first config whose size keyword appears in the checkpoint
        # filename ("base" also matches "base_plus" by design).
        model_cfg_path = None
        for size_key, cfg_name in self._CONFIGS[version].items():
            if size_key in model:
                model_cfg_path = os.path.join(script_directory, "sam2_configs", cfg_name)
                break
        print(f"Using model config: {model_cfg_path}")

        loaded = load_model(model_path, model_cfg_path, segmentor, dtype, device)

        return ({
            'model': loaded,
            'dtype': dtype,
            'device': device,
            'segmentor': segmentor,
            'version': version,
        },)
111
+
112
+
113
class Florence2toCoordinates:
    """Convert Florence-2 detection output into SAM2 prompts.

    Takes the detector's bbox data, selects boxes by index, and returns
    their center points (as a JSON string of ``{"x", "y"}`` dicts) together
    with the selected bounding boxes.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "data": ("JSON", ),
                "index": ("STRING", {"default": "0"}),
                "batch": ("BOOLEAN", {"default": False}),
            },
        }

    RETURN_TYPES = ("STRING", "BBOX")
    RETURN_NAMES = ("center_coordinates", "bboxes")
    FUNCTION = "segment"
    CATEGORY = "SAM2"

    def segment(self, data, index, batch=False):
        """Select bboxes by `index` and compute their center coordinates.

        Args:
            data: Either a list of per-image bbox lists, a list of dicts with
                a "bboxes" key (Florence-2 format), or a JSON-ish string of
                the same structure.
            index: Comma-separated bbox indices; empty selects all of the
                first item's bboxes.
            batch: When True, gather the indexed bbox from every item in
                `data` instead of just the first.

        Returns:
            (JSON string of center points, list of selected bboxes).

        Raises:
            ValueError: in non-batch mode, when an index is out of range.
        """
        # `data` may arrive as a JSON-ish string (often single-quoted) or as
        # an already-parsed structure; normalize to the parsed form.
        # BUGFIX: the original referenced an undefined name here
        # (`coordinates.replace(...)`), so string input was never parsed.
        if isinstance(data, str):
            try:
                data = json.loads(data.replace("'", '"'))
            except ValueError:
                pass

        if len(data) == 0:
            # BUGFIX: keep the two-output contract even when there is nothing
            # to do (the original returned a 1-tuple here, breaking the BBOX
            # output).
            return (json.dumps([{'x': 0, 'y': 0}]), [])

        def get_bboxes(item):
            # Florence-2 items are dicts with a "bboxes" key; plain bbox
            # lists pass through unchanged.
            return item["bboxes"] if isinstance(item, dict) else item

        if index.strip():  # Check if index is not empty
            indexes = [int(i) for i in index.split(",")]
        else:  # If index is empty, use all indices from data[0]
            indexes = list(range(len(get_bboxes(data[0]))))
        print("Indexes:", indexes)

        center_points = []
        bboxes = []

        def collect(bbox):
            # Record one bbox and its integer center point.
            min_x, min_y, max_x, max_y = bbox
            center_points.append({"x": int((min_x + max_x) / 2),
                                  "y": int((min_y + max_y) / 2)})
            bboxes.append(bbox)

        if batch:
            # One bbox per item of `data` for each requested index;
            # out-of-range indices are silently skipped (original behavior).
            for idx in indexes:
                if 0 <= idx < len(get_bboxes(data[0])):
                    for item in data:
                        collect(get_bboxes(item)[idx])
        else:
            for idx in indexes:
                if 0 <= idx < len(get_bboxes(data[0])):
                    collect(get_bboxes(data[0])[idx])
                else:
                    raise ValueError(f"There's nothing in index: {idx}")

        coordinates = json.dumps(center_points)
        print("Coordinates:", coordinates)
        return (coordinates, bboxes)
177
+
178
class Sam2Segmentation:
    """ComfyUI node: run SAM2 segmentation on an image batch (or video
    frames), guided by positive/negative point coordinates, bounding boxes
    and/or a low-res mask prompt. Returns a MASK batch.

    NOTE(review): this node keeps video tracking state on ``self``
    (``self.inference_state``), so node instances are stateful across runs.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sam2_model": ("SAM2MODEL", ),
                "image": ("IMAGE", ),
                "keep_model_loaded": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "coordinates_positive": ("STRING", {"forceInput": True}),
                "coordinates_negative": ("STRING", {"forceInput": True}),
                "bboxes": ("BBOX", ),
                "individual_objects": ("BOOLEAN", {"default": False}),
                "mask": ("MASK", ),

            },
        }

    RETURN_TYPES = ("MASK", )
    RETURN_NAMES =("mask", )
    FUNCTION = "segment"
    CATEGORY = "SAM2"

    def segment(self, image, sam2_model, keep_model_loaded, coordinates_positive=None, coordinates_negative=None,
                individual_objects=False, bboxes=None, mask=None):
        """Segment `image` with the loaded SAM2 model.

        Args:
            image: IMAGE tensor of shape (B, H, W, C), values in [0, 1].
            sam2_model: SAM2MODEL dict (keys: model, device, dtype, segmentor, version).
            keep_model_loaded: if False, offload the model (and reset video state) afterwards.
            coordinates_positive / coordinates_negative: JSON strings of
                [{"x": ..., "y": ...}, ...] point prompts.
            bboxes: BBOX prompt (list of per-image bbox lists).
            individual_objects: treat each positive point as a separate object.
            mask: optional MASK prompt; resized to 256x256 for SAM2's low-res
                mask input.

        Returns:
            One-tuple with a float MASK tensor of shape (N, H, W).
        """
        offload_device = mm.unet_offload_device()
        model = sam2_model["model"]
        device = sam2_model["device"]
        dtype = sam2_model["dtype"]
        segmentor = sam2_model["segmentor"]
        B, H, W, C = image.shape

        if mask is not None:
            # SAM2 expects a 256x256 low-res mask prompt; add/remove a channel
            # dim around the bilinear resize.
            input_mask = mask.clone().unsqueeze(1)
            input_mask = F.interpolate(input_mask, size=(256, 256), mode="bilinear")
            input_mask = input_mask.squeeze(1)

        if segmentor == 'automaskgenerator':
            raise ValueError("For automaskgenerator use Sam2AutoMaskSegmentation -node")
        if segmentor == 'single_image' and B > 1:
            print("Segmenting batch of images with single_image segmentor")

        if segmentor == 'video' and bboxes is not None and "2.1" not in sam2_model["version"]:
            raise ValueError("2.0 model doesn't support bboxes with video segmentor")

        if segmentor == 'video': # video model needs images resized first thing
            model_input_image_size = model.image_size
            print("Resizing to model input image size: ", model_input_image_size)
            image = common_upscale(image.movedim(-1,1), model_input_image_size, model_input_image_size, "bilinear", "disabled").movedim(1,-1)

        #handle point coordinates
        if coordinates_positive is not None:
            # Accept single-quoted pseudo-JSON; on any parse failure the
            # inputs are assumed to already be coordinate sequences.
            try:
                coordinates_positive = json.loads(coordinates_positive.replace("'", '"'))
                coordinates_positive = [(coord['x'], coord['y']) for coord in coordinates_positive]
                if coordinates_negative is not None:
                    coordinates_negative = json.loads(coordinates_negative.replace("'", '"'))
                    coordinates_negative = [(coord['x'], coord['y']) for coord in coordinates_negative]
            except:
                pass

            if not individual_objects:
                # One prompt set: (num_points, 2)
                positive_point_coords = np.atleast_2d(np.array(coordinates_positive))
            else:
                # One prompt per object: (num_objects, 1, 2)
                positive_point_coords = np.array([np.atleast_2d(coord) for coord in coordinates_positive])

            if coordinates_negative is not None:
                negative_point_coords = np.array(coordinates_negative)
                # Ensure both positive and negative coords are lists of 2D arrays if individual_objects is True
                if individual_objects:
                    assert negative_point_coords.shape[0] <= positive_point_coords.shape[0], "Can't have more negative than positive points in individual_objects mode"
                    if negative_point_coords.ndim == 2:
                        negative_point_coords = negative_point_coords[:, np.newaxis, :]
                    # Extend negative coordinates to match the number of positive coordinates
                    # by repeating the first negative point.
                    while negative_point_coords.shape[0] < positive_point_coords.shape[0]:
                        negative_point_coords = np.concatenate((negative_point_coords, negative_point_coords[:1, :, :]), axis=0)
                    final_coords = np.concatenate((positive_point_coords, negative_point_coords), axis=1)
                else:
                    final_coords = np.concatenate((positive_point_coords, negative_point_coords), axis=0)
            else:
                final_coords = positive_point_coords

        # Handle possible bboxes
        if bboxes is not None:
            boxes_np_batch = []
            for bbox_list in bboxes:
                boxes_np = []
                for bbox in bbox_list:
                    boxes_np.append(bbox)
                boxes_np = np.array(boxes_np)
                boxes_np_batch.append(boxes_np)
            if individual_objects:
                final_box = np.array(boxes_np_batch)
            else:
                # NOTE(review): uses only the last bbox_list from the loop
                # above — presumably intentional for single-prompt mode,
                # but verify against multi-image bbox inputs.
                final_box = np.array(boxes_np)
            final_labels = None

        #handle labels
        if coordinates_positive is not None:
            if not individual_objects:
                positive_point_labels = np.ones(len(positive_point_coords))  # 1 = positive
            else:
                positive_labels = []
                for point in positive_point_coords:
                    positive_labels.append(np.array([1]))  # one positive label per object
                positive_point_labels = np.stack(positive_labels, axis=0)

            if coordinates_negative is not None:
                if not individual_objects:
                    negative_point_labels = np.zeros(len(negative_point_coords))  # 0 = negative
                    final_labels = np.concatenate((positive_point_labels, negative_point_labels), axis=0)
                else:
                    negative_labels = []
                    for point in positive_point_coords:
                        negative_labels.append(np.array([0]))  # one negative label per object
                    negative_point_labels = np.stack(negative_labels, axis=0)
                    #combine labels
                    final_labels = np.concatenate((positive_point_labels, negative_point_labels), axis=1)
            else:
                final_labels = positive_point_labels
            print("combined labels: ", final_labels)
            print("combined labels shape: ", final_labels.shape)

        mask_list = []
        # SAM2ImagePredictor wraps the module as .model; move whichever exists.
        try:
            model.to(device)
        except:
            model.model.to(device)

        autocast_condition = not mm.is_device_mps(device)
        with torch.autocast(mm.get_autocast_device(device), dtype=dtype) if autocast_condition else nullcontext():
            if segmentor == 'single_image':
                image_np = (image.contiguous() * 255).byte().numpy()
                comfy_pbar = ProgressBar(len(image_np))
                tqdm_pbar = tqdm(total=len(image_np), desc="Processing Images")
                for i in range(len(image_np)):
                    model.set_image(image_np[i])
                    if bboxes is None:
                        input_box = None
                    else:
                        if len(image_np) > 1:
                            input_box = final_box[i]
                        # NOTE(review): this unconditionally overwrites the
                        # per-image box assigned just above, so multi-image
                        # batches always use the full `final_box` — looks like
                        # a missing `else`; confirm intended behavior.
                        input_box = final_box

                    out_masks, scores, logits = model.predict(
                        point_coords=final_coords if coordinates_positive is not None else None,
                        point_labels=final_labels if coordinates_positive is not None else None,
                        box=input_box,
                        multimask_output=True if not individual_objects else False,
                        mask_input = input_mask[i].unsqueeze(0) if mask is not None else None,
                    )

                    if out_masks.ndim == 3:
                        sorted_ind = np.argsort(scores)[::-1]
                        out_masks = out_masks[sorted_ind][0] #choose only the best result for now
                        scores = scores[sorted_ind]
                        logits = logits[sorted_ind]
                        mask_list.append(np.expand_dims(out_masks, axis=0))
                    else:
                        _, _, H, W = out_masks.shape
                        # Combine masks for all object IDs in the frame
                        combined_mask = np.zeros((H, W), dtype=bool)
                        for out_mask in out_masks:
                            combined_mask = np.logical_or(combined_mask, out_mask)
                        combined_mask = combined_mask.astype(np.uint8)
                        mask_list.append(combined_mask)
                    comfy_pbar.update(1)
                    tqdm_pbar.update(1)

            elif segmentor == 'video':
                mask_list = []
                # Drop any tracking state left over from a previous run.
                if hasattr(self, 'inference_state') and self.inference_state is not None:
                    model.reset_state(self.inference_state)
                self.inference_state = model.init_state(image.permute(0, 3, 1, 2).contiguous(), H, W, device=device)
                if bboxes is None:
                    input_box = None
                else:
                    # Video mode only prompts on frame 0.
                    input_box = bboxes[0]

                if individual_objects and bboxes is not None:
                    raise ValueError("bboxes not supported with individual_objects")


                if individual_objects:
                    # Register each point as a separate tracked object.
                    for i, (coord, label) in enumerate(zip(final_coords, final_labels)):
                        _, out_obj_ids, out_mask_logits = model.add_new_points_or_box(
                            inference_state=self.inference_state,
                            frame_idx=0,
                            obj_id=i,
                            points=final_coords[i],
                            labels=final_labels[i],
                            clear_old_points=True,
                            box=input_box
                        )
                else:
                    _, out_obj_ids, out_mask_logits = model.add_new_points_or_box(
                        inference_state=self.inference_state,
                        frame_idx=0,
                        obj_id=1,
                        points=final_coords if coordinates_positive is not None else None,
                        labels=final_labels if coordinates_positive is not None else None,
                        clear_old_points=True,
                        box=input_box
                    )

                pbar = ProgressBar(B)
                video_segments = {}
                for out_frame_idx, out_obj_ids, out_mask_logits in model.propagate_in_video(self.inference_state):
                    video_segments[out_frame_idx] = {
                        out_obj_id: (out_mask_logits[i] > 0.0).cpu().numpy()
                        for i, out_obj_id in enumerate(out_obj_ids)
                    }
                    pbar.update(1)
                    if individual_objects:
                        _, _, H, W = out_mask_logits.shape
                        # Combine masks for all object IDs in the frame
                        combined_mask = np.zeros((H, W), dtype=np.uint8)
                        for i, out_obj_id in enumerate(out_obj_ids):
                            out_mask = (out_mask_logits[i] > 0.0).cpu().numpy()
                            combined_mask = np.logical_or(combined_mask, out_mask)
                        # Replaces the per-object dict stored above with one merged mask.
                        video_segments[out_frame_idx] = combined_mask

                if individual_objects:
                    for frame_idx, combined_mask in video_segments.items():
                        mask_list.append(combined_mask)
                else:
                    for frame_idx, obj_masks in video_segments.items():
                        for out_obj_id, out_mask in obj_masks.items():
                            mask_list.append(out_mask)

        if not keep_model_loaded:
            try:
                model.to(offload_device)
            except:
                model.model.to(offload_device)
            # Video state holds cached features; clear it together with the model.
            if hasattr(self, 'inference_state') and self.inference_state is not None and hasattr(model, "reset_state"):
                model.reset_state(self.inference_state)
                self.inference_state = None
            mm.soft_empty_cache()

        out_list = []
        for mask in mask_list:
            # Each collected mask is (1, H, W); squeeze to (H, W).
            mask_tensor = torch.from_numpy(mask)
            mask_tensor = mask_tensor.permute(1, 2, 0)
            mask_tensor = mask_tensor[:, :, 0]
            out_list.append(mask_tensor)
        mask_tensor = torch.stack(out_list, dim=0).cpu().float()
        return (mask_tensor,)
427
+
428
class Sam2VideoSegmentation:
    """ComfyUI node: add point prompts to a SAM2 video inference state.

    Either initializes a fresh inference state from `image` or continues a
    previous one, then registers the given positive/negative points for
    `object_index` at `frame_index`. Propagation happens in the companion
    Sam2VideoSegmentation node.

    NOTE(review): the inference state is also cached on ``self``, making
    node instances stateful across runs.
    """
    @classmethod
    def IS_CHANGED(s): # TODO: smarter reset?
        # Constant value: ComfyUI will consider this node unchanged between runs.
        return ""
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sam2_model": ("SAM2MODEL", ),
                "coordinates_positive": ("STRING", {"forceInput": True}),
                "frame_index": ("INT", {"default": 0}),
                "object_index": ("INT", {"default": 0}),
            },
            "optional": {
                "image": ("IMAGE", ),
                "coordinates_negative": ("STRING", {"forceInput": True}),
                "prev_inference_state": ("SAM2INFERENCESTATE", ),
            },
        }

    RETURN_TYPES = ("SAM2MODEL", "SAM2INFERENCESTATE", )
    RETURN_NAMES =("sam2_model", "inference_state", )
    FUNCTION = "segment"
    CATEGORY = "SAM2"

    def segment(self, sam2_model, coordinates_positive, frame_index, object_index, image=None, coordinates_negative=None, prev_inference_state=None):
        """Register point prompts and return (sam2_model, inference_state).

        Raises:
            ValueError: if the loaded model is not a video segmentor.

        NOTE(review): if both `image` and `prev_inference_state` are None,
        `image`/`H`/`W`/`B` are undefined when initializing the state — this
        path would raise; presumably callers always supply one of the two.
        """
        offload_device = mm.unet_offload_device()
        model = sam2_model["model"]
        device = sam2_model["device"]
        dtype = sam2_model["dtype"]
        segmentor = sam2_model["segmentor"]


        if segmentor != 'video':
            raise ValueError("Loaded model is not SAM2Video")
        if image is not None:
            B, H, W, C = image.shape
            model_input_image_size = model.image_size
            print("Resizing to model input image size: ", model_input_image_size)
            image = common_upscale(image.movedim(-1,1), model_input_image_size, model_input_image_size, "bilinear", "disabled").movedim(1,-1)

        # Accept single-quoted pseudo-JSON; on parse failure the inputs are
        # assumed to already be coordinate sequences.
        try:
            coordinates_positive = json.loads(coordinates_positive.replace("'", '"'))
            coordinates_positive = [(coord['x'], coord['y']) for coord in coordinates_positive]
            if coordinates_negative is not None:
                coordinates_negative = json.loads(coordinates_negative.replace("'", '"'))
                coordinates_negative = [(coord['x'], coord['y']) for coord in coordinates_negative]
        except:
            pass

        positive_point_coords = np.array(coordinates_positive)
        positive_point_labels = [1] * len(positive_point_coords) # 1 = positive
        positive_point_labels = np.array(positive_point_labels)
        print("positive coordinates: ", positive_point_coords)

        if coordinates_negative is not None:
            negative_point_coords = np.array(coordinates_negative)
            negative_point_labels = [0] * len(negative_point_coords)  # 0 = negative
            negative_point_labels = np.array(negative_point_labels)
            print("negative coordinates: ", negative_point_coords)

            # Combine coordinates and labels
        else:
            # No negatives: use empty arrays so the concatenations below work.
            negative_point_coords = np.empty((0, 2))
            negative_point_labels = np.array([])
        # Ensure both positive and negative coordinates are 2D arrays
        positive_point_coords = np.atleast_2d(positive_point_coords)
        negative_point_coords = np.atleast_2d(negative_point_coords)

        # Ensure both positive and negative labels are 1D arrays
        positive_point_labels = np.atleast_1d(positive_point_labels)
        negative_point_labels = np.atleast_1d(negative_point_labels)

        combined_coords = np.concatenate((positive_point_coords, negative_point_coords), axis=0)
        combined_labels = np.concatenate((positive_point_labels, negative_point_labels), axis=0)

        model.to(device)

        autocast_condition = not mm.is_device_mps(device)
        # NOTE(review): autocast uses model.device here while sibling nodes
        # use the local `device` — confirm both resolve to the same device.
        with torch.autocast(mm.get_autocast_device(model.device), dtype=dtype) if autocast_condition else nullcontext():
            if prev_inference_state is None:
                print("Initializing inference state")
                if hasattr(self, 'inference_state'):
                    model.reset_state(self.inference_state)
                self.inference_state = model.init_state(image.permute(0, 3, 1, 2).contiguous(), H, W, device=device)
            else:
                print("Using previous inference state")
                B = prev_inference_state['num_frames']
                self.inference_state = prev_inference_state['inference_state']
            _, out_obj_ids, out_mask_logits = model.add_new_points(
                inference_state=self.inference_state,
                frame_idx=frame_index,
                obj_id=object_index,
                points=combined_coords,
                labels=combined_labels,
            )
        inference_state = {
            "inference_state": self.inference_state,
            "num_frames": B,
        }
        sam2_model = {
            'model': model,
            'dtype': dtype,
            'device': device,
            'segmentor' : segmentor
        }
        return (sam2_model, inference_state,)
535
+
536
class Sam2VideoSegmentation:
    """ComfyUI node: propagate previously-added SAM2 prompts through a video
    and return one combined mask per frame."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sam2_model": ("SAM2MODEL", ),
                "inference_state": ("SAM2INFERENCESTATE", ),
                "keep_model_loaded": ("BOOLEAN", {"default": True}),
            },
        }

    RETURN_TYPES = ("MASK", )
    RETURN_NAMES = ("mask", )
    FUNCTION = "segment"
    CATEGORY = "SAM2"

    def segment(self, sam2_model, inference_state, keep_model_loaded):
        """Run SAM2 video propagation.

        Args:
            sam2_model: SAM2MODEL dict from the loader node.
            inference_state: SAM2INFERENCESTATE produced by the add-points node.
            keep_model_loaded: if False, offload the model afterwards.

        Returns:
            One-tuple with a float MASK tensor of shape (num_frames, H, W).

        Raises:
            ValueError: if the loaded model is not a video segmentor.
        """
        offload_device = mm.unet_offload_device()
        model = sam2_model["model"]
        device = sam2_model["device"]
        dtype = sam2_model["dtype"]
        segmentor = sam2_model["segmentor"]

        state = inference_state["inference_state"]
        num_frames = state["num_frames"]

        if segmentor != 'video':
            raise ValueError("Loaded model is not SAM2Video")

        model.to(device)

        autocast_condition = not mm.is_device_mps(device)
        autocast_ctx = (
            torch.autocast(mm.get_autocast_device(device), dtype=dtype)
            if autocast_condition
            else nullcontext()
        )
        frame_masks = []
        with autocast_ctx:
            pbar = ProgressBar(num_frames)
            for frame_idx, obj_ids, mask_logits in model.propagate_in_video(state):
                print("out_mask_logits", mask_logits.shape)
                _, _, H, W = mask_logits.shape
                # Merge every tracked object into a single binary mask.
                merged = np.zeros((H, W), dtype=np.uint8)
                for i, _obj_id in enumerate(obj_ids):
                    merged = np.logical_or(merged, (mask_logits[i] > 0.0).cpu().numpy())
                frame_masks.append(merged)
                pbar.update(1)

            print(f"Total masks collected: {len(frame_masks)}")

        if not keep_model_loaded:
            model.to(offload_device)

        # Each merged mask is (1, H, W); squeeze to (H, W) and stack frames.
        mask_tensor = torch.stack(
            [torch.from_numpy(m).permute(1, 2, 0)[:, :, 0] for m in frame_masks],
            dim=0,
        ).cpu().float()
        return (mask_tensor,)
602
+
603
class Sam2AutoSegmentation:
    """ComfyUI node: run SAM2's automatic mask generator over an image batch,
    returning a combined mask per image, a color-coded overlay visualization,
    and the detected bounding boxes."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sam2_model": ("SAM2MODEL", ),
                "image": ("IMAGE", ),
                "points_per_side": ("INT", {"default": 32}),
                "points_per_batch": ("INT", {"default": 64}),
                "pred_iou_thresh": ("FLOAT", {"default": 0.8, "min": 0.0, "max": 1.0, "step": 0.01}),
                "stability_score_thresh": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 1.0, "step": 0.01}),
                "stability_score_offset": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
                "mask_threshold": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01}),
                "crop_n_layers": ("INT", {"default": 0}),
                "box_nms_thresh": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0, "step": 0.01}),
                "crop_nms_thresh": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0, "step": 0.01}),
                "crop_overlap_ratio": ("FLOAT", {"default": 0.34, "min": 0.0, "max": 1.0, "step": 0.01}),
                "crop_n_points_downscale_factor": ("INT", {"default": 1}),
                "min_mask_region_area": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01}),
                "use_m2m": ("BOOLEAN", {"default": False}),
                "keep_model_loaded": ("BOOLEAN", {"default": True}),
            },
        }

    RETURN_TYPES = ("MASK", "IMAGE", "BBOX",)
    RETURN_NAMES =("mask", "segmented_image", "bbox" ,)
    FUNCTION = "segment"
    CATEGORY = "SAM2"

    def segment(self, image, sam2_model, points_per_side, points_per_batch, pred_iou_thresh, stability_score_thresh,
                stability_score_offset, crop_n_layers, box_nms_thresh, crop_n_points_downscale_factor, min_mask_region_area,
                use_m2m, mask_threshold, crop_nms_thresh, crop_overlap_ratio, keep_model_loaded):
        """Generate masks automatically for each image in the batch.

        Returns:
            (mask batch, colored overlay batch, bbox list).

        NOTE(review): the returned ``bbox_list`` is the one from the LAST
        image processed in the loop below — for multi-image batches the
        earlier images' boxes are discarded; confirm whether per-image
        boxes were intended.
        """
        offload_device = mm.unet_offload_device()
        model = sam2_model["model"]
        device = sam2_model["device"]
        dtype = sam2_model["dtype"]
        segmentor = sam2_model["segmentor"]

        if segmentor != 'automaskgenerator':
            raise ValueError("Loaded model is not SAM2AutomaticMaskGenerator")

        # Push the node's tuning knobs onto the generator instance.
        model.points_per_side=points_per_side
        model.points_per_batch=points_per_batch
        model.pred_iou_thresh=pred_iou_thresh
        model.stability_score_thresh=stability_score_thresh
        model.stability_score_offset=stability_score_offset
        model.crop_n_layers=crop_n_layers
        model.box_nms_thresh=box_nms_thresh
        model.crop_n_points_downscale_factor=crop_n_points_downscale_factor
        model.crop_nms_thresh=crop_nms_thresh
        model.crop_overlap_ratio=crop_overlap_ratio
        model.min_mask_region_area=min_mask_region_area
        model.use_m2m=use_m2m
        model.mask_threshold=mask_threshold

        model.predictor.model.to(device)

        B, H, W, C = image.shape
        image_np = (image.contiguous() * 255).byte().numpy()

        out_list = []
        segment_out_list = []
        mask_list=[]

        pbar = ProgressBar(B)
        autocast_condition = not mm.is_device_mps(device)
        with torch.autocast(mm.get_autocast_device(device), dtype=dtype) if autocast_condition else nullcontext():
            for img_np in image_np:
                result_dict = model.generate(img_np)
                # NOTE(review): mask_list/bbox_list are rebound each
                # iteration, so only the last image's results survive the loop.
                mask_list = [item['segmentation'] for item in result_dict]
                bbox_list = [item['bbox'] for item in result_dict]

                # Generate random colors for each mask
                num_masks = len(mask_list)
                colors = [tuple(random.choices(range(256), k=3)) for _ in range(num_masks)]

                # Create a blank image to overlay masks
                overlay_image = np.zeros((H, W, 3), dtype=np.uint8)

                # Create a combined mask initialized to zeros
                combined_mask = np.zeros((H, W), dtype=np.uint8)

                # Iterate through masks and color them
                for mask, color in zip(mask_list, colors):

                    # Combine masks using logical OR
                    combined_mask = np.logical_or(combined_mask, mask).astype(np.uint8)

                    # Convert mask to numpy array
                    mask_np = mask.astype(np.uint8)

                    # Color the mask
                    colored_mask = np.zeros_like(overlay_image)
                    for i in range(3): # Apply color channel-wise
                        colored_mask[:, :, i] = mask_np * color[i]

                    # Blend the colored mask with the overlay image
                    # (later masks win where they overlap earlier ones).
                    overlay_image = np.where(colored_mask > 0, colored_mask, overlay_image)
                out_list.append(torch.from_numpy(combined_mask))
                segment_out_list.append(overlay_image)
                pbar.update(1)

        stacked_array = np.stack(segment_out_list, axis=0)
        segment_image_tensor = torch.from_numpy(stacked_array).float() / 255

        if not keep_model_loaded:
            model.predictor.model.to(offload_device)

        mask_tensor = torch.stack(out_list, dim=0)
        return (mask_tensor.cpu().float(), segment_image_tensor.cpu().float(), bbox_list)
713
+
714
+ #WIP
715
+ # class OwlV2Detector:
716
+ # @classmethod
717
+ # def INPUT_TYPES(s):
718
+ # return {
719
+ # "required": {
720
+ # "image": ("IMAGE", ),
721
+ # },
722
+ # }
723
+
724
+ # RETURN_TYPES = ("MASK", )
725
+ # RETURN_NAMES =("mask", )
726
+ # FUNCTION = "segment"
727
+ # CATEGORY = "SAM2"
728
+
729
+ # def segment(self, image):
730
+ # from transformers import Owlv2Processor, Owlv2ForObjectDetection
731
+ # device = mm.get_torch_device()
732
+ # offload_device = mm.unet_offload_device()
733
+ # processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16-ensemble")
734
+ # model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16-ensemble")
735
+
736
+ # url = "http://images.cocodataset.org/val2017/000000039769.jpg"
737
+ # image = Image.open(requests.get(url, stream=True).raw)
738
+ # texts = [["a photo of a cat", "a photo of a dog"]]
739
+ # inputs = processor(text=texts, images=image, return_tensors="pt")
740
+ # outputs = model(**inputs)
741
+
742
+ # # Target image sizes (height, width) to rescale box predictions [batch_size, 2]
743
+ # target_sizes = torch.Tensor([image.size[::-1]])
744
+ # # Convert outputs (bounding boxes and class logits) to Pascal VOC Format (xmin, ymin, xmax, ymax)
745
+ # results = processor.post_process_object_detection(outputs=outputs, target_sizes=target_sizes, threshold=0.1)
746
+ # i = 0 # Retrieve predictions for the first image for the corresponding text queries
747
+ # text = texts[i]
748
+ # boxes, scores, labels = results[i]["boxes"], results[i]["scores"], results[i]["labels"]
749
+ # for box, score, label in zip(boxes, scores, labels):
750
+ # box = [round(i, 2) for i in box.tolist()]
751
+ # print(f"Detected {text[label]} with confidence {round(score.item(), 3)} at location {box}")
752
+
753
+
754
+ # return (mask_tensor,)
755
+
756
# Registration tables read by ComfyUI at import time: node type name -> class,
# and node type name -> label shown in the UI. Keys must stay stable, since
# saved workflows reference nodes by these strings.
NODE_CLASS_MAPPINGS = {
    "DownloadAndLoadSAM2Model": DownloadAndLoadSAM2Model,
    "Sam2Segmentation": Sam2Segmentation,
    "Florence2toCoordinates": Florence2toCoordinates,
    "Sam2AutoSegmentation": Sam2AutoSegmentation,
    "Sam2VideoSegmentationAddPoints": Sam2VideoSegmentationAddPoints,
    "Sam2VideoSegmentation": Sam2VideoSegmentation
}
NODE_DISPLAY_NAME_MAPPINGS = {
    "DownloadAndLoadSAM2Model": "(Down)Load SAM2Model",
    "Sam2Segmentation": "Sam2Segmentation",
    "Florence2toCoordinates": "Florence2 Coordinates",
    "Sam2AutoSegmentation": "Sam2AutoSegmentation",
    "Sam2VideoSegmentationAddPoints": "Sam2VideoSegmentationAddPoints",
    "Sam2VideoSegmentation": "Sam2VideoSegmentation"
}
custom_nodes/comfyui-segment-anything-2/pyproject.toml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "comfyui-segment-anything-2"
3
+ description = "Nodes to use [a/segment-anything-2](https://github.com/facebookresearch/segment-anything-2) for image or video segmentation."
4
+ version = "1.0.2"
5
+ license = {file = "LICENSE"}
6
+ dependencies = []
7
+
8
+ [project.urls]
9
+ Repository = "https://github.com/kijai/ComfyUI-segment-anything-2"
10
+ # Used by Comfy Registry https://comfyregistry.org
11
+
12
+ [tool.comfy]
13
+ PublisherId = "kijai"
14
+ DisplayName = "ComfyUI-segment-anything-2"
15
+ Icon = ""
custom_nodes/comfyui-segment-anything-2/readme.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # WORK IN PROGRESS
2
+
3
+ PointsEditor is now available for testing in KJNodes: https://github.com/kijai/ComfyUI-KJNodes
4
+
5
+ https://github.com/user-attachments/assets/c4a88647-679f-4cf2-ba1f-4fa8c7308c1e
6
+
7
+ https://github.com/user-attachments/assets/f15fafe8-72e8-41cc-b246-e947b1efe5ec
8
+
9
+ https://github.com/user-attachments/assets/c1efb595-0fb1-4ae7-b4fa-2def08eda0a8
10
+
11
+ For testing only currently.
12
+
13
+ Functional, but needs better coordinate selector.
14
+
15
+ For now mask postprocessing is disabled due to it needing cuda extension compilation. We can use other nodes for this purpose anyway, so might leave it that way, we'll see.
16
+
17
+ Models are automatically downloaded from https://huggingface.co/Kijai/sam2-safetensors/tree/main
18
+
19
+ to `ComfyUI/models/sam2`
20
+
21
+
22
+
23
+ Original repo:
24
+
25
+ https://github.com/facebookresearch/segment-anything-2
custom_nodes/comfyui-segment-anything-2/sam2/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
custom_nodes/comfyui-segment-anything-2/sam2/automatic_mask_generator.py ADDED
@@ -0,0 +1,436 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # Adapted from https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/automatic_mask_generator.py
8
+ from typing import Any, Dict, List, Optional, Tuple
9
+
10
+ import numpy as np
11
+ import torch
12
+ from torchvision.ops.boxes import batched_nms, box_area # type: ignore
13
+
14
+ from ..sam2.modeling.sam2_base import SAM2Base
15
+ from ..sam2.sam2_image_predictor import SAM2ImagePredictor
16
+ from ..sam2.utils.amg import (
17
+ area_from_rle,
18
+ batch_iterator,
19
+ batched_mask_to_box,
20
+ box_xyxy_to_xywh,
21
+ build_all_layer_point_grids,
22
+ calculate_stability_score,
23
+ coco_encode_rle,
24
+ generate_crop_boxes,
25
+ is_box_near_crop_edge,
26
+ mask_to_rle_pytorch,
27
+ MaskData,
28
+ remove_small_regions,
29
+ rle_to_mask,
30
+ uncrop_boxes_xyxy,
31
+ uncrop_masks,
32
+ uncrop_points,
33
+ )
34
+
35
+
36
class SAM2AutomaticMaskGenerator:
    """Whole-image mask generation with SAM 2.

    Samples a grid of point prompts (optionally over multiple image crops),
    predicts a mask per point, then filters by predicted IoU / stability score
    and de-duplicates with box NMS. See ``generate`` for the output format.
    """

    def __init__(
        self,
        model: SAM2Base,
        points_per_side: Optional[int] = 32,
        points_per_batch: int = 64,
        pred_iou_thresh: float = 0.8,
        stability_score_thresh: float = 0.95,
        stability_score_offset: float = 1.0,
        mask_threshold: float = 0.0,
        box_nms_thresh: float = 0.7,
        crop_n_layers: int = 0,
        crop_nms_thresh: float = 0.7,
        crop_overlap_ratio: float = 512 / 1500,
        crop_n_points_downscale_factor: int = 1,
        point_grids: Optional[List[np.ndarray]] = None,
        min_mask_region_area: int = 0,
        output_mode: str = "binary_mask",
        use_m2m: bool = False,
        multimask_output: bool = True,
    ) -> None:
        """
        Using a SAM 2 model, generates masks for the entire image.
        Generates a grid of point prompts over the image, then filters
        low quality and duplicate masks. The default settings are chosen
        for SAM 2 with a HieraL backbone.

        Arguments:
          model (Sam): The SAM 2 model to use for mask prediction.
          points_per_side (int or None): The number of points to be sampled
            along one side of the image. The total number of points is
            points_per_side**2. If None, 'point_grids' must provide explicit
            point sampling.
          points_per_batch (int): Sets the number of points run simultaneously
            by the model. Higher numbers may be faster but use more GPU memory.
          pred_iou_thresh (float): A filtering threshold in [0,1], using the
            model's predicted mask quality.
          stability_score_thresh (float): A filtering threshold in [0,1], using
            the stability of the mask under changes to the cutoff used to binarize
            the model's mask predictions.
          stability_score_offset (float): The amount to shift the cutoff when
            calculated the stability score.
          mask_threshold (float): Threshold for binarizing the mask logits
          box_nms_thresh (float): The box IoU cutoff used by non-maximal
            suppression to filter duplicate masks.
          crop_n_layers (int): If >0, mask prediction will be run again on
            crops of the image. Sets the number of layers to run, where each
            layer has 2**i_layer number of image crops.
          crop_nms_thresh (float): The box IoU cutoff used by non-maximal
            suppression to filter duplicate masks between different crops.
          crop_overlap_ratio (float): Sets the degree to which crops overlap.
            In the first crop layer, crops will overlap by this fraction of
            the image length. Later layers with more crops scale down this overlap.
          crop_n_points_downscale_factor (int): The number of points-per-side
            sampled in layer n is scaled down by crop_n_points_downscale_factor**n.
          point_grids (list(np.ndarray) or None): A list over explicit grids
            of points used for sampling, normalized to [0,1]. The nth grid in the
            list is used in the nth crop layer. Exclusive with points_per_side.
          min_mask_region_area (int): If >0, postprocessing will be applied
            to remove disconnected regions and holes in masks with area smaller
            than min_mask_region_area. Requires opencv.
          output_mode (str): The form masks are returned in. Can be 'binary_mask',
            'uncompressed_rle', or 'coco_rle'. 'coco_rle' requires pycocotools.
            For large resolutions, 'binary_mask' may consume large amounts of
            memory.
          use_m2m (bool): Whether to add a one step refinement using previous mask predictions.
          multimask_output (bool): Whether to output multimask at each point of the grid.
        """

        # Exactly one point-sampling strategy must be configured.
        assert (points_per_side is None) != (
            point_grids is None
        ), "Exactly one of points_per_side or point_grid must be provided."
        if points_per_side is not None:
            self.point_grids = build_all_layer_point_grids(
                points_per_side,
                crop_n_layers,
                crop_n_points_downscale_factor,
            )
        elif point_grids is not None:
            self.point_grids = point_grids
        else:
            raise ValueError("Can't have both points_per_side and point_grid be None.")

        assert output_mode in [
            "binary_mask",
            "uncompressed_rle",
            "coco_rle",
        ], f"Unknown output_mode {output_mode}."
        if output_mode == "coco_rle":
            # Fail early so the missing optional dependency is obvious.
            try:
                from pycocotools import mask as mask_utils  # type: ignore # noqa: F401
            except ImportError as e:
                print("Please install pycocotools")
                raise e

        self.predictor = SAM2ImagePredictor(
            model,
            max_hole_area=min_mask_region_area,
            max_sprinkle_area=min_mask_region_area,
        )
        self.points_per_batch = points_per_batch
        self.pred_iou_thresh = pred_iou_thresh
        self.stability_score_thresh = stability_score_thresh
        self.stability_score_offset = stability_score_offset
        self.mask_threshold = mask_threshold
        self.box_nms_thresh = box_nms_thresh
        self.crop_n_layers = crop_n_layers
        self.crop_nms_thresh = crop_nms_thresh
        self.crop_overlap_ratio = crop_overlap_ratio
        self.crop_n_points_downscale_factor = crop_n_points_downscale_factor
        self.min_mask_region_area = min_mask_region_area
        self.output_mode = output_mode
        self.use_m2m = use_m2m
        self.multimask_output = multimask_output

    @torch.no_grad()
    def generate(self, image: np.ndarray) -> List[Dict[str, Any]]:
        """
        Generates masks for the given image.

        Arguments:
          image (np.ndarray): The image to generate masks for, in HWC uint8 format.

        Returns:
           list(dict(str, any)): A list over records for masks. Each record is
             a dict containing the following keys:
               segmentation (dict(str, any) or np.ndarray): The mask. If
                 output_mode='binary_mask', is an array of shape HW. Otherwise,
                 is a dictionary containing the RLE.
               bbox (list(float)): The box around the mask, in XYWH format.
               area (int): The area in pixels of the mask.
               predicted_iou (float): The model's own prediction of the mask's
                 quality. This is filtered by the pred_iou_thresh parameter.
               point_coords (list(list(float))): The point coordinates input
                 to the model to generate this mask.
               stability_score (float): A measure of the mask's quality. This
                 is filtered on using the stability_score_thresh parameter.
               crop_box (list(float)): The crop of the image used to generate
                 the mask, given in XYWH format.
        """

        # Generate masks
        mask_data = self._generate_masks(image)

        # Encode masks into the requested output representation.
        if self.output_mode == "coco_rle":
            mask_data["segmentations"] = [
                coco_encode_rle(rle) for rle in mask_data["rles"]
            ]
        elif self.output_mode == "binary_mask":
            mask_data["segmentations"] = [rle_to_mask(rle) for rle in mask_data["rles"]]
        else:
            mask_data["segmentations"] = mask_data["rles"]

        # Write mask records
        curr_anns = []
        for idx in range(len(mask_data["segmentations"])):
            ann = {
                "segmentation": mask_data["segmentations"][idx],
                "area": area_from_rle(mask_data["rles"][idx]),
                "bbox": box_xyxy_to_xywh(mask_data["boxes"][idx]).tolist(),
                "predicted_iou": mask_data["iou_preds"][idx].item(),
                "point_coords": [mask_data["points"][idx].tolist()],
                "stability_score": mask_data["stability_score"][idx].item(),
                "crop_box": box_xyxy_to_xywh(mask_data["crop_boxes"][idx]).tolist(),
            }
            curr_anns.append(ann)

        return curr_anns

    def _generate_masks(self, image: np.ndarray) -> MaskData:
        """Run mask generation over every configured crop and merge results."""
        orig_size = image.shape[:2]
        crop_boxes, layer_idxs = generate_crop_boxes(
            orig_size, self.crop_n_layers, self.crop_overlap_ratio
        )

        # Iterate over image crops
        data = MaskData()
        for crop_box, layer_idx in zip(crop_boxes, layer_idxs):
            crop_data = self._process_crop(image, crop_box, layer_idx, orig_size)
            data.cat(crop_data)

        # Remove duplicate masks between crops
        if len(crop_boxes) > 1:
            # Prefer masks from smaller crops
            scores = 1 / box_area(data["crop_boxes"])
            scores = scores.to(data["boxes"].device)
            keep_by_nms = batched_nms(
                data["boxes"].float(),
                scores,
                torch.zeros_like(data["boxes"][:, 0]),  # categories
                iou_threshold=self.crop_nms_thresh,
            )
            data.filter(keep_by_nms)
        data.to_numpy()
        return data

    def _process_crop(
        self,
        image: np.ndarray,
        crop_box: List[int],
        crop_layer_idx: int,
        orig_size: Tuple[int, ...],
    ) -> MaskData:
        """Generate and de-duplicate masks for a single image crop."""
        # Crop the image and calculate embeddings
        x0, y0, x1, y1 = crop_box
        cropped_im = image[y0:y1, x0:x1, :]
        cropped_im_size = cropped_im.shape[:2]
        self.predictor.set_image(cropped_im)

        # Get points for this crop
        points_scale = np.array(cropped_im_size)[None, ::-1]
        points_for_image = self.point_grids[crop_layer_idx] * points_scale

        # Generate masks for this crop in batches
        data = MaskData()
        for (points,) in batch_iterator(self.points_per_batch, points_for_image):
            batch_data = self._process_batch(
                points, cropped_im_size, crop_box, orig_size, normalize=True
            )
            data.cat(batch_data)
            del batch_data
        self.predictor.reset_predictor()

        # Remove duplicates within this crop.
        keep_by_nms = batched_nms(
            data["boxes"].float(),
            data["iou_preds"],
            torch.zeros_like(data["boxes"][:, 0]),  # categories
            iou_threshold=self.box_nms_thresh,
        )
        data.filter(keep_by_nms)

        # Return to the original image frame
        data["boxes"] = uncrop_boxes_xyxy(data["boxes"], crop_box)
        data["points"] = uncrop_points(data["points"], crop_box)
        data["crop_boxes"] = torch.tensor([crop_box for _ in range(len(data["rles"]))])

        return data

    def _process_batch(
        self,
        points: np.ndarray,
        im_size: Tuple[int, ...],
        crop_box: List[int],
        orig_size: Tuple[int, ...],
        normalize=False,
    ) -> MaskData:
        """Predict, score, filter, and RLE-encode masks for one batch of points."""
        orig_h, orig_w = orig_size

        # Run model on this batch
        points = torch.as_tensor(
            points, dtype=torch.float32, device=self.predictor.device
        )
        in_points = self.predictor._transforms.transform_coords(
            points, normalize=normalize, orig_hw=im_size
        )
        in_labels = torch.ones(
            in_points.shape[0], dtype=torch.int, device=in_points.device
        )
        masks, iou_preds, low_res_masks = self.predictor._predict(
            in_points[:, None, :],
            in_labels[:, None],
            multimask_output=self.multimask_output,
            return_logits=True,
        )

        # Serialize predictions and store in MaskData
        data = MaskData(
            masks=masks.flatten(0, 1),
            iou_preds=iou_preds.flatten(0, 1),
            points=points.repeat_interleave(masks.shape[1], dim=0),
            low_res_masks=low_res_masks.flatten(0, 1),
        )
        del masks

        if not self.use_m2m:
            # Filter by predicted IoU
            if self.pred_iou_thresh > 0.0:
                keep_mask = data["iou_preds"] > self.pred_iou_thresh
                data.filter(keep_mask)

            # Calculate and filter by stability score
            data["stability_score"] = calculate_stability_score(
                data["masks"], self.mask_threshold, self.stability_score_offset
            )
            if self.stability_score_thresh > 0.0:
                keep_mask = data["stability_score"] >= self.stability_score_thresh
                data.filter(keep_mask)
        else:
            # One step refinement using previous mask predictions
            in_points = self.predictor._transforms.transform_coords(
                data["points"], normalize=normalize, orig_hw=im_size
            )
            labels = torch.ones(
                in_points.shape[0], dtype=torch.int, device=in_points.device
            )
            masks, ious = self.refine_with_m2m(
                in_points, labels, data["low_res_masks"], self.points_per_batch
            )
            data["masks"] = masks.squeeze(1)
            data["iou_preds"] = ious.squeeze(1)

            if self.pred_iou_thresh > 0.0:
                keep_mask = data["iou_preds"] > self.pred_iou_thresh
                data.filter(keep_mask)

            data["stability_score"] = calculate_stability_score(
                data["masks"], self.mask_threshold, self.stability_score_offset
            )
            if self.stability_score_thresh > 0.0:
                keep_mask = data["stability_score"] >= self.stability_score_thresh
                data.filter(keep_mask)

        # Threshold masks and calculate boxes
        data["masks"] = data["masks"] > self.mask_threshold
        data["boxes"] = batched_mask_to_box(data["masks"])

        # Filter boxes that touch crop boundaries
        keep_mask = ~is_box_near_crop_edge(
            data["boxes"], crop_box, [0, 0, orig_w, orig_h]
        )
        if not torch.all(keep_mask):
            data.filter(keep_mask)

        # Compress to RLE
        data["masks"] = uncrop_masks(data["masks"], crop_box, orig_h, orig_w)
        data["rles"] = mask_to_rle_pytorch(data["masks"])
        del data["masks"]

        return data

    @staticmethod
    def postprocess_small_regions(
        mask_data: MaskData, min_area: int, nms_thresh: float
    ) -> MaskData:
        """
        Removes small disconnected regions and holes in masks, then reruns
        box NMS to remove any new duplicates.

        Edits mask_data in place.

        Requires open-cv as a dependency.
        """
        if len(mask_data["rles"]) == 0:
            return mask_data

        # Filter small disconnected regions and holes
        new_masks = []
        scores = []
        for rle in mask_data["rles"]:
            mask = rle_to_mask(rle)

            mask, changed = remove_small_regions(mask, min_area, mode="holes")
            unchanged = not changed
            mask, changed = remove_small_regions(mask, min_area, mode="islands")
            unchanged = unchanged and not changed

            new_masks.append(torch.as_tensor(mask).unsqueeze(0))
            # Give score=0 to changed masks and score=1 to unchanged masks
            # so NMS will prefer ones that didn't need postprocessing
            scores.append(float(unchanged))

        # Recalculate boxes and remove any new duplicates
        masks = torch.cat(new_masks, dim=0)
        boxes = batched_mask_to_box(masks)
        keep_by_nms = batched_nms(
            boxes.float(),
            torch.as_tensor(scores),
            torch.zeros_like(boxes[:, 0]),  # categories
            iou_threshold=nms_thresh,
        )

        # Only recalculate RLEs for masks that have changed
        for i_mask in keep_by_nms:
            if scores[i_mask] == 0.0:
                mask_torch = masks[i_mask].unsqueeze(0)
                mask_data["rles"][i_mask] = mask_to_rle_pytorch(mask_torch)[0]
                mask_data["boxes"][i_mask] = boxes[i_mask]  # update res directly
        mask_data.filter(keep_by_nms)

        return mask_data

    def refine_with_m2m(self, points, point_labels, low_res_masks, points_per_batch):
        """Re-run prediction once with the previous low-res masks as prompts."""
        new_masks = []
        new_iou_preds = []

        for cur_points, cur_point_labels, low_res_mask in batch_iterator(
            points_per_batch, points, point_labels, low_res_masks
        ):
            best_masks, best_iou_preds, _ = self.predictor._predict(
                cur_points[:, None, :],
                cur_point_labels[:, None],
                mask_input=low_res_mask[:, None, :],
                multimask_output=False,
                return_logits=True,
            )
            new_masks.append(best_masks)
            new_iou_preds.append(best_iou_preds)
        masks = torch.cat(new_masks, dim=0)
        return masks, torch.cat(new_iou_preds, dim=0)
custom_nodes/comfyui-segment-anything-2/sam2/modeling/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
custom_nodes/comfyui-segment-anything-2/sam2/modeling/backbones/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
custom_nodes/comfyui-segment-anything-2/sam2/modeling/backbones/hieradet.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ from functools import partial
8
+ from typing import List, Tuple, Union
9
+
10
+ import torch
11
+ import torch.nn as nn
12
+ import torch.nn.functional as F
13
+ #from iopath.common.file_io import g_pathmgr
14
+
15
+ from ....sam2.modeling.backbones.utils import (
16
+ PatchEmbed,
17
+ window_partition,
18
+ window_unpartition,
19
+ )
20
+
21
+ from ....sam2.modeling.sam2_utils import DropPath, MLP
22
+
23
+
24
def do_pool(x: torch.Tensor, pool: nn.Module, norm: nn.Module = None) -> torch.Tensor:
    """Apply a spatial pooling module to a channels-last tensor.

    `x` is (B, H, W, C); pooling modules expect (B, C, H, W), so the tensor is
    permuted around the pool call. If `pool` is None the input is returned
    untouched; if `norm` is given it is applied after pooling.
    """
    if pool is None:
        return x
    pooled = pool(x.permute(0, 3, 1, 2))   # NHWC -> NCHW for the pooling op
    out = pooled.permute(0, 2, 3, 1)       # back to NHWC
    return norm(out) if norm else out
36
+
37
+
38
class MultiScaleAttention(nn.Module):
    """Multi-head self-attention over a (B, H, W, C) feature map, with an
    optional query-pooling module that spatially downsamples the output
    (used at Hiera stage transitions)."""

    def __init__(
        self,
        dim: int,
        dim_out: int,
        num_heads: int,
        q_pool: nn.Module = None,
    ):
        """
        Args:
            dim: input channel dim.
            dim_out: output channel dim (qkv projects dim -> 3 * dim_out).
            num_heads: number of attention heads.
            q_pool: optional pooling module applied to queries only; when set,
                the output spatial size is the pooled size.
        """
        super().__init__()

        self.dim = dim
        self.dim_out = dim_out
        self.num_heads = num_heads
        self.q_pool = q_pool
        self.qkv = nn.Linear(dim, dim_out * 3)
        self.proj = nn.Linear(dim_out, dim_out)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        B, H, W, _ = x.shape
        # qkv with shape (B, H * W, 3, nHead, C)
        qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1)
        # q, k, v with shape (B, H * W, nheads, C)
        q, k, v = torch.unbind(qkv, 2)

        # Q pooling (for downsample at stage changes)
        if self.q_pool:
            q = do_pool(q.reshape(B, H, W, -1), self.q_pool)
            H, W = q.shape[1:3]  # downsampled shape
            q = q.reshape(B, H * W, self.num_heads, -1)

        # Torch's SDPA expects [B, nheads, H*W, C] so we transpose
        x = F.scaled_dot_product_attention(
            q.transpose(1, 2),
            k.transpose(1, 2),
            v.transpose(1, 2),
        )
        # Transpose back
        x = x.transpose(1, 2)
        # Restore the (possibly pooled) spatial layout.
        x = x.reshape(B, H, W, -1)

        x = self.proj(x)

        return x
81
+
82
+
83
class MultiScaleBlock(nn.Module):
    """Hiera transformer block: windowed (or global) multi-scale attention
    plus an MLP, each with a residual connection. When `q_stride` is set the
    block also downsamples spatially via query pooling, projecting the
    residual path to `dim_out` to match."""

    def __init__(
        self,
        dim: int,
        dim_out: int,
        num_heads: int,
        mlp_ratio: float = 4.0,
        drop_path: float = 0.0,
        norm_layer: Union[nn.Module, str] = "LayerNorm",
        q_stride: Tuple[int, int] = None,
        act_layer: nn.Module = nn.GELU,
        window_size: int = 0,
    ):
        """
        Args:
            dim / dim_out: input / output channel dims; a projection is added
                to the shortcut when they differ.
            mlp_ratio: hidden dim of the MLP relative to dim_out.
            drop_path: stochastic-depth rate.
            norm_layer: norm module or the name of one on torch.nn.
            q_stride: optional (h, w) query-pool stride for downsampling.
            window_size: attention window size; 0 means global attention.
        """
        super().__init__()

        # Allow norm layers to be specified by name (resolved on torch.nn).
        if isinstance(norm_layer, str):
            norm_layer = partial(getattr(nn, norm_layer), eps=1e-6)

        self.dim = dim
        self.dim_out = dim_out
        self.norm1 = norm_layer(dim)

        self.window_size = window_size

        self.pool, self.q_stride = None, q_stride
        if self.q_stride:
            self.pool = nn.MaxPool2d(
                kernel_size=q_stride, stride=q_stride, ceil_mode=False
            )

        self.attn = MultiScaleAttention(
            dim,
            dim_out,
            num_heads=num_heads,
            q_pool=self.pool,
        )
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()

        self.norm2 = norm_layer(dim_out)
        self.mlp = MLP(
            dim_out,
            int(dim_out * mlp_ratio),
            dim_out,
            num_layers=2,
            activation=act_layer,
        )

        if dim != dim_out:
            self.proj = nn.Linear(dim, dim_out)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        shortcut = x  # B, H, W, C
        x = self.norm1(x)

        # Skip connection: project (and pool, matching the attention's
        # q-pooling) so the residual matches the attention output shape.
        if self.dim != self.dim_out:
            shortcut = do_pool(self.proj(x), self.pool)

        # Window partition
        window_size = self.window_size
        if window_size > 0:
            H, W = x.shape[1], x.shape[2]
            x, pad_hw = window_partition(x, window_size)

        # Window Attention + Q Pooling (if stage change)
        x = self.attn(x)
        if self.q_stride:
            # Shapes have changed due to Q pooling; recompute the window size
            # and padded extent in the pooled coordinate system.
            window_size = self.window_size // self.q_stride[0]
            H, W = shortcut.shape[1:3]

            pad_h = (window_size - H % window_size) % window_size
            pad_w = (window_size - W % window_size) % window_size
            pad_hw = (H + pad_h, W + pad_w)

        # Reverse window partition
        if self.window_size > 0:
            x = window_unpartition(x, window_size, pad_hw, (H, W))

        x = shortcut + self.drop_path(x)
        # MLP
        x = x + self.drop_path(self.mlp(self.norm2(x)))
        return x
166
+
167
+
168
class Hiera(nn.Module):
    """
    Hierarchical vision transformer trunk used as the SAM 2 image encoder.

    Reference: https://arxiv.org/abs/2306.00989
    """

    def __init__(
        self,
        embed_dim: int = 96,  # initial embed dim
        num_heads: int = 1,  # initial number of heads
        drop_path_rate: float = 0.0,  # stochastic depth
        q_pool: int = 3,  # number of q_pool stages
        q_stride: Tuple[int, int] = (2, 2),  # downsample stride bet. stages
        stages: Tuple[int, ...] = (2, 3, 16, 3),  # blocks per stage
        dim_mul: float = 2.0,  # dim_mul factor at stage shift
        head_mul: float = 2.0,  # head_mul factor at stage shift
        window_pos_embed_bkg_spatial_size: Tuple[int, int] = (14, 14),
        # window size per stage, when not using global att.
        window_spec: Tuple[int, ...] = (
            8,
            4,
            14,
            7,
        ),
        # global attn in these blocks
        global_att_blocks: Tuple[int, ...] = (
            12,
            16,
            20,
        ),
        weights_path=None,  # currently unused (checkpoint loading is commented out below)
        return_interm_layers=True,  # return feats from every stage
    ):
        super().__init__()

        assert len(stages) == len(window_spec)
        self.window_spec = window_spec

        depth = sum(stages)
        self.q_stride = q_stride
        # Index of the last block in each stage.
        self.stage_ends = [sum(stages[:i]) - 1 for i in range(1, len(stages) + 1)]
        assert 0 <= q_pool <= len(self.stage_ends[:-1])
        # Blocks that downsample: the first block of each of the first
        # `q_pool` stage transitions.
        self.q_pool_blocks = [x + 1 for x in self.stage_ends[:-1]][:q_pool]
        self.return_interm_layers = return_interm_layers

        self.patch_embed = PatchEmbed(
            embed_dim=embed_dim,
        )
        # Which blocks have global att?
        self.global_att_blocks = global_att_blocks

        # Windowed positional embedding (https://arxiv.org/abs/2311.05613)
        self.window_pos_embed_bkg_spatial_size = window_pos_embed_bkg_spatial_size
        self.pos_embed = nn.Parameter(
            torch.zeros(1, embed_dim, *self.window_pos_embed_bkg_spatial_size)
        )
        self.pos_embed_window = nn.Parameter(
            torch.zeros(1, embed_dim, self.window_spec[0], self.window_spec[0])
        )

        dpr = [
            x.item() for x in torch.linspace(0, drop_path_rate, depth)
        ]  # stochastic depth decay rule

        cur_stage = 1
        self.blocks = nn.ModuleList()

        for i in range(depth):
            dim_out = embed_dim
            # lags by a block, so first block of
            # next stage uses an initial window size
            # of previous stage and final window size of current stage
            window_size = self.window_spec[cur_stage - 1]

            if self.global_att_blocks is not None:
                window_size = 0 if i in self.global_att_blocks else window_size

            # Stage transition: widen channels and heads.
            if i - 1 in self.stage_ends:
                dim_out = int(embed_dim * dim_mul)
                num_heads = int(num_heads * head_mul)
                cur_stage += 1

            block = MultiScaleBlock(
                dim=embed_dim,
                dim_out=dim_out,
                num_heads=num_heads,
                drop_path=dpr[i],
                q_stride=self.q_stride if i in self.q_pool_blocks else None,
                window_size=window_size,
            )

            embed_dim = dim_out
            self.blocks.append(block)

        # Output channel dims, highest-resolution last when returning
        # intermediate layers (consumed by the FPN neck).
        self.channel_list = (
            [self.blocks[i].dim_out for i in self.stage_ends[::-1]]
            if return_interm_layers
            else [self.blocks[-1].dim_out]
        )

        # if weights_path is not None:
        #     with g_pathmgr.open(weights_path, "rb") as f:
        #         chkpt = torch.load(f, map_location="cpu")
        #         logging.info("loading Hiera", self.load_state_dict(chkpt, strict=False))

    def _get_pos_embed(self, hw: Tuple[int, int]) -> torch.Tensor:
        """Interpolate the background pos-embed to (h, w) and add the tiled
        window embedding; returns a (1, h, w, C) channels-last tensor."""
        h, w = hw
        window_embed = self.pos_embed_window
        pos_embed = F.interpolate(self.pos_embed, size=(h, w), mode="bicubic")
        pos_embed = pos_embed + window_embed.tile(
            [x // y for x, y in zip(pos_embed.shape, window_embed.shape)]
        )
        pos_embed = pos_embed.permute(0, 2, 3, 1)
        return pos_embed

    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
        """Return per-stage feature maps as (B, C, H, W) tensors (only the
        final stage's features when return_interm_layers is False)."""
        x = self.patch_embed(x)
        # x: (B, H, W, C)

        # Add pos embed
        x = x + self._get_pos_embed(x.shape[1:3])

        outputs = []
        for i, blk in enumerate(self.blocks):
            x = blk(x)
            if (i == self.stage_ends[-1]) or (
                i in self.stage_ends and self.return_interm_layers
            ):
                feats = x.permute(0, 3, 1, 2)
                outputs.append(feats)

        return outputs

    def get_layer_id(self, layer_name):
        """Map a parameter name to a layer index (for layer-wise LR decay)."""
        # https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33
        num_layers = self.get_num_layers()

        if layer_name.find("rel_pos") != -1:
            return num_layers + 1
        elif layer_name.find("pos_embed") != -1:
            return 0
        elif layer_name.find("patch_embed") != -1:
            return 0
        elif layer_name.find("blocks") != -1:
            return int(layer_name.split("blocks")[1].split(".")[1]) + 1
        else:
            return num_layers + 1

    def get_num_layers(self) -> int:
        """Number of transformer blocks in the trunk."""
        return len(self.blocks)
custom_nodes/comfyui-segment-anything-2/sam2/modeling/backbones/image_encoder.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ from typing import List, Optional
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.functional as F
12
+
13
+
14
class ImageEncoder(nn.Module):
    """Backbone trunk + FPN-style neck, producing multi-scale image
    features together with their positional encodings."""

    def __init__(
        self,
        trunk: nn.Module,
        neck: nn.Module,
        scalp: int = 0,
    ):
        """
        Args:
            trunk: backbone returning a list of feature maps.
            neck: maps trunk outputs to (feature_maps, pos_encodings).
            scalp: number of lowest-resolution levels to drop from outputs.
        """
        super().__init__()
        self.trunk = trunk
        self.neck = neck
        self.scalp = scalp
        assert (
            self.trunk.channel_list == self.neck.backbone_channel_list
        ), f"Channel dims of trunk and neck do not match. Trunk: {self.trunk.channel_list}, neck: {self.neck.backbone_channel_list}"

    def forward(self, sample: torch.Tensor):
        """Encode a batch of images into FPN features and positional encodings."""
        feats, pos_list = self.neck(self.trunk(sample))
        if self.scalp > 0:
            # Discard the `scalp` lowest-resolution pyramid levels.
            feats = feats[: -self.scalp]
            pos_list = pos_list[: -self.scalp]

        return {
            "vision_features": feats[-1],
            "vision_pos_enc": pos_list,
            "backbone_fpn": feats,
        }
43
+
44
+
45
class FpnNeck(nn.Module):
    """
    A modified variant of Feature Pyramid Network (FPN) neck
    (we remove output conv and also do bicubic interpolation similar to ViT
    pos embed interpolation)
    """

    def __init__(
        self,
        position_encoding: nn.Module,
        d_model: int,
        backbone_channel_list: List[int],
        kernel_size: int = 1,
        stride: int = 1,
        padding: int = 0,
        fpn_interp_model: str = "bilinear",
        fuse_type: str = "sum",
        fpn_top_down_levels: Optional[List[int]] = None,
    ):
        """Initialize the neck
        :param trunk: the backbone
        :param position_encoding: the positional encoding to use
        :param d_model: the dimension of the model
        :param neck_norm: the normalization to use
        """
        super().__init__()
        self.position_encoding = position_encoding
        # One lateral conv (1x1 by default) per backbone level, built in the
        # order of backbone_channel_list.
        self.convs = nn.ModuleList()
        self.backbone_channel_list = backbone_channel_list
        self.d_model = d_model
        for dim in backbone_channel_list:
            current = nn.Sequential()
            current.add_module(
                "conv",
                nn.Conv2d(
                    in_channels=dim,
                    out_channels=d_model,
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=padding,
                ),
            )

            self.convs.append(current)
        self.fpn_interp_model = fpn_interp_model
        assert fuse_type in ["sum", "avg"]
        self.fuse_type = fuse_type

        # levels to have top-down features in its outputs
        # e.g. if fpn_top_down_levels is [2, 3], then only outputs of level 2 and 3
        # have top-down propagation, while outputs of level 0 and level 1 have only
        # lateral features from the same backbone level.
        if fpn_top_down_levels is None:
            # default is to have top-down features on all levels
            fpn_top_down_levels = range(len(self.convs))
        self.fpn_top_down_levels = list(fpn_top_down_levels)

    def forward(self, xs: List[torch.Tensor]):
        """Fuse per-level backbone features top-down.

        Args:
            xs: one feature map per backbone level; len(xs) must equal
                len(self.convs). NOTE(review): the loop processes xs[-1]
                first and upsamples it 2x for xs[-2], so levels appear to be
                ordered high-resolution first — confirm against the trunk.

        Returns:
            (out, pos): fused feature maps and their positional encodings,
            in the same level order as `xs`.
        """
        out = [None] * len(self.convs)
        pos = [None] * len(self.convs)
        assert len(xs) == len(self.convs)
        # fpn forward pass
        # see https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/backbone/fpn.py
        prev_features = None
        # forward in top-down order (from low to high resolution)
        n = len(self.convs) - 1
        for i in range(n, -1, -1):
            x = xs[i]
            # Note the reversed pairing: conv index (n - i) handles xs[i].
            lateral_features = self.convs[n - i](x)
            if i in self.fpn_top_down_levels and prev_features is not None:
                # Upsample the coarser level 2x (in float32 for interpolation
                # stability) and fuse it with this level's lateral features.
                top_down_features = F.interpolate(
                    prev_features.to(dtype=torch.float32),
                    scale_factor=2.0,
                    mode=self.fpn_interp_model,
                    align_corners=(
                        None if self.fpn_interp_model == "nearest" else False
                    ),
                    antialias=False,
                )
                prev_features = lateral_features + top_down_features
                if self.fuse_type == "avg":
                    prev_features /= 2
            else:
                prev_features = lateral_features
            x_out = prev_features
            out[i] = x_out
            pos[i] = self.position_encoding(x_out).to(x_out.dtype)

        return out, pos
custom_nodes/comfyui-segment-anything-2/sam2/modeling/backbones/utils.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Some utilities for backbones, in particular for windowing"""
8
+
9
+ from typing import Tuple
10
+
11
+ import torch
12
+ import torch.nn as nn
13
+ import torch.nn.functional as F
14
+
15
+
16
def window_partition(x, window_size):
    """
    Split a channels-last feature map into non-overlapping square windows,
    zero-padding the bottom/right edges so height and width become multiples
    of window_size.

    Args:
        x (tensor): input tokens with [B, H, W, C].
        window_size (int): window size.

    Returns:
        windows: [B * num_windows, window_size, window_size, C] tensor.
        (Hp, Wp): padded height and width before partition.
    """
    batch, height, width, channels = x.shape

    pad_bottom = (window_size - height % window_size) % window_size
    pad_right = (window_size - width % window_size) % window_size
    if pad_bottom or pad_right:
        # F.pad takes (before, after) pairs from the last dim backwards: C, W, H.
        x = F.pad(x, (0, 0, 0, pad_right, 0, pad_bottom))
    padded_h = height + pad_bottom
    padded_w = width + pad_right

    grid_h = padded_h // window_size
    grid_w = padded_w // window_size
    x = x.view(batch, grid_h, window_size, grid_w, window_size, channels)
    windows = (
        x.permute(0, 1, 3, 2, 4, 5).reshape(-1, window_size, window_size, channels)
    )
    return windows, (padded_h, padded_w)
39
+
40
+
41
def window_unpartition(windows, window_size, pad_hw, hw):
    """
    Inverse of window_partition: reassemble windows into a [B, H, W, C] map
    and crop away any padding that was added before partitioning.

    Args:
        windows (tensor): [B * num_windows, window_size, window_size, C].
        window_size (int): window size.
        pad_hw (Tuple): padded height and width (Hp, Wp).
        hw (Tuple): original height and width (H, W) before padding.

    Returns:
        x: unpartitioned sequences with [B, H, W, C].
    """
    padded_h, padded_w = pad_hw
    height, width = hw
    windows_per_image = padded_h * padded_w // window_size // window_size
    batch = windows.shape[0] // windows_per_image
    x = windows.view(
        batch,
        padded_h // window_size,
        padded_w // window_size,
        window_size,
        window_size,
        -1,
    )
    x = x.permute(0, 1, 3, 2, 4, 5).reshape(batch, padded_h, padded_w, -1)

    if padded_h > height or padded_w > width:
        # Drop the zero padding added by window_partition.
        x = x[:, :height, :width, :].contiguous()
    return x
63
+
64
+
65
class PatchEmbed(nn.Module):
    """Convolutional image-to-patch embedding: (B, C, H, W) -> (B, H', W', E)."""

    def __init__(
        self,
        kernel_size: Tuple[int, ...] = (7, 7),
        stride: Tuple[int, ...] = (4, 4),
        padding: Tuple[int, ...] = (3, 3),
        in_chans: int = 3,
        embed_dim: int = 768,
    ):
        """
        Args:
            kernel_size (Tuple): kernel size of the projection layer.
            stride (Tuple): stride of the projection layer.
            padding (Tuple): padding size of the projection layer.
            in_chans (int): Number of input image channels.
            embed_dim (int): Patch embedding dimension.
        """
        super().__init__()
        self.proj = nn.Conv2d(
            in_chans,
            embed_dim,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Project, then move channels last: B C H W -> B H W C.
        return self.proj(x).permute(0, 2, 3, 1)
custom_nodes/comfyui-segment-anything-2/sam2/modeling/memory_attention.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ from typing import Optional
8
+
9
+ import torch
10
+ from torch import nn, Tensor
11
+
12
+ from ...sam2.modeling.sam.transformer import RoPEAttention
13
+
14
+ from ...sam2.modeling.sam2_utils import get_activation_fn, get_clones
15
+
16
+
17
class MemoryAttentionLayer(nn.Module):
    """One pre-norm transformer layer for SAM2 memory attention:
    self-attention over current-frame tokens, cross-attention from those
    tokens to memory tokens, then an MLP — each with a residual + dropout."""

    def __init__(
        self,
        activation: str,
        cross_attention: nn.Module,
        d_model: int,
        dim_feedforward: int,
        dropout: float,
        pos_enc_at_attn: bool,
        pos_enc_at_cross_attn_keys: bool,
        pos_enc_at_cross_attn_queries: bool,
        self_attention: nn.Module,
    ):
        """
        Args:
            activation: name of the MLP activation, resolved by get_activation_fn.
            cross_attention: module used for token-to-memory cross-attention.
            d_model: token embedding width.
            dim_feedforward: hidden width of the feed-forward MLP.
            dropout: dropout probability used for all dropout layers here.
            pos_enc_at_attn: add query pos enc to q/k in self-attention.
            pos_enc_at_cross_attn_keys: add memory pos enc to cross-attn keys.
            pos_enc_at_cross_attn_queries: add query pos enc to cross-attn queries.
            self_attention: module used for self-attention.
        """
        super().__init__()
        self.d_model = d_model
        self.dim_feedforward = dim_feedforward
        self.dropout_value = dropout
        self.self_attn = self_attention
        self.cross_attn_image = cross_attention

        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)

        self.activation_str = activation
        self.activation = get_activation_fn(activation)

        # Where to add pos enc
        self.pos_enc_at_attn = pos_enc_at_attn
        self.pos_enc_at_cross_attn_queries = pos_enc_at_cross_attn_queries
        self.pos_enc_at_cross_attn_keys = pos_enc_at_cross_attn_keys

    def _forward_sa(self, tgt, query_pos):
        # Self-Attention (pre-norm, residual). Positional encoding is added
        # to q/k only, never to v.
        tgt2 = self.norm1(tgt)
        q = k = tgt2 + query_pos if self.pos_enc_at_attn else tgt2
        tgt2 = self.self_attn(q, k, v=tgt2)
        tgt = tgt + self.dropout1(tgt2)
        return tgt

    def _forward_ca(self, tgt, memory, query_pos, pos, num_k_exclude_rope=0):
        # num_k_exclude_rope is only meaningful for RoPEAttention: it exempts
        # that many trailing key tokens (object pointers) from rotary encoding.
        kwds = {}
        if num_k_exclude_rope > 0:
            assert isinstance(self.cross_attn_image, RoPEAttention)
            kwds = {"num_k_exclude_rope": num_k_exclude_rope}

        # Cross-Attention (pre-norm, residual).
        tgt2 = self.norm2(tgt)
        tgt2 = self.cross_attn_image(
            q=tgt2 + query_pos if self.pos_enc_at_cross_attn_queries else tgt2,
            k=memory + pos if self.pos_enc_at_cross_attn_keys else memory,
            v=memory,
            **kwds,
        )
        tgt = tgt + self.dropout2(tgt2)
        return tgt

    def forward(
        self,
        tgt,
        memory,
        pos: Optional[Tensor] = None,
        query_pos: Optional[Tensor] = None,
        num_k_exclude_rope: int = 0,
    ) -> torch.Tensor:
        """Apply self-attn, cross-attn to `memory`, then the MLP; returns tgt."""
        # Self-Attn, Cross-Attn
        tgt = self._forward_sa(tgt, query_pos)
        tgt = self._forward_ca(tgt, memory, query_pos, pos, num_k_exclude_rope)
        # MLP (pre-norm, residual)
        tgt2 = self.norm3(tgt)
        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2))))
        tgt = tgt + self.dropout3(tgt2)
        return tgt
100
+
101
+
102
class MemoryAttention(nn.Module):
    """Stack of MemoryAttentionLayer modules fusing current-frame tokens
    with memory tokens.

    Inputs are sequence-first (S, B, C); when `batch_first` is set they are
    transposed to batch-first for the layers and transposed back on return.
    """

    def __init__(
        self,
        d_model: int,
        pos_enc_at_input: bool,
        layer: nn.Module,
        num_layers: int,
        batch_first: bool = True,  # Do layers expect batch first input?
    ):
        """
        Args:
            d_model: token embedding width (used for the final LayerNorm).
            pos_enc_at_input: add 0.1 * pos enc to the input tokens up front.
            layer: prototype layer, deep-copied num_layers times.
            num_layers: number of stacked layers.
            batch_first: whether the layers expect (B, S, C) input.
        """
        super().__init__()
        self.d_model = d_model
        self.layers = get_clones(layer, num_layers)
        self.num_layers = num_layers
        self.norm = nn.LayerNorm(d_model)
        self.pos_enc_at_input = pos_enc_at_input
        self.batch_first = batch_first

    def forward(
        self,
        curr: torch.Tensor,  # self-attention inputs
        memory: torch.Tensor,  # cross-attention inputs
        curr_pos: Optional[Tensor] = None,  # pos_enc for self-attention inputs
        memory_pos: Optional[Tensor] = None,  # pos_enc for cross-attention inputs
        num_obj_ptr_tokens: int = 0,  # number of object pointer *tokens*
    ):
        """Run the layer stack and return the normalized output, seq-first."""
        if isinstance(curr, list):
            # Single-element list inputs are unwrapped for convenience.
            assert isinstance(curr_pos, list)
            assert len(curr) == len(curr_pos) == 1
            curr, curr_pos = (
                curr[0],
                curr_pos[0],
            )

        assert (
            curr.shape[1] == memory.shape[1]
        ), "Batch size must be the same for curr and memory"

        output = curr
        if self.pos_enc_at_input and curr_pos is not None:
            output = output + 0.1 * curr_pos

        if self.batch_first:
            # Convert to batch first.
            # Fix: curr_pos/memory_pos are Optional — only transpose them when
            # given (previously a None pos raised AttributeError here).
            output = output.transpose(0, 1)
            memory = memory.transpose(0, 1)
            if curr_pos is not None:
                curr_pos = curr_pos.transpose(0, 1)
            if memory_pos is not None:
                memory_pos = memory_pos.transpose(0, 1)

        for layer in self.layers:
            kwds = {}
            if isinstance(layer.cross_attn_image, RoPEAttention):
                # Exempt trailing object-pointer tokens from rotary encoding.
                kwds = {"num_k_exclude_rope": num_obj_ptr_tokens}

            output = layer(
                tgt=output,
                memory=memory,
                pos=memory_pos,
                query_pos=curr_pos,
                **kwds,
            )
        normed_output = self.norm(output)

        if self.batch_first:
            # Convert back to seq first.
            normed_output = normed_output.transpose(0, 1)
            if curr_pos is not None:
                curr_pos = curr_pos.transpose(0, 1)

        return normed_output
custom_nodes/comfyui-segment-anything-2/sam2/modeling/memory_encoder.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ import math
8
+ from typing import Tuple
9
+
10
+ import torch
11
+ import torch.nn as nn
12
+ import torch.nn.functional as F
13
+
14
+ from ...sam2.modeling.sam2_utils import DropPath, get_clones, LayerNorm2d
15
+
16
+
17
class MaskDownSampler(nn.Module):
    """
    Progressively downsample a mask by total_stride, each time by stride.
    Note that LayerNorm is applied per *token*, like in ViT.

    With each downsample (by a factor stride**2), channel capacity increases by the same factor.
    In the end, we linearly project to embed_dim channels.
    """

    def __init__(
        self,
        embed_dim=256,
        kernel_size=4,
        stride=4,
        padding=0,
        total_stride=16,
        activation=nn.GELU,
    ):
        super().__init__()
        # total_stride must be an integer power of stride.
        num_layers = int(math.log2(total_stride) // math.log2(stride))
        assert stride**num_layers == total_stride

        stages = []
        in_ch = 1
        for _ in range(num_layers):
            # Channels grow by the same factor as the spatial reduction.
            out_ch = in_ch * (stride**2)
            stages.append(
                nn.Conv2d(
                    in_ch,
                    out_ch,
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=padding,
                )
            )
            stages.append(LayerNorm2d(out_ch))
            stages.append(activation())
            in_ch = out_ch

        # Final 1x1 projection to the embedding dimension.
        stages.append(nn.Conv2d(in_ch, embed_dim, kernel_size=1))
        self.encoder = nn.Sequential(*stages)

    def forward(self, x):
        return self.encoder(x)
59
+
60
+
61
+ # Lightly adapted from ConvNext (https://github.com/facebookresearch/ConvNeXt)
62
class CXBlock(nn.Module):
    r"""ConvNeXt Block. There are two equivalent implementations:
    (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
    (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
    We use (2) as we find it slightly faster in PyTorch

    Args:
        dim (int): Number of input channels.
        kernel_size (int): Spatial kernel size of the first conv.
        padding (int): Padding of the first conv.
        drop_path (float): Stochastic depth rate. Default: 0.0
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
        use_dwconv (bool): If True the first conv is depthwise (groups=dim).
    """

    def __init__(
        self,
        dim,
        kernel_size=7,
        padding=3,
        drop_path=0.0,
        layer_scale_init_value=1e-6,
        use_dwconv=True,
    ):
        super().__init__()
        self.dwconv = nn.Conv2d(
            dim,
            dim,
            kernel_size=kernel_size,
            padding=padding,
            groups=dim if use_dwconv else 1,
        )  # depthwise conv
        self.norm = LayerNorm2d(dim, eps=1e-6)
        self.pwconv1 = nn.Linear(
            dim, 4 * dim
        )  # pointwise/1x1 convs, implemented with linear layers
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(4 * dim, dim)
        # Learnable per-channel layer scale; disabled when init value <= 0.
        self.gamma = (
            nn.Parameter(layer_scale_init_value * torch.ones((dim)), requires_grad=True)
            if layer_scale_init_value > 0
            else None
        )
        # Stochastic depth on the residual branch; identity when rate is 0.
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()

    def forward(self, x):
        input = x
        x = self.dwconv(x)
        x = self.norm(x)
        x = x.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)
        if self.gamma is not None:
            # Layer scale is applied channels-last (broadcast over N, H, W).
            x = self.gamma * x
        x = x.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)

        # Residual connection with stochastic depth.
        x = input + self.drop_path(x)
        return x
118
+
119
+
120
class Fuser(nn.Module):
    """Apply a stack of identical fusion layers, optionally preceded by a
    1x1 input projection."""

    def __init__(self, layer, num_layers, dim=None, input_projection=False):
        """
        Args:
            layer: prototype layer, deep-copied num_layers times.
            num_layers: number of stacked layers.
            dim: channel count; required only when input_projection is True.
            input_projection: prepend a 1x1 conv projection when True.
        """
        super().__init__()
        self.proj = nn.Identity()
        self.layers = get_clones(layer, num_layers)

        if input_projection:
            assert dim is not None
            self.proj = nn.Conv2d(dim, dim, kernel_size=1)

    def forward(self, x):
        # normally x: (N, C, H, W)
        out = self.proj(x)
        for blk in self.layers:
            out = blk(out)
        return out
136
+
137
+
138
class MemoryEncoder(nn.Module):
    """Encode current-frame pixel features together with predicted masks
    into a memory feature map plus its positional encoding."""

    def __init__(
        self,
        out_dim,
        mask_downsampler,
        fuser,
        position_encoding,
        in_dim=256,  # in_dim of pix_feats
    ):
        super().__init__()

        # Brings masks down to the spatial resolution of pix_feat.
        self.mask_downsampler = mask_downsampler

        self.pix_feat_proj = nn.Conv2d(in_dim, in_dim, kernel_size=1)
        self.fuser = fuser
        self.position_encoding = position_encoding
        # Project to out_dim only when it differs from in_dim.
        self.out_proj = nn.Identity()
        if out_dim != in_dim:
            self.out_proj = nn.Conv2d(in_dim, out_dim, kernel_size=1)

    def forward(
        self,
        pix_feat: torch.Tensor,
        masks: torch.Tensor,
        skip_mask_sigmoid: bool = False,
    ) -> dict:
        """Fuse pixel features with (sigmoided, downsampled) masks.

        Returns:
            Dict with "vision_features" (fused feature map) and
            "vision_pos_enc" (single-element list with its positional
            encoding). Note: the return value is a dict, not a tuple —
            the annotation has been corrected accordingly.
        """
        ## Process masks
        # sigmoid, so that less domain shift from gt masks which are bool
        if not skip_mask_sigmoid:
            masks = F.sigmoid(masks)
        masks = self.mask_downsampler(masks)

        ## Fuse pix_feats and downsampled masks
        # in case the visual features are on CPU, cast them to CUDA
        pix_feat = pix_feat.to(masks.device)

        x = self.pix_feat_proj(pix_feat)
        x = x + masks
        x = self.fuser(x)
        x = self.out_proj(x)

        pos = self.position_encoding(x).to(x.dtype)

        return {"vision_features": x, "vision_pos_enc": [pos]}
custom_nodes/comfyui-segment-anything-2/sam2/modeling/position_encoding.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ import math
8
+ from typing import Any, Optional, Tuple
9
+
10
+ import numpy as np
11
+
12
+ import torch
13
+ from torch import nn
14
+
15
+
16
class PositionEmbeddingSine(nn.Module):
    """
    This is a more standard version of the position embedding, very similar to the one
    used by the Attention is all you need paper, generalized to work on images.
    """

    def __init__(
        self,
        num_pos_feats,
        temperature: int = 10000,
        normalize: bool = True,
        scale: Optional[float] = None,
    ):
        """
        Args:
            num_pos_feats: total channel count of the embedding (must be
                even; half of the channels encode y, half encode x).
            temperature: base of the sinusoid frequency progression.
            normalize: normalize coordinates to [0, scale] before encoding.
            scale: coordinate scale used when normalizing (default 2*pi).
        """
        super().__init__()
        assert num_pos_feats % 2 == 0, "Expecting even model width"
        self.num_pos_feats = num_pos_feats // 2
        self.temperature = temperature
        self.normalize = normalize
        if scale is not None and normalize is False:
            raise ValueError("normalize should be True if scale is passed")
        if scale is None:
            scale = 2 * math.pi
        self.scale = scale

        # Cache of computed grids keyed by (H, W).
        # NOTE(review): the key ignores device/dtype and the cached tensor
        # stays on whatever device first produced it — confirm callers are
        # single-device.
        self.cache = {}

    def _encode_xy(self, x, y):
        # The positions are expected to be normalized
        assert len(x) == len(y) and x.ndim == y.ndim == 1
        x_embed = x * self.scale
        y_embed = y * self.scale

        # Geometric frequency progression, paired so channels come in
        # (sin, cos) pairs at the same frequency.
        dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
        dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)

        pos_x = x_embed[:, None] / dim_t
        pos_y = y_embed[:, None] / dim_t
        # Interleave sin on even channels and cos on odd channels.
        pos_x = torch.stack(
            (pos_x[:, 0::2].sin(), pos_x[:, 1::2].cos()), dim=2
        ).flatten(1)
        pos_y = torch.stack(
            (pos_y[:, 0::2].sin(), pos_y[:, 1::2].cos()), dim=2
        ).flatten(1)
        return pos_x, pos_y

    @torch.no_grad()
    def encode_boxes(self, x, y, w, h):
        """Encode box centers (x, y); raw w and h are appended as two extra dims."""
        pos_x, pos_y = self._encode_xy(x, y)
        pos = torch.cat((pos_y, pos_x, h[:, None], w[:, None]), dim=1)
        return pos

    encode = encode_boxes  # Backwards compatibility

    @torch.no_grad()
    def encode_points(self, x, y, labels):
        """Encode (B, N) point coordinates; the label is appended as a last dim."""
        (bx, nx), (by, ny), (bl, nl) = x.shape, y.shape, labels.shape
        assert bx == by and nx == ny and bx == bl and nx == nl
        pos_x, pos_y = self._encode_xy(x.flatten(), y.flatten())
        pos_x, pos_y = pos_x.reshape(bx, nx, -1), pos_y.reshape(by, ny, -1)
        pos = torch.cat((pos_y, pos_x, labels[:, :, None]), dim=2)
        return pos

    @torch.no_grad()
    def forward(self, x: torch.Tensor):
        """Return a (B, 2*num_pos_feats, H, W) embedding for x's spatial size."""
        cache_key = (x.shape[-2], x.shape[-1])
        if cache_key in self.cache:
            # Reuse the cached single-image grid, expanded to the batch size.
            return self.cache[cache_key][None].repeat(x.shape[0], 1, 1, 1)
        # 1-based row/column indices for every spatial position.
        y_embed = (
            torch.arange(1, x.shape[-2] + 1, dtype=torch.float32, device=x.device)
            .view(1, -1, 1)
            .repeat(x.shape[0], 1, x.shape[-1])
        )
        x_embed = (
            torch.arange(1, x.shape[-1] + 1, dtype=torch.float32, device=x.device)
            .view(1, 1, -1)
            .repeat(x.shape[0], x.shape[-2], 1)
        )

        if self.normalize:
            eps = 1e-6
            y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
            x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale

        dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
        dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)

        pos_x = x_embed[:, :, :, None] / dim_t
        pos_y = y_embed[:, :, :, None] / dim_t
        pos_x = torch.stack(
            (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4
        ).flatten(3)
        pos_y = torch.stack(
            (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4
        ).flatten(3)
        pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
        # Cache the single-image grid (batch dim stripped) for this (H, W).
        self.cache[cache_key] = pos[0]
        return pos
+
114
+
115
class PositionEmbeddingRandom(nn.Module):
    """
    Positional encoding using random spatial frequencies.
    """

    def __init__(self, num_pos_feats: int = 64, scale: Optional[float] = None) -> None:
        super().__init__()
        # Missing or non-positive scale falls back to unit scale.
        if scale is None or scale <= 0.0:
            scale = 1.0
        self.register_buffer(
            "positional_encoding_gaussian_matrix",
            scale * torch.randn((2, num_pos_feats)),
        )

    def _pe_encoding(self, coords: torch.Tensor) -> torch.Tensor:
        """Positionally encode points that are normalized to [0,1]."""
        # Map [0, 1] -> [-1, 1], project through the random Gaussian
        # directions, then emit sin/cos features:
        # d_1 x ... x d_n x 2 -> d_1 x ... x d_n x C.
        projected = 2 * coords - 1
        projected = projected @ self.positional_encoding_gaussian_matrix
        projected = 2 * np.pi * projected
        return torch.cat([torch.sin(projected), torch.cos(projected)], dim=-1)

    def forward(self, size: Tuple[int, int]) -> torch.Tensor:
        """Generate positional encoding for a grid of the specified size."""
        h, w = size
        device: Any = self.positional_encoding_gaussian_matrix.device
        ones = torch.ones((h, w), device=device, dtype=torch.float32)
        # Pixel-center coordinates, normalized to (0, 1].
        ys = (ones.cumsum(dim=0) - 0.5) / h
        xs = (ones.cumsum(dim=1) - 0.5) / w
        pe = self._pe_encoding(torch.stack([xs, ys], dim=-1))
        return pe.permute(2, 0, 1)  # C x H x W

    def forward_with_coords(
        self, coords_input: torch.Tensor, image_size: Tuple[int, int]
    ) -> torch.Tensor:
        """Positionally encode points that are not normalized to [0,1]."""
        coords = coords_input.clone()
        coords[:, :, 0] = coords[:, :, 0] / image_size[1]
        coords[:, :, 1] = coords[:, :, 1] / image_size[0]
        return self._pe_encoding(coords.to(torch.float))  # B x N x C
159
+
160
+
161
+ # Rotary Positional Encoding, adapted from:
162
+ # 1. https://github.com/meta-llama/codellama/blob/main/llama/model.py
163
+ # 2. https://github.com/naver-ai/rope-vit
164
+ # 3. https://github.com/lucidrains/rotary-embedding-torch
165
+
166
+
167
def init_t_xy(end_x: int, end_y: int):
    """Return flattened (x, y) grid coordinates for an end_x * end_y grid.

    Position i corresponds to (i % end_x, i // end_x), i.e. row-major order.
    Both tensors are float32 of length end_x * end_y.
    """
    idx = torch.arange(end_x * end_y, dtype=torch.float32)
    t_x = (idx % end_x).float()
    t_y = torch.div(idx, end_x, rounding_mode="floor").float()
    return t_x, t_y
172
+
173
+
174
def compute_axial_cis(dim: int, end_x: int, end_y: int, theta: float = 10000.0):
    """Precompute axial RoPE rotation factors for an (end_x, end_y) grid.

    dim // 4 frequencies are used for the x axis and the same set for the
    y axis. Returns a complex tensor of shape (end_x * end_y, 2 * (dim // 4))
    of unit phasors exp(i * t * freq), x-part then y-part along the last dim.
    """
    # Identical frequency schedule for both axes.
    inv_freq = 1.0 / (theta ** (torch.arange(0, dim, 4)[: (dim // 4)].float() / dim))

    t_x, t_y = init_t_xy(end_x, end_y)
    phase_x = torch.outer(t_x, inv_freq)
    phase_y = torch.outer(t_y, inv_freq)
    cis_x = torch.polar(torch.ones_like(phase_x), phase_x)
    cis_y = torch.polar(torch.ones_like(phase_y), phase_y)
    return torch.cat([cis_x, cis_y], dim=-1)
184
+
185
+
186
def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor):
    """Reshape (seq, head_dim) rotation factors to broadcast against x.

    All leading dims of x become 1, leaving freqs_cis shaped
    (1, ..., 1, x.shape[-2], x.shape[-1]).
    """
    ndim = x.ndim
    assert 0 <= 1 < ndim
    assert freqs_cis.shape == (x.shape[-2], x.shape[-1])
    new_shape = [1] * (ndim - 2) + list(x.shape[-2:])
    return freqs_cis.view(*new_shape)
192
+
193
+
194
def apply_rotary_enc(
    xq: torch.Tensor,
    xk: torch.Tensor,
    freqs_cis: torch.Tensor,
    repeat_freqs_k: bool = False,
):
    """Apply rotary positional encoding to query and key tensors.

    Args:
        xq: queries; the last dim must be even (consecutive pairs are
            rotated as complex numbers).
        xk: keys; may be longer than xq along the sequence dim (see
            repeat_freqs_k) or have zero sequence length, in which case
            they are returned unrotated.
        freqs_cis: complex rotation factors shaped
            (xq.shape[-2], xq.shape[-1] // 2).
        repeat_freqs_k: tile the rotations along the key sequence dim when
            keys are an integer multiple longer than the queries.

    Returns:
        (xq_rotated, xk_rotated), cast back to the input dtypes and devices.
    """
    # View the last dim as complex pairs: (..., d) -> (..., d/2) complex.
    xq_ = torch.view_as_complex(xq.float().reshape(*xq.shape[:-1], -1, 2))
    xk_ = (
        torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2))
        if xk.shape[-2] != 0
        else None
    )
    freqs_cis = reshape_for_broadcast(freqs_cis, xq_)
    # Complex multiplication rotates each pair; flatten back to real layout.
    xq_out = torch.view_as_real(xq_ * freqs_cis).flatten(3)
    if xk_ is None:
        # no keys to rotate, due to dropout
        return xq_out.type_as(xq).to(xq.device), xk
    # repeat freqs along seq_len dim to match k seq_len
    if repeat_freqs_k:
        r = xk_.shape[-2] // xq_.shape[-2]
        if freqs_cis.is_complex() and freqs_cis.device.type == "mps":
            # MPS doesn't support repeat on complex; cat works fine.
            freqs_cis = torch.cat([freqs_cis] * r, dim=-2)
        else:
            freqs_cis = freqs_cis.repeat(*([1] * (freqs_cis.ndim - 2)), r, 1)
    xk_out = torch.view_as_real(xk_ * freqs_cis).flatten(3)
    return xq_out.type_as(xq).to(xq.device), xk_out.type_as(xk).to(xk.device)
custom_nodes/comfyui-segment-anything-2/sam2/modeling/sam/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
custom_nodes/comfyui-segment-anything-2/sam2/modeling/sam/mask_decoder.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ from typing import List, Optional, Tuple, Type
8
+
9
+ import torch
10
+ from torch import nn
11
+
12
+ from ....sam2.modeling.sam2_utils import LayerNorm2d, MLP
13
+
14
+
15
+ class MaskDecoder(nn.Module):
16
+ def __init__(
17
+ self,
18
+ *,
19
+ transformer_dim: int,
20
+ transformer: nn.Module,
21
+ num_multimask_outputs: int = 3,
22
+ activation: Type[nn.Module] = nn.GELU,
23
+ iou_head_depth: int = 3,
24
+ iou_head_hidden_dim: int = 256,
25
+ use_high_res_features: bool = False,
26
+ iou_prediction_use_sigmoid=False,
27
+ dynamic_multimask_via_stability=False,
28
+ dynamic_multimask_stability_delta=0.05,
29
+ dynamic_multimask_stability_thresh=0.98,
30
+ pred_obj_scores: bool = False,
31
+ pred_obj_scores_mlp: bool = False,
32
+ use_multimask_token_for_obj_ptr: bool = False,
33
+ ) -> None:
34
+ """
35
+ Predicts masks given an image and prompt embeddings, using a
36
+ transformer architecture.
37
+
38
+ Arguments:
39
+ transformer_dim (int): the channel dimension of the transformer
40
+ transformer (nn.Module): the transformer used to predict masks
41
+ num_multimask_outputs (int): the number of masks to predict
42
+ when disambiguating masks
43
+ activation (nn.Module): the type of activation to use when
44
+ upscaling masks
45
+ iou_head_depth (int): the depth of the MLP used to predict
46
+ mask quality
47
+ iou_head_hidden_dim (int): the hidden dimension of the MLP
48
+ used to predict mask quality
49
+ """
50
+ super().__init__()
51
+ self.transformer_dim = transformer_dim
52
+ self.transformer = transformer
53
+
54
+ self.num_multimask_outputs = num_multimask_outputs
55
+
56
+ self.iou_token = nn.Embedding(1, transformer_dim)
57
+ self.num_mask_tokens = num_multimask_outputs + 1
58
+ self.mask_tokens = nn.Embedding(self.num_mask_tokens, transformer_dim)
59
+
60
+ self.pred_obj_scores = pred_obj_scores
61
+ if self.pred_obj_scores:
62
+ self.obj_score_token = nn.Embedding(1, transformer_dim)
63
+ self.use_multimask_token_for_obj_ptr = use_multimask_token_for_obj_ptr
64
+
65
+ self.output_upscaling = nn.Sequential(
66
+ nn.ConvTranspose2d(
67
+ transformer_dim, transformer_dim // 4, kernel_size=2, stride=2
68
+ ),
69
+ LayerNorm2d(transformer_dim // 4),
70
+ activation(),
71
+ nn.ConvTranspose2d(
72
+ transformer_dim // 4, transformer_dim // 8, kernel_size=2, stride=2
73
+ ),
74
+ activation(),
75
+ )
76
+ self.use_high_res_features = use_high_res_features
77
+ if use_high_res_features:
78
+ self.conv_s0 = nn.Conv2d(
79
+ transformer_dim, transformer_dim // 8, kernel_size=1, stride=1
80
+ )
81
+ self.conv_s1 = nn.Conv2d(
82
+ transformer_dim, transformer_dim // 4, kernel_size=1, stride=1
83
+ )
84
+
85
+ self.output_hypernetworks_mlps = nn.ModuleList(
86
+ [
87
+ MLP(transformer_dim, transformer_dim, transformer_dim // 8, 3)
88
+ for i in range(self.num_mask_tokens)
89
+ ]
90
+ )
91
+
92
+ self.iou_prediction_head = MLP(
93
+ transformer_dim,
94
+ iou_head_hidden_dim,
95
+ self.num_mask_tokens,
96
+ iou_head_depth,
97
+ sigmoid_output=iou_prediction_use_sigmoid,
98
+ )
99
+ if self.pred_obj_scores:
100
+ self.pred_obj_score_head = nn.Linear(transformer_dim, 1)
101
+ if pred_obj_scores_mlp:
102
+ self.pred_obj_score_head = MLP(transformer_dim, transformer_dim, 1, 3)
103
+
104
+ # When outputting a single mask, optionally we can dynamically fall back to the best
105
+ # multimask output token if the single mask output token gives low stability scores.
106
+ self.dynamic_multimask_via_stability = dynamic_multimask_via_stability
107
+ self.dynamic_multimask_stability_delta = dynamic_multimask_stability_delta
108
+ self.dynamic_multimask_stability_thresh = dynamic_multimask_stability_thresh
109
+
110
+ def forward(
111
+ self,
112
+ image_embeddings: torch.Tensor,
113
+ image_pe: torch.Tensor,
114
+ sparse_prompt_embeddings: torch.Tensor,
115
+ dense_prompt_embeddings: torch.Tensor,
116
+ multimask_output: bool,
117
+ repeat_image: bool,
118
+ high_res_features: Optional[List[torch.Tensor]] = None,
119
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
120
+ """
121
+ Predict masks given image and prompt embeddings.
122
+
123
+ Arguments:
124
+ image_embeddings (torch.Tensor): the embeddings from the image encoder
125
+ image_pe (torch.Tensor): positional encoding with the shape of image_embeddings
126
+ sparse_prompt_embeddings (torch.Tensor): the embeddings of the points and boxes
127
+ dense_prompt_embeddings (torch.Tensor): the embeddings of the mask inputs
128
+ multimask_output (bool): Whether to return multiple masks or a single
129
+ mask.
130
+
131
+ Returns:
132
+ torch.Tensor: batched predicted masks
133
+ torch.Tensor: batched predictions of mask quality
134
+ torch.Tensor: batched SAM token for mask output
135
+ """
136
+ masks, iou_pred, mask_tokens_out, object_score_logits = self.predict_masks(
137
+ image_embeddings=image_embeddings,
138
+ image_pe=image_pe,
139
+ sparse_prompt_embeddings=sparse_prompt_embeddings,
140
+ dense_prompt_embeddings=dense_prompt_embeddings,
141
+ repeat_image=repeat_image,
142
+ high_res_features=high_res_features,
143
+ )
144
+
145
+ # Select the correct mask or masks for output
146
+ if multimask_output:
147
+ masks = masks[:, 1:, :, :]
148
+ iou_pred = iou_pred[:, 1:]
149
+ elif self.dynamic_multimask_via_stability and not self.training:
150
+ masks, iou_pred = self._dynamic_multimask_via_stability(masks, iou_pred)
151
+ else:
152
+ masks = masks[:, 0:1, :, :]
153
+ iou_pred = iou_pred[:, 0:1]
154
+
155
+ if multimask_output and self.use_multimask_token_for_obj_ptr:
156
+ sam_tokens_out = mask_tokens_out[:, 1:] # [b, 3, c] shape
157
+ else:
158
+ # Take the mask output token. Here we *always* use the token for single mask output.
159
+ # At test time, even if we track after 1-click (and using multimask_output=True),
160
+ # we still take the single mask token here. The rationale is that we always track
161
+ # after multiple clicks during training, so the past tokens seen during training
162
+ # are always the single mask token (and we'll let it be the object-memory token).
163
+ sam_tokens_out = mask_tokens_out[:, 0:1] # [b, 1, c] shape
164
+
165
+ # Prepare output
166
+ return masks, iou_pred, sam_tokens_out, object_score_logits
167
+
168
+ def predict_masks(
169
+ self,
170
+ image_embeddings: torch.Tensor,
171
+ image_pe: torch.Tensor,
172
+ sparse_prompt_embeddings: torch.Tensor,
173
+ dense_prompt_embeddings: torch.Tensor,
174
+ repeat_image: bool,
175
+ high_res_features: Optional[List[torch.Tensor]] = None,
176
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
177
+ """Predicts masks. See 'forward' for more details."""
178
+ # Concatenate output tokens
179
+ s = 0
180
+ if self.pred_obj_scores:
181
+ output_tokens = torch.cat(
182
+ [
183
+ self.obj_score_token.weight,
184
+ self.iou_token.weight,
185
+ self.mask_tokens.weight,
186
+ ],
187
+ dim=0,
188
+ )
189
+ s = 1
190
+ else:
191
+ output_tokens = torch.cat(
192
+ [self.iou_token.weight, self.mask_tokens.weight], dim=0
193
+ )
194
+ output_tokens = output_tokens.unsqueeze(0).expand(
195
+ sparse_prompt_embeddings.size(0), -1, -1
196
+ )
197
+ tokens = torch.cat((output_tokens, sparse_prompt_embeddings), dim=1)
198
+
199
+ # Expand per-image data in batch direction to be per-mask
200
+ if repeat_image:
201
+ src = torch.repeat_interleave(image_embeddings, tokens.shape[0], dim=0)
202
+ else:
203
+ assert image_embeddings.shape[0] == tokens.shape[0]
204
+ src = image_embeddings
205
+ src = src + dense_prompt_embeddings
206
+ assert (
207
+ image_pe.size(0) == 1
208
+ ), "image_pe should have size 1 in batch dim (from `get_dense_pe()`)"
209
+ pos_src = torch.repeat_interleave(image_pe, tokens.shape[0], dim=0)
210
+ b, c, h, w = src.shape
211
+
212
+ # Run the transformer
213
+ hs, src = self.transformer(src, pos_src, tokens)
214
+ iou_token_out = hs[:, s, :]
215
+ mask_tokens_out = hs[:, s + 1 : (s + 1 + self.num_mask_tokens), :]
216
+
217
+ # Upscale mask embeddings and predict masks using the mask tokens
218
+ src = src.transpose(1, 2).view(b, c, h, w)
219
+ if not self.use_high_res_features:
220
+ upscaled_embedding = self.output_upscaling(src)
221
+ else:
222
+ dc1, ln1, act1, dc2, act2 = self.output_upscaling
223
+ feat_s0, feat_s1 = high_res_features
224
+ upscaled_embedding = act1(ln1(dc1(src) + feat_s1))
225
+ upscaled_embedding = act2(dc2(upscaled_embedding) + feat_s0)
226
+
227
+ hyper_in_list: List[torch.Tensor] = []
228
+ for i in range(self.num_mask_tokens):
229
+ hyper_in_list.append(
230
+ self.output_hypernetworks_mlps[i](mask_tokens_out[:, i, :])
231
+ )
232
+ hyper_in = torch.stack(hyper_in_list, dim=1)
233
+ b, c, h, w = upscaled_embedding.shape
234
+ masks = (hyper_in @ upscaled_embedding.view(b, c, h * w)).view(b, -1, h, w)
235
+
236
+ # Generate mask quality predictions
237
+ iou_pred = self.iou_prediction_head(iou_token_out)
238
+ if self.pred_obj_scores:
239
+ assert s == 1
240
+ object_score_logits = self.pred_obj_score_head(hs[:, 0, :])
241
+ else:
242
+ # Obj scores logits - default to 10.0, i.e. assuming the object is present, sigmoid(10)=1
243
+ object_score_logits = 10.0 * iou_pred.new_ones(iou_pred.shape[0], 1)
244
+
245
+ return masks, iou_pred, mask_tokens_out, object_score_logits
246
+
247
+ def _get_stability_scores(self, mask_logits):
248
+ """
249
+ Compute stability scores of the mask logits based on the IoU between upper and
250
+ lower thresholds.
251
+ """
252
+ mask_logits = mask_logits.flatten(-2)
253
+ stability_delta = self.dynamic_multimask_stability_delta
254
+ area_i = torch.sum(mask_logits > stability_delta, dim=-1).float()
255
+ area_u = torch.sum(mask_logits > -stability_delta, dim=-1).float()
256
+ stability_scores = torch.where(area_u > 0, area_i / area_u, 1.0)
257
+ return stability_scores
258
+
259
+ def _dynamic_multimask_via_stability(self, all_mask_logits, all_iou_scores):
260
+ """
261
+ When outputting a single mask, if the stability score from the current single-mask
262
+ output (based on output token 0) falls below a threshold, we instead select from
263
+ multi-mask outputs (based on output token 1~3) the mask with the highest predicted
264
+ IoU score. This is intended to ensure a valid mask for both clicking and tracking.
265
+ """
266
+ # The best mask from multimask output tokens (1~3)
267
+ multimask_logits = all_mask_logits[:, 1:, :, :]
268
+ multimask_iou_scores = all_iou_scores[:, 1:]
269
+ best_scores_inds = torch.argmax(multimask_iou_scores, dim=-1)
270
+ batch_inds = torch.arange(
271
+ multimask_iou_scores.size(0), device=all_iou_scores.device
272
+ )
273
+ best_multimask_logits = multimask_logits[batch_inds, best_scores_inds]
274
+ best_multimask_logits = best_multimask_logits.unsqueeze(1)
275
+ best_multimask_iou_scores = multimask_iou_scores[batch_inds, best_scores_inds]
276
+ best_multimask_iou_scores = best_multimask_iou_scores.unsqueeze(1)
277
+
278
+ # The mask from singlemask output token 0 and its stability score
279
+ singlemask_logits = all_mask_logits[:, 0:1, :, :]
280
+ singlemask_iou_scores = all_iou_scores[:, 0:1]
281
+ stability_scores = self._get_stability_scores(singlemask_logits)
282
+ is_stable = stability_scores >= self.dynamic_multimask_stability_thresh
283
+
284
+ # Dynamically fall back to best multimask output upon low stability scores.
285
+ mask_logits_out = torch.where(
286
+ is_stable[..., None, None].expand_as(singlemask_logits),
287
+ singlemask_logits,
288
+ best_multimask_logits,
289
+ )
290
+ iou_scores_out = torch.where(
291
+ is_stable.expand_as(singlemask_iou_scores),
292
+ singlemask_iou_scores,
293
+ best_multimask_iou_scores,
294
+ )
295
+ return mask_logits_out, iou_scores_out
custom_nodes/comfyui-segment-anything-2/sam2/modeling/sam/prompt_encoder.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ from typing import Optional, Tuple, Type
8
+
9
+ import torch
10
+ from torch import nn
11
+
12
+ from ....sam2.modeling.position_encoding import PositionEmbeddingRandom
13
+
14
+ from ....sam2.modeling.sam2_utils import LayerNorm2d
15
+
16
+
17
class PromptEncoder(nn.Module):
    """Encode point, box and mask prompts for input to SAM's mask decoder.

    Produces a sparse embedding (points and boxes) and a dense embedding
    (masks); when no mask is supplied, a learned "no mask" embedding is
    broadcast over the embedding grid instead.
    """

    def __init__(
        self,
        embed_dim: int,
        image_embedding_size: Tuple[int, int],
        input_image_size: Tuple[int, int],
        mask_in_chans: int,
        activation: Type[nn.Module] = nn.GELU,
    ) -> None:
        """
        Arguments:
          embed_dim (int): The prompts' embedding dimension
          image_embedding_size (tuple(int, int)): The spatial size of the
            image embedding, as (H, W).
          input_image_size (int): The padded size of the image as input
            to the image encoder, as (H, W).
          mask_in_chans (int): The number of hidden channels used for
            encoding input masks.
          activation (nn.Module): The activation to use when encoding
            input masks.
        """
        super().__init__()
        self.embed_dim = embed_dim
        self.input_image_size = input_image_size
        self.image_embedding_size = image_embedding_size
        self.pe_layer = PositionEmbeddingRandom(embed_dim // 2)

        # One embedding per point label: neg point, pos point, two box corners.
        self.num_point_embeddings: int = 4
        self.point_embeddings = nn.ModuleList(
            [nn.Embedding(1, embed_dim) for _ in range(self.num_point_embeddings)]
        )
        self.not_a_point_embed = nn.Embedding(1, embed_dim)

        # Mask inputs arrive at 4x the embedding resolution; two stride-2
        # convolutions bring them back down to the embedding grid.
        self.mask_input_size = (
            4 * image_embedding_size[0],
            4 * image_embedding_size[1],
        )
        self.mask_downscaling = nn.Sequential(
            nn.Conv2d(1, mask_in_chans // 4, kernel_size=2, stride=2),
            LayerNorm2d(mask_in_chans // 4),
            activation(),
            nn.Conv2d(mask_in_chans // 4, mask_in_chans, kernel_size=2, stride=2),
            LayerNorm2d(mask_in_chans),
            activation(),
            nn.Conv2d(mask_in_chans, embed_dim, kernel_size=1),
        )
        self.no_mask_embed = nn.Embedding(1, embed_dim)

    def get_dense_pe(self) -> torch.Tensor:
        """
        Return the positional encoding used to encode point prompts, applied
        to a dense grid the shape of the image encoding.

        Returns:
          torch.Tensor: shape 1 x (embed_dim) x (embedding_h) x (embedding_w)
        """
        return self.pe_layer(self.image_embedding_size).unsqueeze(0)

    def _embed_points(
        self,
        points: torch.Tensor,
        labels: torch.Tensor,
        pad: bool,
    ) -> torch.Tensor:
        """Embed point prompts, optionally padding with a "not a point" entry."""
        coords = points + 0.5  # Shift to center of pixel
        if pad:
            pad_point = torch.zeros((coords.shape[0], 1, 2), device=coords.device)
            pad_label = -torch.ones((labels.shape[0], 1), device=labels.device)
            coords = torch.cat([coords, pad_point], dim=1)
            labels = torch.cat([labels, pad_label], dim=1)
        embedding = self.pe_layer.forward_with_coords(coords, self.input_image_size)
        # Padding entries drop their positional encoding entirely and use the
        # dedicated "not a point" embedding instead.
        embedding[labels == -1] = 0.0
        embedding[labels == -1] += self.not_a_point_embed.weight
        for label_value in range(self.num_point_embeddings):
            embedding[labels == label_value] += self.point_embeddings[
                label_value
            ].weight
        return embedding

    def _embed_boxes(self, boxes: torch.Tensor) -> torch.Tensor:
        """Embed box prompts as a pair of corner points."""
        corners = (boxes + 0.5).reshape(-1, 2, 2)  # Shift to center of pixel
        embedding = self.pe_layer.forward_with_coords(corners, self.input_image_size)
        embedding[:, 0, :] += self.point_embeddings[2].weight
        embedding[:, 1, :] += self.point_embeddings[3].weight
        return embedding

    def _embed_masks(self, masks: torch.Tensor) -> torch.Tensor:
        """Embed mask inputs by downscaling them to the embedding grid."""
        return self.mask_downscaling(masks)

    def _get_batch_size(
        self,
        points: Optional[Tuple[torch.Tensor, torch.Tensor]],
        boxes: Optional[torch.Tensor],
        masks: Optional[torch.Tensor],
    ) -> int:
        """Infer the output batch size from whichever prompt is provided."""
        if points is not None:
            return points[0].shape[0]
        if boxes is not None:
            return boxes.shape[0]
        if masks is not None:
            return masks.shape[0]
        return 1

    def _get_device(self) -> torch.device:
        return self.point_embeddings[0].weight.device

    def forward(
        self,
        points: Optional[Tuple[torch.Tensor, torch.Tensor]],
        boxes: Optional[torch.Tensor],
        masks: Optional[torch.Tensor],
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Embed different types of prompts, returning both sparse and dense
        embeddings.

        Arguments:
          points (tuple(torch.Tensor, torch.Tensor) or none): point coordinates
            and labels to embed.
          boxes (torch.Tensor or none): boxes to embed
          masks (torch.Tensor or none): masks to embed

        Returns:
          torch.Tensor: sparse embeddings for the points and boxes, with shape
            BxNx(embed_dim), where N is determined by the number of input points
            and boxes.
          torch.Tensor: dense embeddings for the masks, in the shape
            Bx(embed_dim)x(embed_H)x(embed_W)
        """
        bs = self._get_batch_size(points, boxes, masks)
        sparse_embeddings = torch.empty(
            (bs, 0, self.embed_dim), device=self._get_device()
        )
        if points is not None:
            coords, labels = points
            point_embeddings = self._embed_points(coords, labels, pad=(boxes is None))
            sparse_embeddings = torch.cat([sparse_embeddings, point_embeddings], dim=1)
        if boxes is not None:
            sparse_embeddings = torch.cat(
                [sparse_embeddings, self._embed_boxes(boxes)], dim=1
            )

        if masks is not None:
            dense_embeddings = self._embed_masks(masks)
        else:
            dense_embeddings = self.no_mask_embed.weight.reshape(1, -1, 1, 1).expand(
                bs, -1, self.image_embedding_size[0], self.image_embedding_size[1]
            )

        return sparse_embeddings, dense_embeddings
custom_nodes/comfyui-segment-anything-2/sam2/modeling/sam/transformer.py ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
import math
import warnings
from functools import partial
from typing import Tuple, Type

import torch
import torch.nn.functional as F
from torch import nn, Tensor

from ....sam2.modeling.position_encoding import apply_rotary_enc, compute_axial_cis
from ....sam2.modeling.sam2_utils import MLP

from ....sam2.utils.misc import get_sdpa_settings

# Probe the runtime for which scaled-dot-product-attention kernels are usable.
OLD_GPU, USE_FLASH_ATTN, MATH_KERNEL_ON = get_sdpa_settings()

# Newer torch exposes torch.nn.attention.sdpa_kernel; build the list of allowed
# SDPA backends once at import time. On older torch we fall back to the legacy
# torch.backends.cuda.sdp_kernel context manager (OLD_TORCH=True).
try:
    from torch.nn.attention import SDPBackend, sdpa_kernel

    backends = []
    if USE_FLASH_ATTN:
        backends.append(SDPBackend.FLASH_ATTENTION)
    if MATH_KERNEL_ON:
        backends.append(SDPBackend.MATH)
    if OLD_GPU:
        backends.append(SDPBackend.EFFICIENT_ATTENTION)
    OLD_TORCH = False
except ImportError:
    # torch.nn.attention is unavailable on this torch version.
    # Fix: the original used a bare `except:` (catches even KeyboardInterrupt);
    # narrowed to ImportError, the only failure this guard is meant to handle.
    OLD_TORCH = True

warnings.simplefilter(action="ignore", category=FutureWarning)
36
+
37
class TwoWayTransformer(nn.Module):
    """A transformer decoder that attends to an input image using queries whose
    positional embedding is supplied, attending in both directions per layer."""

    def __init__(
        self,
        depth: int,
        embedding_dim: int,
        num_heads: int,
        mlp_dim: int,
        activation: Type[nn.Module] = nn.ReLU,
        attention_downsample_rate: int = 2,
    ) -> None:
        """
        Args:
          depth (int): number of layers in the transformer
          embedding_dim (int): the channel dimension for the input embeddings
          num_heads (int): the number of heads for multihead attention. Must
            divide embedding_dim
          mlp_dim (int): the channel dimension internal to the MLP block
          activation (nn.Module): the activation to use in the MLP block
        """
        super().__init__()
        self.depth = depth
        self.embedding_dim = embedding_dim
        self.num_heads = num_heads
        self.mlp_dim = mlp_dim
        # Only the first layer skips the positional embedding on self-attention.
        self.layers = nn.ModuleList(
            [
                TwoWayAttentionBlock(
                    embedding_dim=embedding_dim,
                    num_heads=num_heads,
                    mlp_dim=mlp_dim,
                    activation=activation,
                    attention_downsample_rate=attention_downsample_rate,
                    skip_first_layer_pe=(layer_idx == 0),
                )
                for layer_idx in range(depth)
            ]
        )

        self.final_attn_token_to_image = Attention(
            embedding_dim, num_heads, downsample_rate=attention_downsample_rate
        )
        self.norm_final_attn = nn.LayerNorm(embedding_dim)

    def forward(
        self,
        image_embedding: Tensor,
        image_pe: Tensor,
        point_embedding: Tensor,
    ) -> Tuple[Tensor, Tensor]:
        """
        Args:
          image_embedding (torch.Tensor): image to attend to. Should be shape
            B x embedding_dim x h x w for any h and w.
          image_pe (torch.Tensor): the positional encoding to add to the image. Must
            have the same shape as image_embedding.
          point_embedding (torch.Tensor): the embedding to add to the query points.
            Must have shape B x N_points x embedding_dim for any N_points.

        Returns:
          torch.Tensor: the processed point_embedding
          torch.Tensor: the processed image_embedding
        """
        # Flatten spatial dims: BxCxHxW -> BxHWxC == B x N_image_tokens x C
        image_embedding = image_embedding.flatten(2).permute(0, 2, 1)
        image_pe = image_pe.flatten(2).permute(0, 2, 1)

        # Queries are the point tokens, keys are the image tokens.
        queries, keys = point_embedding, image_embedding

        # Run the two-way attention blocks.
        for block in self.layers:
            queries, keys = block(
                queries=queries,
                keys=keys,
                query_pe=point_embedding,
                key_pe=image_pe,
            )

        # Final attention from the points to the image, then layernorm.
        attn_out = self.final_attn_token_to_image(
            q=queries + point_embedding, k=keys + image_pe, v=keys
        )
        queries = self.norm_final_attn(queries + attn_out)

        return queries, keys
128
+
129
+
130
class TwoWayAttentionBlock(nn.Module):
    """
    A transformer block with four layers: (1) self-attention of sparse inputs,
    (2) cross attention of sparse inputs to dense inputs, (3) an MLP block on
    sparse inputs, and (4) cross attention of dense inputs to sparse inputs.
    """

    def __init__(
        self,
        embedding_dim: int,
        num_heads: int,
        mlp_dim: int = 2048,
        activation: Type[nn.Module] = nn.ReLU,
        attention_downsample_rate: int = 2,
        skip_first_layer_pe: bool = False,
    ) -> None:
        """
        Arguments:
          embedding_dim (int): the channel dimension of the embeddings
          num_heads (int): the number of heads in the attention layers
          mlp_dim (int): the hidden dimension of the mlp block
          activation (nn.Module): the activation of the mlp block
          skip_first_layer_pe (bool): skip the PE on the first layer
        """
        super().__init__()
        self.self_attn = Attention(embedding_dim, num_heads)
        self.norm1 = nn.LayerNorm(embedding_dim)

        self.cross_attn_token_to_image = Attention(
            embedding_dim, num_heads, downsample_rate=attention_downsample_rate
        )
        self.norm2 = nn.LayerNorm(embedding_dim)

        self.mlp = MLP(
            embedding_dim, mlp_dim, embedding_dim, num_layers=2, activation=activation
        )
        self.norm3 = nn.LayerNorm(embedding_dim)

        self.norm4 = nn.LayerNorm(embedding_dim)
        self.cross_attn_image_to_token = Attention(
            embedding_dim, num_heads, downsample_rate=attention_downsample_rate
        )

        self.skip_first_layer_pe = skip_first_layer_pe

    def forward(
        self, queries: Tensor, keys: Tensor, query_pe: Tensor, key_pe: Tensor
    ) -> Tuple[Tensor, Tensor]:
        # (1) Self-attention over the sparse tokens.
        if self.skip_first_layer_pe:
            queries = self.self_attn(q=queries, k=queries, v=queries)
        else:
            q = queries + query_pe
            queries = queries + self.self_attn(q=q, k=q, v=queries)
        queries = self.norm1(queries)

        # (2) Cross-attention: tokens attending to the image embedding.
        queries = queries + self.cross_attn_token_to_image(
            q=queries + query_pe, k=keys + key_pe, v=keys
        )
        queries = self.norm2(queries)

        # (3) MLP on the sparse tokens.
        queries = self.norm3(queries + self.mlp(queries))

        # (4) Cross-attention: image embedding attending to the tokens.
        keys = keys + self.cross_attn_image_to_token(
            q=keys + key_pe, k=queries + query_pe, v=queries
        )
        keys = self.norm4(keys)

        return queries, keys
206
+
207
+
208
class Attention(nn.Module):
    """
    An attention layer that allows for downscaling the size of the embedding
    after projection to queries, keys, and values.
    """

    def __init__(
        self,
        embedding_dim: int,
        num_heads: int,
        downsample_rate: int = 1,
        dropout: float = 0.0,
        kv_in_dim: int = None,
    ) -> None:
        """
        Arguments:
          embedding_dim (int): channel dimension of the queries and the output.
          num_heads (int): number of attention heads; must divide the internal dim.
          downsample_rate (int): internal dim = embedding_dim // downsample_rate.
          dropout (float): attention dropout probability (applied only in training).
          kv_in_dim (int or None): channel dimension of keys/values when it
            differs from embedding_dim; defaults to embedding_dim.
        """
        super().__init__()
        self.embedding_dim = embedding_dim
        self.kv_in_dim = kv_in_dim if kv_in_dim is not None else embedding_dim
        self.internal_dim = embedding_dim // downsample_rate
        self.num_heads = num_heads
        assert (
            self.internal_dim % num_heads == 0
        ), "num_heads must divide embedding_dim."

        self.q_proj = nn.Linear(embedding_dim, self.internal_dim)
        self.k_proj = nn.Linear(self.kv_in_dim, self.internal_dim)
        self.v_proj = nn.Linear(self.kv_in_dim, self.internal_dim)
        self.out_proj = nn.Linear(self.internal_dim, embedding_dim)

        self.dropout_p = dropout

    def _separate_heads(self, x: Tensor, num_heads: int) -> Tensor:
        """Split channels across heads: B x N x C -> B x N_heads x N x C_per_head."""
        b, n, c = x.shape
        x = x.reshape(b, n, num_heads, c // num_heads)
        return x.transpose(1, 2)

    def _recombine_heads(self, x: Tensor) -> Tensor:
        """Inverse of _separate_heads: B x N_heads x N x C_per_head -> B x N x C."""
        b, n_heads, n_tokens, c_per_head = x.shape
        x = x.transpose(1, 2)
        return x.reshape(b, n_tokens, n_heads * c_per_head)

    def forward(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor:
        # Input projections
        q = self.q_proj(q)
        k = self.k_proj(k)
        v = self.v_proj(v)

        # Separate into heads
        q = self._separate_heads(q, self.num_heads)
        k = self._separate_heads(k, self.num_heads)
        v = self._separate_heads(v, self.num_heads)

        dropout_p = self.dropout_p if self.training else 0.0
        # Attention.
        # Fix: the original appended SDPBackend.MATH to the shared module-level
        # `backends` list on every qualifying call, growing it without bound and
        # permanently changing kernel selection for all attention layers. Use a
        # per-call copy instead so the module-level list is never mutated.
        if not OLD_TORCH:
            kernels = backends
            if not MATH_KERNEL_ON and OLD_GPU and dropout_p > 0.0:
                kernels = [*backends, SDPBackend.MATH]
            with sdpa_kernel(kernels):
                out = F.scaled_dot_product_attention(q, k, v, dropout_p=dropout_p)
        else:
            # Legacy context manager for older torch versions.
            with torch.backends.cuda.sdp_kernel(
                enable_flash=USE_FLASH_ATTN,
                enable_math=(OLD_GPU and dropout_p > 0.0) or MATH_KERNEL_ON,
                enable_mem_efficient=OLD_GPU,
            ):
                out = F.scaled_dot_product_attention(q, k, v, dropout_p=dropout_p)
        out = self._recombine_heads(out)
        out = self.out_proj(out)

        return out
277
+
278
+
279
class RoPEAttention(Attention):
    """Attention with rotary position encoding (RoPE) applied to q and k."""

    def __init__(
        self,
        *args,
        rope_theta=10000.0,
        # whether to repeat q rope to match k length
        # this is needed for cross-attention to memories
        rope_k_repeat=False,
        feat_sizes=(32, 32),  # [w, h] for stride 16 feats at 512 resolution
        **kwargs,
    ):
        super().__init__(*args, **kwargs)

        # Precompute axial rotary frequencies for the default feature size;
        # recomputed lazily in forward() if the token count changes.
        self.compute_cis = partial(
            compute_axial_cis, dim=self.internal_dim // self.num_heads, theta=rope_theta
        )
        freqs_cis = self.compute_cis(end_x=feat_sizes[0], end_y=feat_sizes[1])
        self.freqs_cis = freqs_cis
        self.rope_k_repeat = rope_k_repeat

    def forward(
        self, q: Tensor, k: Tensor, v: Tensor, num_k_exclude_rope: int = 0
    ) -> Tensor:
        """
        Arguments:
          q, k, v (Tensor): query/key/value tensors of shape B x N x C.
          num_k_exclude_rope (int): number of trailing k tokens that should NOT
            receive rotary encoding (e.g. object-pointer tokens).
        """
        # Input projections
        q = self.q_proj(q)
        k = self.k_proj(k)
        v = self.v_proj(v)

        # Separate into heads
        q = self._separate_heads(q, self.num_heads)
        k = self._separate_heads(k, self.num_heads)
        v = self._separate_heads(v, self.num_heads)

        # Apply rotary position encoding; assumes the q tokens form a square
        # spatial grid (w == h).
        w = h = math.sqrt(q.shape[-2])
        self.freqs_cis = self.freqs_cis.to(q.device)
        if self.freqs_cis.shape[0] != q.shape[-2]:
            self.freqs_cis = self.compute_cis(end_x=w, end_y=h).to(q.device)
        if q.shape[-2] != k.shape[-2]:
            # k may be longer than q (cross-attention to memories); rope for k
            # is then repeated to match.
            assert self.rope_k_repeat

        num_k_rope = k.size(-2) - num_k_exclude_rope
        q, k[:, :, :num_k_rope] = apply_rotary_enc(
            q,
            k[:, :, :num_k_rope],
            freqs_cis=self.freqs_cis,
            repeat_freqs_k=self.rope_k_repeat,
        )

        dropout_p = self.dropout_p if self.training else 0.0
        # Attention.
        # Fix: the original appended SDPBackend.MATH to the shared module-level
        # `backends` list on every qualifying call, growing it without bound and
        # permanently changing kernel selection for all attention layers. Use a
        # per-call copy instead so the module-level list is never mutated.
        if not OLD_TORCH:
            kernels = backends
            if not MATH_KERNEL_ON and OLD_GPU and dropout_p > 0.0:
                kernels = [*backends, SDPBackend.MATH]
            with sdpa_kernel(kernels):
                out = F.scaled_dot_product_attention(q, k, v, dropout_p=dropout_p)
        else:
            # Legacy context manager for older torch versions.
            with torch.backends.cuda.sdp_kernel(
                enable_flash=USE_FLASH_ATTN,
                enable_math=(OLD_GPU and dropout_p > 0.0) or MATH_KERNEL_ON,
                enable_mem_efficient=OLD_GPU,
            ):
                out = F.scaled_dot_product_attention(q, k, v, dropout_p=dropout_p)
        out = self._recombine_heads(out)
        out = self.out_proj(out)

        return out
custom_nodes/comfyui-segment-anything-2/sam2/modeling/sam2_base.py ADDED
@@ -0,0 +1,907 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ import torch
8
+ import torch.distributed
9
+ import torch.nn.functional as F
10
+
11
+ from torch.nn.init import trunc_normal_
12
+
13
+ from ...sam2.modeling.sam.mask_decoder import MaskDecoder
14
+ from ...sam2.modeling.sam.prompt_encoder import PromptEncoder
15
+ from ...sam2.modeling.sam.transformer import TwoWayTransformer
16
+ from ...sam2.modeling.sam2_utils import get_1d_sine_pe, MLP, select_closest_cond_frames
17
+
18
+ # a large negative value as a placeholder score for missing objects
19
+ NO_OBJ_SCORE = -1024.0
20
+
21
+
22
+ class SAM2Base(torch.nn.Module):
23
    def __init__(
        self,
        image_encoder,
        memory_attention,
        memory_encoder,
        num_maskmem=7,  # default 1 input frame + 6 previous frames
        image_size=512,
        backbone_stride=16,  # stride of the image backbone output
        sigmoid_scale_for_mem_enc=1.0,  # scale factor for mask sigmoid prob
        sigmoid_bias_for_mem_enc=0.0,  # bias factor for mask sigmoid prob
        # During evaluation, whether to binarize the sigmoid mask logits on interacted frames with clicks
        binarize_mask_from_pts_for_mem_enc=False,
        use_mask_input_as_output_without_sam=False,  # on frames with mask input, whether to directly output the input mask without using a SAM prompt encoder + mask decoder
        # The maximum number of conditioning frames to participate in the memory attention (-1 means no limit; if there are more conditioning frames than this limit,
        # we only cross-attend to the temporally closest `max_cond_frames_in_attn` conditioning frames in the encoder when tracking each frame). This gives the model
        # a temporal locality when handling a large number of annotated frames (since closer frames should be more important) and also avoids GPU OOM.
        max_cond_frames_in_attn=-1,
        # on the first frame, whether to directly add the no-memory embedding to the image feature
        # (instead of using the transformer encoder)
        directly_add_no_mem_embed=False,
        # whether to use high-resolution feature maps in the SAM mask decoder
        use_high_res_features_in_sam=False,
        # whether to output multiple (3) masks for the first click on initial conditioning frames
        multimask_output_in_sam=False,
        # the minimum and maximum number of clicks to use multimask_output_in_sam (only relevant when `multimask_output_in_sam=True`;
        # default is 1 for both, meaning that only the first click gives multimask output; also note that a box counts as two points)
        multimask_min_pt_num=1,
        multimask_max_pt_num=1,
        # whether to also use multimask output for tracking (not just for the first click on initial conditioning frames; only relevant when `multimask_output_in_sam=True`)
        multimask_output_for_tracking=False,
        # Whether to use multimask tokens for obj ptr; Only relevant when both
        # use_obj_ptrs_in_encoder=True and multimask_output_for_tracking=True
        use_multimask_token_for_obj_ptr: bool = False,
        # whether to use sigmoid to restrict ious prediction to [0-1]
        iou_prediction_use_sigmoid=False,
        # The memory bank's temporal stride during evaluation (i.e. the `r` parameter in XMem and Cutie; XMem and Cutie use r=5).
        # For r>1, the (self.num_maskmem - 1) non-conditioning memory frames consist of
        # (self.num_maskmem - 2) nearest frames from every r-th frames, plus the last frame.
        memory_temporal_stride_for_eval=1,
        # whether to apply non-overlapping constraints on the object masks in the memory encoder during evaluation (to avoid/alleviate superposing masks)
        non_overlap_masks_for_mem_enc=False,
        # whether to cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
        use_obj_ptrs_in_encoder=False,
        # the maximum number of object pointers from other frames in encoder cross attention (only relevant when `use_obj_ptrs_in_encoder=True`)
        max_obj_ptrs_in_encoder=16,
        # whether to add temporal positional encoding to the object pointers in the encoder (only relevant when `use_obj_ptrs_in_encoder=True`)
        add_tpos_enc_to_obj_ptrs=True,
        # whether to add an extra linear projection layer for the temporal positional encoding in the object pointers to avoid potential interference
        # with spatial positional encoding (only relevant when both `use_obj_ptrs_in_encoder=True` and `add_tpos_enc_to_obj_ptrs=True`)
        proj_tpos_enc_in_obj_ptrs=False,
        # whether to use signed distance (instead of unsigned absolute distance) in the temporal positional encoding in the object pointers
        # (only relevant when both `use_obj_ptrs_in_encoder=True` and `add_tpos_enc_to_obj_ptrs=True`)
        use_signed_tpos_enc_to_obj_ptrs=False,
        # whether to only attend to object pointers in the past (before the current frame) in the encoder during evaluation
        # (only relevant when `use_obj_ptrs_in_encoder=True`; this might avoid pointer information too far in the future to distract the initial tracking)
        only_obj_ptrs_in_the_past_for_eval=False,
        # Whether to predict if there is an object in the frame
        pred_obj_scores: bool = False,
        # Whether to use an MLP to predict object scores
        pred_obj_scores_mlp: bool = False,
        # Only relevant if pred_obj_scores=True and use_obj_ptrs_in_encoder=True;
        # Whether to have a fixed no obj pointer when there is no object present
        # or to use it as an additive embedding with obj_ptr produced by decoder
        fixed_no_obj_ptr: bool = False,
        # Soft no object, i.e. mix in no_obj_ptr softly,
        # hope to make recovery easier if there is a mistake and mitigate accumulation of errors
        soft_no_obj_ptr: bool = False,
        use_mlp_for_obj_ptr_proj: bool = False,
        # add no obj embedding to spatial frames
        no_obj_embed_spatial: bool = False,
        # extra arguments used to construct the SAM mask decoder; if not None, it should be a dict of kwargs to be passed into `MaskDecoder` class.
        sam_mask_decoder_extra_args=None,
        compile_image_encoder: bool = False,
    ):
        """Assemble the SAM 2 base model from its three provided sub-modules.

        The model is built in four parts: (1) the image backbone, (2) the
        memory attention that conditions the current frame on past memories,
        (3) the memory encoder for past outputs, and (4) SAM-style prompt
        encoder + mask decoder heads (built in `_build_sam_heads`).

        Args:
            image_encoder: backbone producing `backbone_fpn`/`vision_pos_enc`
                feature pyramids (see `forward_image`).
            memory_attention: transformer fusing current-frame features with
                memories and object pointers.
            memory_encoder: module encoding (pix_feat, mask) into memory
                features; remaining keyword arguments are behavior flags
                documented inline above.
        """
        super().__init__()

        # Part 1: the image backbone
        self.image_encoder = image_encoder
        # Use level 0, 1, 2 for high-res setting, or just level 2 for the default setting
        self.use_high_res_features_in_sam = use_high_res_features_in_sam
        self.num_feature_levels = 3 if use_high_res_features_in_sam else 1
        self.use_obj_ptrs_in_encoder = use_obj_ptrs_in_encoder
        self.max_obj_ptrs_in_encoder = max_obj_ptrs_in_encoder
        if use_obj_ptrs_in_encoder:
            # A conv layer to downsample the mask prompt to stride 4 (the same stride as
            # low-res SAM mask logits) and to change its scales from 0~1 to SAM logit scale,
            # so that it can be fed into the SAM mask decoder to generate a pointer.
            self.mask_downsample = torch.nn.Conv2d(1, 1, kernel_size=4, stride=4)
        self.add_tpos_enc_to_obj_ptrs = add_tpos_enc_to_obj_ptrs
        if proj_tpos_enc_in_obj_ptrs:
            assert add_tpos_enc_to_obj_ptrs  # these options need to be used together
        self.proj_tpos_enc_in_obj_ptrs = proj_tpos_enc_in_obj_ptrs
        self.use_signed_tpos_enc_to_obj_ptrs = use_signed_tpos_enc_to_obj_ptrs
        self.only_obj_ptrs_in_the_past_for_eval = only_obj_ptrs_in_the_past_for_eval

        # Part 2: memory attention to condition current frame's visual features
        # with memories (and obj ptrs) from past frames
        self.memory_attention = memory_attention
        self.hidden_dim = image_encoder.neck.d_model

        # Part 3: memory encoder for the previous frame's outputs
        self.memory_encoder = memory_encoder
        self.mem_dim = self.hidden_dim
        if hasattr(self.memory_encoder, "out_proj") and hasattr(
            self.memory_encoder.out_proj, "weight"
        ):
            # if there is compression of memories along channel dim
            self.mem_dim = self.memory_encoder.out_proj.weight.shape[0]
        self.num_maskmem = num_maskmem  # Number of memories accessible
        # Temporal encoding of the memories
        self.maskmem_tpos_enc = torch.nn.Parameter(
            torch.zeros(num_maskmem, 1, 1, self.mem_dim)
        )
        trunc_normal_(self.maskmem_tpos_enc, std=0.02)
        # a single token to indicate no memory embedding from previous frames
        self.no_mem_embed = torch.nn.Parameter(torch.zeros(1, 1, self.hidden_dim))
        self.no_mem_pos_enc = torch.nn.Parameter(torch.zeros(1, 1, self.hidden_dim))
        trunc_normal_(self.no_mem_embed, std=0.02)
        trunc_normal_(self.no_mem_pos_enc, std=0.02)
        self.directly_add_no_mem_embed = directly_add_no_mem_embed
        # Apply sigmoid to the output raw mask logits (to turn them from
        # range (-inf, +inf) to range (0, 1)) before feeding them into the memory encoder
        self.sigmoid_scale_for_mem_enc = sigmoid_scale_for_mem_enc
        self.sigmoid_bias_for_mem_enc = sigmoid_bias_for_mem_enc
        self.binarize_mask_from_pts_for_mem_enc = binarize_mask_from_pts_for_mem_enc
        self.non_overlap_masks_for_mem_enc = non_overlap_masks_for_mem_enc
        self.memory_temporal_stride_for_eval = memory_temporal_stride_for_eval
        # On frames with mask input, whether to directly output the input mask without
        # using a SAM prompt encoder + mask decoder
        self.use_mask_input_as_output_without_sam = use_mask_input_as_output_without_sam
        self.multimask_output_in_sam = multimask_output_in_sam
        self.multimask_min_pt_num = multimask_min_pt_num
        self.multimask_max_pt_num = multimask_max_pt_num
        self.multimask_output_for_tracking = multimask_output_for_tracking
        self.use_multimask_token_for_obj_ptr = use_multimask_token_for_obj_ptr
        self.iou_prediction_use_sigmoid = iou_prediction_use_sigmoid

        # Part 4: SAM-style prompt encoder (for both mask and point inputs)
        # and SAM-style mask decoder for the final mask output
        self.image_size = image_size
        self.backbone_stride = backbone_stride
        self.sam_mask_decoder_extra_args = sam_mask_decoder_extra_args
        self.pred_obj_scores = pred_obj_scores
        self.pred_obj_scores_mlp = pred_obj_scores_mlp
        self.fixed_no_obj_ptr = fixed_no_obj_ptr
        self.soft_no_obj_ptr = soft_no_obj_ptr
        if self.fixed_no_obj_ptr:
            assert self.pred_obj_scores
            assert self.use_obj_ptrs_in_encoder
        if self.pred_obj_scores and self.use_obj_ptrs_in_encoder:
            self.no_obj_ptr = torch.nn.Parameter(torch.zeros(1, self.hidden_dim))
            trunc_normal_(self.no_obj_ptr, std=0.02)
        self.use_mlp_for_obj_ptr_proj = use_mlp_for_obj_ptr_proj
        self.no_obj_embed_spatial = None
        if no_obj_embed_spatial:
            self.no_obj_embed_spatial = torch.nn.Parameter(torch.zeros(1, self.mem_dim))
            trunc_normal_(self.no_obj_embed_spatial, std=0.02)

        self._build_sam_heads()
        self.max_cond_frames_in_attn = max_cond_frames_in_attn

        # Model compilation
        if compile_image_encoder:
            # Compile the forward function (not the full module) to allow loading checkpoints.
            print(
                "Image encoder compilation is enabled. First forward pass will be slow."
            )
            self.image_encoder.forward = torch.compile(
                self.image_encoder.forward,
                mode="max-autotune",
                fullgraph=True,
                dynamic=False,
            )
197
+ @property
198
+ def device(self):
199
+ return next(self.parameters()).device
200
+
201
+ def forward(self, *args, **kwargs):
202
+ raise NotImplementedError(
203
+ "Please use the corresponding methods in SAM2VideoPredictor for inference or SAM2Train for training/fine-tuning"
204
+ "See notebooks/video_predictor_example.ipynb for an inference example."
205
+ )
206
+
207
    def _build_sam_heads(self):
        """Build SAM-style prompt encoder and mask decoder.

        Called from `__init__`; creates `sam_prompt_encoder`,
        `sam_mask_decoder`, `obj_ptr_proj` and `obj_ptr_tpos_proj` as
        sub-modules, sized from `hidden_dim`, `image_size` and
        `backbone_stride`.
        """
        self.sam_prompt_embed_dim = self.hidden_dim
        # spatial size of the backbone feature map fed to the SAM heads
        self.sam_image_embedding_size = self.image_size // self.backbone_stride

        # build PromptEncoder and MaskDecoder from SAM
        # (their hyperparameters like `mask_in_chans=16` are from SAM code)
        self.sam_prompt_encoder = PromptEncoder(
            embed_dim=self.sam_prompt_embed_dim,
            image_embedding_size=(
                self.sam_image_embedding_size,
                self.sam_image_embedding_size,
            ),
            input_image_size=(self.image_size, self.image_size),
            mask_in_chans=16,
        )
        self.sam_mask_decoder = MaskDecoder(
            num_multimask_outputs=3,
            transformer=TwoWayTransformer(
                depth=2,
                embedding_dim=self.sam_prompt_embed_dim,
                mlp_dim=2048,
                num_heads=8,
            ),
            transformer_dim=self.sam_prompt_embed_dim,
            iou_head_depth=3,
            iou_head_hidden_dim=256,
            use_high_res_features=self.use_high_res_features_in_sam,
            iou_prediction_use_sigmoid=self.iou_prediction_use_sigmoid,
            pred_obj_scores=self.pred_obj_scores,
            pred_obj_scores_mlp=self.pred_obj_scores_mlp,
            use_multimask_token_for_obj_ptr=self.use_multimask_token_for_obj_ptr,
            **(self.sam_mask_decoder_extra_args or {}),
        )
        if self.use_obj_ptrs_in_encoder:
            # a linear projection on SAM output tokens to turn them into object pointers
            self.obj_ptr_proj = torch.nn.Linear(self.hidden_dim, self.hidden_dim)
            if self.use_mlp_for_obj_ptr_proj:
                # replaces the Linear above with a 3-layer MLP when configured
                self.obj_ptr_proj = MLP(
                    self.hidden_dim, self.hidden_dim, self.hidden_dim, 3
                )
        else:
            self.obj_ptr_proj = torch.nn.Identity()
        if self.proj_tpos_enc_in_obj_ptrs:
            # a linear projection on temporal positional encoding in object pointers to
            # avoid potential interference with spatial positional encoding
            self.obj_ptr_tpos_proj = torch.nn.Linear(self.hidden_dim, self.mem_dim)
        else:
            self.obj_ptr_tpos_proj = torch.nn.Identity()
257
    def _forward_sam_heads(
        self,
        backbone_features,
        point_inputs=None,
        mask_inputs=None,
        high_res_features=None,
        multimask_output=False,
    ):
        """
        Forward SAM prompt encoders and mask heads.

        Inputs:
        - backbone_features: image features of [B, C, H, W] shape
        - point_inputs: a dictionary with "point_coords" and "point_labels", where
          1) "point_coords" has [B, P, 2] shape and float32 dtype and contains the
             absolute pixel-unit coordinate in (x, y) format of the P input points
          2) "point_labels" has shape [B, P] and int32 dtype, where 1 means
             positive clicks, 0 means negative clicks, and -1 means padding
        - mask_inputs: a mask of [B, 1, H*16, W*16] shape, float or bool, with the
          same spatial size as the image.
        - high_res_features: either 1) None or 2) or a list of length 2 containing
          two feature maps of [B, C, 4*H, 4*W] and [B, C, 2*H, 2*W] shapes respectively,
          which will be used as high-resolution feature maps for SAM decoder.
        - multimask_output: if it's True, we output 3 candidate masks and their 3
          corresponding IoU estimates, and if it's False, we output only 1 mask and
          its corresponding IoU estimate.

        Outputs:
        - low_res_multimasks: [B, M, H*4, W*4] shape (where M = 3 if
          `multimask_output=True` and M = 1 if `multimask_output=False`), the SAM
          output mask logits (before sigmoid) for the low-resolution masks, with 4x
          the resolution (1/4 stride) of the input backbone_features.
        - high_res_multimasks: [B, M, H*16, W*16] shape (where M = 3
          if `multimask_output=True` and M = 1 if `multimask_output=False`),
          upsampled from the low-resolution masks, with shape size as the image
          (stride is 1 pixel).
        - ious, [B, M] shape, where (where M = 3 if `multimask_output=True` and M = 1
          if `multimask_output=False`), the estimated IoU of each output mask.
        - low_res_masks: [B, 1, H*4, W*4] shape, the best mask in `low_res_multimasks`.
          If `multimask_output=True`, it's the mask with the highest IoU estimate.
          If `multimask_output=False`, it's the same as `low_res_multimasks`.
        - high_res_masks: [B, 1, H*16, W*16] shape, the best mask in `high_res_multimasks`.
          If `multimask_output=True`, it's the mask with the highest IoU estimate.
          If `multimask_output=False`, it's the same as `high_res_multimasks`.
        - obj_ptr: [B, C] shape, the object pointer vector for the output mask, extracted
          based on the output token from the SAM mask decoder.
        """
        B = backbone_features.size(0)
        device = backbone_features.device
        # backbone features must already be at the SAM embedding size/dim
        assert backbone_features.size(1) == self.sam_prompt_embed_dim
        assert backbone_features.size(2) == self.sam_image_embedding_size
        assert backbone_features.size(3) == self.sam_image_embedding_size

        # a) Handle point prompts
        if point_inputs is not None:
            sam_point_coords = point_inputs["point_coords"]
            sam_point_labels = point_inputs["point_labels"]
            assert sam_point_coords.size(0) == B and sam_point_labels.size(0) == B
        else:
            # If no points are provided, pad with an empty point (with label -1)
            sam_point_coords = torch.zeros(B, 1, 2, device=device)
            sam_point_labels = -torch.ones(B, 1, dtype=torch.int32, device=device)

        # b) Handle mask prompts
        if mask_inputs is not None:
            # If mask_inputs is provided, downsize it into low-res mask input if needed
            # and feed it as a dense mask prompt into the SAM mask encoder
            assert len(mask_inputs.shape) == 4 and mask_inputs.shape[:2] == (B, 1)
            if mask_inputs.shape[-2:] != self.sam_prompt_encoder.mask_input_size:
                sam_mask_prompt = F.interpolate(
                    mask_inputs.float(),
                    size=self.sam_prompt_encoder.mask_input_size,
                    align_corners=False,
                    mode="bilinear",
                    antialias=True,  # use antialias for downsampling
                )
            else:
                sam_mask_prompt = mask_inputs
        else:
            # Otherwise, simply feed None (and SAM's prompt encoder will add
            # a learned `no_mask_embed` to indicate no mask input in this case).
            sam_mask_prompt = None

        sparse_embeddings, dense_embeddings = self.sam_prompt_encoder(
            points=(sam_point_coords, sam_point_labels),
            boxes=None,
            masks=sam_mask_prompt,
        )
        (
            low_res_multimasks,
            ious,
            sam_output_tokens,
            object_score_logits,
        ) = self.sam_mask_decoder(
            image_embeddings=backbone_features,
            image_pe=self.sam_prompt_encoder.get_dense_pe(),
            sparse_prompt_embeddings=sparse_embeddings,
            dense_prompt_embeddings=dense_embeddings,
            multimask_output=multimask_output,
            repeat_image=False,  # the image is already batched
            high_res_features=high_res_features,
        )
        if self.pred_obj_scores:
            is_obj_appearing = object_score_logits > 0

            # Mask used for spatial memories is always a *hard* choice between obj and no obj,
            # consistent with the actual mask prediction; NO_OBJ_SCORE fills in
            # "definitely no object" logits for frames where no object appears.
            low_res_multimasks = torch.where(
                is_obj_appearing[:, None, None],
                low_res_multimasks,
                NO_OBJ_SCORE,
            )

        # convert masks from possibly bfloat16 (or float16) to float32
        # (older PyTorch versions before 2.1 don't support `interpolate` on bf16)
        low_res_multimasks = low_res_multimasks.float()
        high_res_multimasks = F.interpolate(
            low_res_multimasks,
            size=(self.image_size, self.image_size),
            mode="bilinear",
            align_corners=False,
        )

        sam_output_token = sam_output_tokens[:, 0]
        if multimask_output:
            # take the best mask prediction (with the highest IoU estimation)
            best_iou_inds = torch.argmax(ious, dim=-1)
            batch_inds = torch.arange(B, device=device)
            low_res_masks = low_res_multimasks[batch_inds, best_iou_inds].unsqueeze(1)
            high_res_masks = high_res_multimasks[batch_inds, best_iou_inds].unsqueeze(1)
            if sam_output_tokens.size(1) > 1:
                # pick the output token matching the best mask as well
                sam_output_token = sam_output_tokens[batch_inds, best_iou_inds]
        else:
            low_res_masks, high_res_masks = low_res_multimasks, high_res_multimasks

        # Extract object pointer from the SAM output token (with occlusion handling)
        obj_ptr = self.obj_ptr_proj(sam_output_token)
        if self.pred_obj_scores:
            # Allow *soft* no obj ptr, unlike for masks
            if self.soft_no_obj_ptr:
                lambda_is_obj_appearing = object_score_logits.sigmoid()
            else:
                # `is_obj_appearing` was computed above under the same
                # `pred_obj_scores` guard, so it is always defined here
                lambda_is_obj_appearing = is_obj_appearing.float()

            if self.fixed_no_obj_ptr:
                obj_ptr = lambda_is_obj_appearing * obj_ptr
            # blend in the learned no-object pointer proportionally to the
            # (soft or hard) probability that no object is present
            obj_ptr = obj_ptr + (1 - lambda_is_obj_appearing) * self.no_obj_ptr

        return (
            low_res_multimasks,
            high_res_multimasks,
            ious,
            low_res_masks,
            high_res_masks,
            obj_ptr,
            object_score_logits,
        )
415
    def _use_mask_as_output(self, backbone_features, high_res_features, mask_inputs):
        """
        Directly turn binary `mask_inputs` into a output mask logits without using SAM.
        (same input and output shapes as in _forward_sam_heads above).
        """
        # Use -10/+10 as logits for neg/pos pixels (very close to 0/1 in prob after sigmoid).
        out_scale, out_bias = 20.0, -10.0  # sigmoid(-10.0)=4.5398e-05
        mask_inputs_float = mask_inputs.float()
        high_res_masks = mask_inputs_float * out_scale + out_bias
        low_res_masks = F.interpolate(
            high_res_masks,
            size=(high_res_masks.size(-2) // 4, high_res_masks.size(-1) // 4),
            align_corners=False,
            mode="bilinear",
            antialias=True,  # use antialias for downsampling
        )
        # a dummy IoU prediction of all 1's under mask input
        ious = mask_inputs.new_ones(mask_inputs.size(0), 1).float()
        if not self.use_obj_ptrs_in_encoder:
            # all zeros as a dummy object pointer (of shape [B, C])
            obj_ptr = torch.zeros(
                mask_inputs.size(0), self.hidden_dim, device=mask_inputs.device
            )
        else:
            # produce an object pointer using the SAM decoder from the mask input
            _, _, _, _, _, obj_ptr, _ = self._forward_sam_heads(
                backbone_features=backbone_features,
                mask_inputs=self.mask_downsample(mask_inputs_float),
                high_res_features=high_res_features,
            )
        # In this method, we are treating mask_input as output, e.g. using it directly to create spatial mem;
        # Below, we follow the same design axiom to use mask_input to decide if obj appears or not instead of relying
        # on the object_scores from the SAM decoder.
        is_obj_appearing = torch.any(mask_inputs.flatten(1).float() > 0.0, dim=1)
        is_obj_appearing = is_obj_appearing[..., None]
        lambda_is_obj_appearing = is_obj_appearing.float()
        object_score_logits = out_scale * lambda_is_obj_appearing + out_bias
        if self.pred_obj_scores:
            if self.fixed_no_obj_ptr:
                obj_ptr = lambda_is_obj_appearing * obj_ptr
            obj_ptr = obj_ptr + (1 - lambda_is_obj_appearing) * self.no_obj_ptr

        # NOTE: single-mask case, so the "multimasks" slots of the 7-tuple are
        # the same tensors as the best-mask slots (matching _forward_sam_heads
        # with multimask_output=False).
        return (
            low_res_masks,
            high_res_masks,
            ious,
            low_res_masks,
            high_res_masks,
            obj_ptr,
            object_score_logits,
        )
467
+ def forward_image(self, img_batch: torch.Tensor):
468
+ """Get the image feature on the input batch."""
469
+ backbone_out = self.image_encoder(img_batch)
470
+ if self.use_high_res_features_in_sam:
471
+ # precompute projected level 0 and level 1 features in SAM decoder
472
+ # to avoid running it again on every SAM click
473
+ backbone_out["backbone_fpn"][0] = self.sam_mask_decoder.conv_s0(
474
+ backbone_out["backbone_fpn"][0]
475
+ )
476
+ backbone_out["backbone_fpn"][1] = self.sam_mask_decoder.conv_s1(
477
+ backbone_out["backbone_fpn"][1]
478
+ )
479
+ return backbone_out
480
+
481
+ def _prepare_backbone_features(self, backbone_out):
482
+ """Prepare and flatten visual features."""
483
+ backbone_out = backbone_out.copy()
484
+ assert len(backbone_out["backbone_fpn"]) == len(backbone_out["vision_pos_enc"])
485
+ assert len(backbone_out["backbone_fpn"]) >= self.num_feature_levels
486
+
487
+ feature_maps = backbone_out["backbone_fpn"][-self.num_feature_levels :]
488
+ vision_pos_embeds = backbone_out["vision_pos_enc"][-self.num_feature_levels :]
489
+
490
+ feat_sizes = [(x.shape[-2], x.shape[-1]) for x in vision_pos_embeds]
491
+ # flatten NxCxHxW to HWxNxC
492
+ vision_feats = [x.flatten(2).permute(2, 0, 1) for x in feature_maps]
493
+ vision_pos_embeds = [x.flatten(2).permute(2, 0, 1) for x in vision_pos_embeds]
494
+
495
+ return backbone_out, vision_feats, vision_pos_embeds, feat_sizes
496
+
497
    def _prepare_memory_conditioned_features(
        self,
        frame_idx,
        is_init_cond_frame,
        current_vision_feats,
        current_vision_pos_embeds,
        feat_sizes,
        output_dict,
        num_frames,
        track_in_reverse=False,  # tracking in reverse time order (for demo usage)
    ):
        """Fuse the current frame's visual feature map with previous memory.

        Builds the memory bank (spatial mask memories plus optional object
        pointers) from `output_dict` and runs `memory_attention` over it.
        Returns the memory-conditioned pixel features as a [B, C, H, W] tensor.
        """
        B = current_vision_feats[-1].size(1)  # batch size on this frame
        C = self.hidden_dim
        H, W = feat_sizes[-1]  # top-level (lowest-resolution) feature size
        device = current_vision_feats[-1].device
        # The case of `self.num_maskmem == 0` below is primarily used for reproducing SAM on images.
        # In this case, we skip the fusion with any memory.
        if self.num_maskmem == 0:  # Disable memory and skip fusion
            pix_feat = current_vision_feats[-1].permute(1, 2, 0).view(B, C, H, W)
            return pix_feat

        num_obj_ptr_tokens = 0
        # sign of the temporal distance term flips when tracking backwards
        tpos_sign_mul = -1 if track_in_reverse else 1
        # Step 1: condition the visual features of the current frame on previous memories
        if not is_init_cond_frame:
            # Retrieve the memories encoded with the maskmem backbone
            to_cat_memory, to_cat_memory_pos_embed = [], []
            # Add conditioning frames's output first (all cond frames have t_pos=0 for
            # when getting temporal positional embedding below)
            assert len(output_dict["cond_frame_outputs"]) > 0
            # Select a maximum number of temporally closest cond frames for cross attention
            cond_outputs = output_dict["cond_frame_outputs"]
            selected_cond_outputs, unselected_cond_outputs = select_closest_cond_frames(
                frame_idx, cond_outputs, self.max_cond_frames_in_attn
            )
            t_pos_and_prevs = [(0, out) for out in selected_cond_outputs.values()]
            # Add last (self.num_maskmem - 1) frames before current frame for non-conditioning memory
            # the earliest one has t_pos=1 and the latest one has t_pos=self.num_maskmem-1
            # We also allow taking the memory frame non-consecutively (with stride>1), in which case
            # we take (self.num_maskmem - 2) frames among every stride-th frames plus the last frame.
            stride = 1 if self.training else self.memory_temporal_stride_for_eval
            for t_pos in range(1, self.num_maskmem):
                t_rel = self.num_maskmem - t_pos  # how many frames before current frame
                if t_rel == 1:
                    # for t_rel == 1, we take the last frame (regardless of r)
                    if not track_in_reverse:
                        # the frame immediately before this frame (i.e. frame_idx - 1)
                        prev_frame_idx = frame_idx - t_rel
                    else:
                        # the frame immediately after this frame (i.e. frame_idx + 1)
                        prev_frame_idx = frame_idx + t_rel
                else:
                    # for t_rel >= 2, we take the memory frame from every r-th frames
                    if not track_in_reverse:
                        # first find the nearest frame among every r-th frames before this frame
                        # for r=1, this would be (frame_idx - 2)
                        prev_frame_idx = ((frame_idx - 2) // stride) * stride
                        # then seek further among every r-th frames
                        prev_frame_idx = prev_frame_idx - (t_rel - 2) * stride
                    else:
                        # first find the nearest frame among every r-th frames after this frame
                        # for r=1, this would be (frame_idx + 2)
                        prev_frame_idx = -(-(frame_idx + 2) // stride) * stride
                        # then seek further among every r-th frames
                        prev_frame_idx = prev_frame_idx + (t_rel - 2) * stride
                out = output_dict["non_cond_frame_outputs"].get(prev_frame_idx, None)
                if out is None:
                    # If an unselected conditioning frame is among the last (self.num_maskmem - 1)
                    # frames, we still attend to it as if it's a non-conditioning frame.
                    out = unselected_cond_outputs.get(prev_frame_idx, None)
                t_pos_and_prevs.append((t_pos, out))

            for t_pos, prev in t_pos_and_prevs:
                if prev is None:
                    continue  # skip padding frames
                # "maskmem_features" might have been offloaded to CPU in demo use cases,
                # so we load it back to GPU (it's a no-op if it's already on GPU).
                feats = prev["maskmem_features"].to(device, non_blocking=True)
                to_cat_memory.append(feats.flatten(2).permute(2, 0, 1))
                # Spatial positional encoding (it might have been offloaded to CPU in eval)
                maskmem_enc = prev["maskmem_pos_enc"][-1].to(device)
                maskmem_enc = maskmem_enc.flatten(2).permute(2, 0, 1)
                # Temporal positional encoding
                maskmem_enc = (
                    maskmem_enc + self.maskmem_tpos_enc[self.num_maskmem - t_pos - 1]
                )
                to_cat_memory_pos_embed.append(maskmem_enc)

            # Construct the list of past object pointers
            if self.use_obj_ptrs_in_encoder:
                max_obj_ptrs_in_encoder = min(num_frames, self.max_obj_ptrs_in_encoder)
                # First add those object pointers from selected conditioning frames
                # (optionally, only include object pointers in the past during evaluation)
                if not self.training and self.only_obj_ptrs_in_the_past_for_eval:
                    ptr_cond_outputs = {
                        t: out
                        for t, out in selected_cond_outputs.items()
                        if (t >= frame_idx if track_in_reverse else t <= frame_idx)
                    }
                else:
                    ptr_cond_outputs = selected_cond_outputs
                pos_and_ptrs = [
                    # Temporal pos encoding contains how far away each pointer is from current frame
                    (
                        (
                            (frame_idx - t) * tpos_sign_mul
                            if self.use_signed_tpos_enc_to_obj_ptrs
                            else abs(frame_idx - t)
                        ),
                        out["obj_ptr"],
                    )
                    for t, out in ptr_cond_outputs.items()
                ]
                # Add up to (max_obj_ptrs_in_encoder - 1) non-conditioning frames before current frame
                for t_diff in range(1, max_obj_ptrs_in_encoder):
                    t = frame_idx + t_diff if track_in_reverse else frame_idx - t_diff
                    if t < 0 or (num_frames is not None and t >= num_frames):
                        break
                    out = output_dict["non_cond_frame_outputs"].get(
                        t, unselected_cond_outputs.get(t, None)
                    )
                    if out is not None:
                        pos_and_ptrs.append((t_diff, out["obj_ptr"]))
                # If we have at least one object pointer, add them to the across attention
                if len(pos_and_ptrs) > 0:
                    pos_list, ptrs_list = zip(*pos_and_ptrs)
                    # stack object pointers along dim=0 into [ptr_seq_len, B, C] shape
                    obj_ptrs = torch.stack(ptrs_list, dim=0)
                    # a temporal positional embedding based on how far each object pointer is from
                    # the current frame (sine embedding normalized by the max pointer num).
                    if self.add_tpos_enc_to_obj_ptrs:
                        t_diff_max = max_obj_ptrs_in_encoder - 1
                        tpos_dim = C if self.proj_tpos_enc_in_obj_ptrs else self.mem_dim
                        obj_pos = torch.tensor(pos_list, device=device)
                        obj_pos = get_1d_sine_pe(obj_pos / t_diff_max, dim=tpos_dim)
                        obj_pos = self.obj_ptr_tpos_proj(obj_pos)
                        obj_pos = obj_pos.unsqueeze(1).expand(-1, B, self.mem_dim)
                    else:
                        obj_pos = obj_ptrs.new_zeros(len(pos_list), B, self.mem_dim)
                    if self.mem_dim < C:
                        # split a pointer into (C // self.mem_dim) tokens for self.mem_dim < C
                        obj_ptrs = obj_ptrs.reshape(
                            -1, B, C // self.mem_dim, self.mem_dim
                        )
                        obj_ptrs = obj_ptrs.permute(0, 2, 1, 3).flatten(0, 1)
                        obj_pos = obj_pos.repeat_interleave(C // self.mem_dim, dim=0)
                    to_cat_memory.append(obj_ptrs)
                    to_cat_memory_pos_embed.append(obj_pos)
                    num_obj_ptr_tokens = obj_ptrs.shape[0]
                else:
                    num_obj_ptr_tokens = 0
        else:
            # for initial conditioning frames, encode them without using any previous memory
            if self.directly_add_no_mem_embed:
                # directly add no-mem embedding (instead of using the transformer encoder)
                pix_feat_with_mem = current_vision_feats[-1] + self.no_mem_embed
                pix_feat_with_mem = pix_feat_with_mem.permute(1, 2, 0).view(B, C, H, W)
                return pix_feat_with_mem

            # Use a dummy token on the first frame (to avoid empty memory input to tranformer encoder)
            to_cat_memory = [self.no_mem_embed.expand(1, B, self.mem_dim)]
            to_cat_memory_pos_embed = [self.no_mem_pos_enc.expand(1, B, self.mem_dim)]

        # Step 2: Concatenate the memories and forward through the transformer encoder
        memory = torch.cat(to_cat_memory, dim=0)
        memory_pos_embed = torch.cat(to_cat_memory_pos_embed, dim=0)

        pix_feat_with_mem = self.memory_attention(
            curr=current_vision_feats,
            curr_pos=current_vision_pos_embeds,
            memory=memory,
            memory_pos=memory_pos_embed,
            num_obj_ptr_tokens=num_obj_ptr_tokens,
        )
        # reshape the output (HW)BC => BCHW
        pix_feat_with_mem = pix_feat_with_mem.permute(1, 2, 0).view(B, C, H, W)
        return pix_feat_with_mem
676
def _encode_new_memory(
    self,
    current_vision_feats,
    feat_sizes,
    pred_masks_high_res,
    object_score_logits,
    is_mask_from_pts,
):
    """Encode the current image and its prediction into a memory feature.

    Args:
        current_vision_feats: list of (HW)BC feature maps; only the last
            (top-level, lowest-resolution) entry is used here.
        feat_sizes: list of (H, W) sizes matching ``current_vision_feats``.
        pred_masks_high_res: raw high-resolution mask logits for this frame.
        object_score_logits: per-object scores; > 0 means the object is
            predicted to appear in the frame.
        is_mask_from_pts: whether the masks came from point-based clicks.

    Returns:
        Tuple of (maskmem_features, maskmem_pos_enc) produced by
        ``self.memory_encoder``.
    """
    B = current_vision_feats[-1].size(1)  # batch size on this frame
    C = self.hidden_dim
    H, W = feat_sizes[-1]  # top-level (lowest-resolution) feature size
    # top-level feature, (HW)BC => BCHW
    pix_feat = current_vision_feats[-1].permute(1, 2, 0).view(B, C, H, W)
    if self.non_overlap_masks_for_mem_enc and not self.training:
        # optionally, apply non-overlapping constraints to the masks (it's applied
        # in the batch dimension and should only be used during eval, where all
        # the objects come from the same video under batch size 1).
        pred_masks_high_res = self._apply_non_overlapping_constraints(
            pred_masks_high_res
        )
    # scale the raw mask logits with a temperature before applying sigmoid
    binarize = self.binarize_mask_from_pts_for_mem_enc and is_mask_from_pts
    if binarize and not self.training:
        # hard-threshold point-derived masks at eval time instead of sigmoid
        mask_for_mem = (pred_masks_high_res > 0).float()
    else:
        # apply sigmoid on the raw mask logits to turn them into range (0, 1)
        mask_for_mem = torch.sigmoid(pred_masks_high_res)
    # apply scale and bias terms to the sigmoid probabilities
    if self.sigmoid_scale_for_mem_enc != 1.0:
        mask_for_mem = mask_for_mem * self.sigmoid_scale_for_mem_enc
    if self.sigmoid_bias_for_mem_enc != 0.0:
        mask_for_mem = mask_for_mem + self.sigmoid_bias_for_mem_enc
    maskmem_out = self.memory_encoder(
        pix_feat, mask_for_mem, skip_mask_sigmoid=True  # sigmoid already applied
    )
    maskmem_features = maskmem_out["vision_features"]
    maskmem_pos_enc = maskmem_out["vision_pos_enc"]
    # add a no-object embedding to the spatial memory to indicate that the frame
    # is predicted to be occluded (i.e. no object is appearing in the frame)
    if self.no_obj_embed_spatial is not None:
        is_obj_appearing = (object_score_logits > 0).float()
        maskmem_features += (
            1 - is_obj_appearing[..., None, None]
        ) * self.no_obj_embed_spatial[..., None, None].expand(
            *maskmem_features.shape
        )

    return maskmem_features, maskmem_pos_enc
725
+
726
def _track_step(
    self,
    frame_idx,
    is_init_cond_frame,
    current_vision_feats,
    current_vision_pos_embeds,
    feat_sizes,
    point_inputs,
    mask_inputs,
    output_dict,
    num_frames,
    track_in_reverse,
    prev_sam_mask_logits,
):
    """Run one tracking step on a single frame (shared core of `track_step`).

    Fuses the frame's visual features with the memory bank (unless a GT-style
    mask input short-circuits the SAM head) and runs the SAM-style mask decoder.

    Returns:
        Tuple of (current_out, sam_outputs, high_res_features, pix_feat), where
        `current_out` is a dict seeded with the point/mask inputs.
    """
    current_out = {"point_inputs": point_inputs, "mask_inputs": mask_inputs}
    # High-resolution feature maps for the SAM head, reshape (HW)BC => BCHW
    if len(current_vision_feats) > 1:
        high_res_features = [
            x.permute(1, 2, 0).view(x.size(1), x.size(2), *s)
            for x, s in zip(current_vision_feats[:-1], feat_sizes[:-1])
        ]
    else:
        high_res_features = None
    if mask_inputs is not None and self.use_mask_input_as_output_without_sam:
        # When use_mask_input_as_output_without_sam=True, we directly output the mask input
        # (see it as a GT mask) without using a SAM prompt encoder + mask decoder.
        pix_feat = current_vision_feats[-1].permute(1, 2, 0)
        pix_feat = pix_feat.view(-1, self.hidden_dim, *feat_sizes[-1])
        sam_outputs = self._use_mask_as_output(
            pix_feat, high_res_features, mask_inputs
        )
    else:
        # fuse the visual feature with previous memory features in the memory bank
        pix_feat = self._prepare_memory_conditioned_features(
            frame_idx=frame_idx,
            is_init_cond_frame=is_init_cond_frame,
            current_vision_feats=current_vision_feats[-1:],
            current_vision_pos_embeds=current_vision_pos_embeds[-1:],
            feat_sizes=feat_sizes[-1:],
            output_dict=output_dict,
            num_frames=num_frames,
            track_in_reverse=track_in_reverse,
        )
        # apply SAM-style segmentation head
        # here we might feed previously predicted low-res SAM mask logits into the SAM mask decoder,
        # e.g. in demo where such logits come from earlier interaction instead of correction sampling
        # (in this case, any `mask_inputs` shouldn't reach here as they are sent to _use_mask_as_output instead)
        if prev_sam_mask_logits is not None:
            assert point_inputs is not None and mask_inputs is None
            mask_inputs = prev_sam_mask_logits
        multimask_output = self._use_multimask(is_init_cond_frame, point_inputs)
        sam_outputs = self._forward_sam_heads(
            backbone_features=pix_feat,
            point_inputs=point_inputs,
            mask_inputs=mask_inputs,
            high_res_features=high_res_features,
            multimask_output=multimask_output,
        )

    return current_out, sam_outputs, high_res_features, pix_feat
786
+
787
def _encode_memory_in_output(
    self,
    current_vision_feats,
    feat_sizes,
    point_inputs,
    run_mem_encoder,
    high_res_masks,
    object_score_logits,
    current_out,
):
    """Encode this frame's prediction into memory and store it in `current_out`.

    Populates the "maskmem_features" / "maskmem_pos_enc" keys of `current_out`;
    both are set to None when memory encoding is disabled or skipped.
    """
    if not (run_mem_encoder and self.num_maskmem > 0):
        # Memory encoding disabled (or no memory slots configured).
        current_out["maskmem_features"] = None
        current_out["maskmem_pos_enc"] = None
        return

    features, pos_enc = self._encode_new_memory(
        current_vision_feats=current_vision_feats,
        feat_sizes=feat_sizes,
        pred_masks_high_res=high_res_masks,
        object_score_logits=object_score_logits,
        is_mask_from_pts=point_inputs is not None,
    )
    current_out["maskmem_features"] = features
    current_out["maskmem_pos_enc"] = pos_enc
811
+
812
def track_step(
    self,
    frame_idx,
    is_init_cond_frame,
    current_vision_feats,
    current_vision_pos_embeds,
    feat_sizes,
    point_inputs,
    mask_inputs,
    output_dict,
    num_frames,
    track_in_reverse=False,  # tracking in reverse time order (for demo usage)
    # Whether to run the memory encoder on the predicted masks. Sometimes we might want
    # to skip the memory encoder with `run_mem_encoder=False`. For example,
    # in demo we might call `track_step` multiple times for each user click,
    # and only encode the memory when the user finalizes their clicks. And in ablation
    # settings like SAM training on static images, we don't need the memory encoder.
    run_mem_encoder=True,
    # The previously predicted SAM mask logits (which can be fed together with new clicks in demo).
    prev_sam_mask_logits=None,
):
    """Track (or initialize) the objects on one video frame.

    Runs `_track_step` for the SAM outputs, packages the mask/pointer
    predictions into a per-frame output dict, and (optionally) encodes the
    predicted mask into a new memory feature for future frames.

    Returns:
        dict with keys "point_inputs", "mask_inputs", "pred_masks",
        "pred_masks_high_res", "obj_ptr", "maskmem_features",
        "maskmem_pos_enc", and (at inference) "object_score_logits".
    """
    current_out, sam_outputs, _, _ = self._track_step(
        frame_idx,
        is_init_cond_frame,
        current_vision_feats,
        current_vision_pos_embeds,
        feat_sizes,
        point_inputs,
        mask_inputs,
        output_dict,
        num_frames,
        track_in_reverse,
        prev_sam_mask_logits,
    )

    # Unpack only the fields needed here; the first three SAM outputs
    # (multimask candidates / ious) are intentionally discarded.
    (
        _,
        _,
        _,
        low_res_masks,
        high_res_masks,
        obj_ptr,
        object_score_logits,
    ) = sam_outputs

    current_out["pred_masks"] = low_res_masks
    current_out["pred_masks_high_res"] = high_res_masks
    current_out["obj_ptr"] = obj_ptr
    if not self.training:
        # Only add this in inference (to avoid unused param in activation checkpointing;
        # it's mainly used in the demo to encode spatial memories w/ consolidated masks)
        current_out["object_score_logits"] = object_score_logits

    # Finally run the memory encoder on the predicted mask to encode
    # it into a new memory feature (that can be used in future frames)
    self._encode_memory_in_output(
        current_vision_feats,
        feat_sizes,
        point_inputs,
        run_mem_encoder,
        high_res_masks,
        object_score_logits,
        current_out,
    )

    return current_out
878
+
879
def _use_multimask(self, is_init_cond_frame, point_inputs):
    """Whether the SAM head should output multiple mask candidates.

    Multimask output is used only when enabled globally, on conditioning
    frames (or always, if enabled for tracking), and when the number of
    click points falls within the configured [min, max] range.
    """
    if point_inputs is None:
        num_pts = 0
    else:
        num_pts = point_inputs["point_labels"].size(1)
    pt_count_ok = self.multimask_min_pt_num <= num_pts <= self.multimask_max_pt_num
    frame_ok = is_init_cond_frame or self.multimask_output_for_tracking
    return self.multimask_output_in_sam and frame_ok and pt_count_ok
888
+
889
def _apply_non_overlapping_constraints(self, pred_masks):
    """Keep only the highest-scoring object at each spatial location.

    `pred_masks` holds per-object mask logits stacked along dim 0. Everywhere
    an object is not the per-pixel argmax winner, its logit is pushed down to
    at most -10.0 (sigmoid(-10.0) ~= 4.5e-05) so foreground regions of
    different objects cannot overlap.
    """
    num_objects = pred_masks.size(0)
    if num_objects == 1:
        # A single object can never overlap with another one.
        return pred_masks

    # Index of the winning object at every spatial location.
    winner_idx = torch.argmax(pred_masks, dim=0, keepdim=True)
    obj_idx = torch.arange(num_objects, device=pred_masks.device).view(-1, 1, 1, 1)
    is_winner = winner_idx == obj_idx
    # Losers get their scores capped at -10.0; winners keep theirs.
    suppressed = torch.clamp(pred_masks, max=-10.0)
    return torch.where(is_winner, pred_masks, suppressed)
custom_nodes/comfyui-segment-anything-2/sam2/modeling/sam2_utils.py ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+
8
+ import copy
9
+ from typing import Tuple
10
+
11
+ import numpy as np
12
+ import torch
13
+ import torch.nn as nn
14
+ import torch.nn.functional as F
15
+
16
+ from ..utils.misc import mask_to_box
17
+
18
+
19
def select_closest_cond_frames(frame_idx, cond_frame_outputs, max_cond_frame_num):
    """Pick up to `max_cond_frame_num` conditioning frames nearest to `frame_idx`.

    Selection order:
      a) the closest conditioning frame before `frame_idx` (if any);
      b) the closest conditioning frame at/after `frame_idx` (if any);
      c) remaining frames by increasing temporal distance, until the budget
         `max_cond_frame_num` is filled.

    Returns:
        (selected_outputs, unselected_outputs): two dicts partitioning
        `cond_frame_outputs` (a `max_cond_frame_num` of -1 selects everything).
    """
    if max_cond_frame_num == -1 or len(cond_frame_outputs) <= max_cond_frame_num:
        # Budget covers everything: no frame is left out.
        return cond_frame_outputs, {}

    assert max_cond_frame_num >= 2, "we should allow using 2+ conditioning frames"
    chosen = {}

    # a) nearest conditioning frame strictly before the current frame
    earlier = [t for t in cond_frame_outputs if t < frame_idx]
    if earlier:
        t_prev = max(earlier)
        chosen[t_prev] = cond_frame_outputs[t_prev]

    # b) nearest conditioning frame at or after the current frame
    later = [t for t in cond_frame_outputs if t >= frame_idx]
    if later:
        t_next = min(later)
        chosen[t_next] = cond_frame_outputs[t_next]

    # c) fill the remaining budget with the temporally closest leftovers
    by_distance = sorted(
        (t for t in cond_frame_outputs if t not in chosen),
        key=lambda t: abs(t - frame_idx),
    )
    for t in by_distance[: max_cond_frame_num - len(chosen)]:
        chosen[t] = cond_frame_outputs[t]

    leftover = {t: v for t, v in cond_frame_outputs.items() if t not in chosen}
    return chosen, leftover
62
+
63
+
64
def get_1d_sine_pe(pos_inds, dim, temperature=10000):
    """Return the 1D sinusoidal positional embedding (original Transformer style).

    The first `dim // 2` channels carry sin terms and the last `dim // 2`
    carry cos terms, each over geometrically spaced frequencies.
    """
    half_dim = dim // 2
    freq_idx = torch.arange(half_dim, dtype=torch.float32, device=pos_inds.device)
    inv_freq = temperature ** (2 * (freq_idx // 2) / half_dim)

    angles = pos_inds.unsqueeze(-1) / inv_freq
    return torch.cat((angles.sin(), angles.cos()), dim=-1)
75
+
76
+
77
def get_activation_fn(activation):
    """Return the activation function matching the given name.

    Args:
        activation: one of "relu", "gelu", or "glu".

    Returns:
        The corresponding `torch.nn.functional` callable.

    Raises:
        RuntimeError: if `activation` is not a supported name.
    """
    if activation == "relu":
        return F.relu
    if activation == "gelu":
        return F.gelu
    if activation == "glu":
        return F.glu
    # BUG FIX: the error message previously omitted "glu" even though it is
    # an accepted value; keep the message in sync with the supported set.
    raise RuntimeError(f"activation should be relu/gelu/glu, not {activation}.")
86
+
87
+
88
def get_clones(module, N):
    """Return an nn.ModuleList holding N independent deep copies of `module`."""
    return nn.ModuleList(copy.deepcopy(module) for _ in range(N))
90
+
91
+
92
class DropPath(nn.Module):
    """Stochastic depth: randomly zero whole samples, rescaling survivors.

    Adapted from
    https://github.com/huggingface/pytorch-image-models/blob/main/timm/layers/drop.py
    """

    def __init__(self, drop_prob=0.0, scale_by_keep=True):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob
        self.scale_by_keep = scale_by_keep

    def forward(self, x):
        # Identity in eval mode or when dropping is disabled.
        if not self.training or self.drop_prob == 0.0:
            return x
        keep_prob = 1 - self.drop_prob
        # One Bernoulli draw per sample, broadcast over all other dims.
        mask_shape = (x.shape[0],) + (1,) * (x.ndim - 1)
        keep_mask = x.new_empty(mask_shape).bernoulli_(keep_prob)
        if self.scale_by_keep and keep_prob > 0.0:
            # Rescale so the expected activation magnitude is unchanged.
            keep_mask = keep_mask.div_(keep_prob)
        return x * keep_mask
108
+
109
+
110
+ # Lightly adapted from
111
+ # https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py # noqa
112
class MLP(nn.Module):
    """Simple multi-layer perceptron with a configurable activation.

    Lightly adapted from
    https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py
    """

    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        output_dim: int,
        num_layers: int,
        activation: nn.Module = nn.ReLU,
        sigmoid_output: bool = False,
    ) -> None:
        super().__init__()
        self.num_layers = num_layers
        # Layer widths: input -> hidden x (num_layers - 1) -> output.
        dims = [input_dim] + [hidden_dim] * (num_layers - 1) + [output_dim]
        self.layers = nn.ModuleList(
            nn.Linear(d_in, d_out) for d_in, d_out in zip(dims[:-1], dims[1:])
        )
        self.sigmoid_output = sigmoid_output
        self.act = activation()

    def forward(self, x):
        last = self.num_layers - 1
        for idx, layer in enumerate(self.layers):
            x = layer(x)
            # Activation between layers, but not after the final one.
            if idx < last:
                x = self.act(x)
        if self.sigmoid_output:
            x = F.sigmoid(x)
        return x
137
+
138
+
139
+ # From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa
140
+ # Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa
141
class LayerNorm2d(nn.Module):
    """Layer normalization over the channel dim of an NCHW tensor.

    From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
    (itself from the ConvNeXt implementation).
    """

    def __init__(self, num_channels: int, eps: float = 1e-6) -> None:
        super().__init__()
        self.weight = nn.Parameter(torch.ones(num_channels))
        self.bias = nn.Parameter(torch.zeros(num_channels))
        self.eps = eps

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Normalize each spatial position across channels (dim 1).
        mean = x.mean(1, keepdim=True)
        var = (x - mean).pow(2).mean(1, keepdim=True)
        normed = (x - mean) / torch.sqrt(var + self.eps)
        # Per-channel affine transform.
        return normed * self.weight[:, None, None] + self.bias[:, None, None]
154
+
155
+
156
def sample_box_points(
    masks: torch.Tensor,
    noise: float = 0.1,  # SAM default
    noise_bound: int = 20,  # SAM default
    top_left_label: int = 2,
    bottom_right_label: int = 3,
) -> Tuple[np.array, np.array]:
    """
    Sample a noised version of the top left and bottom right corners of a given `bbox`

    Inputs:
    - masks: [B, 1, H, W] boxes, dtype=torch.Tensor
    - noise: noise as a fraction of box width and height, dtype=float
    - noise_bound: maximum amount of noise (in pure pixels), dtype=int

    Returns:
    - box_coords: [B, num_pt, 2], contains (x, y) coordinates of top left and bottom right box corners, dtype=torch.float
    - box_labels: [B, num_pt], label 2 is reserved for top left and 3 for bottom right corners, dtype=torch.int32
    """
    device = masks.device
    # mask_to_box comes from ..utils.misc; presumably returns [B, 1, 4] XYXY
    # boxes per mask — confirm against its implementation if shapes change.
    box_coords = mask_to_box(masks)
    B, _, H, W = masks.shape
    # Alternating (top-left, bottom-right) labels, one pair per batch element.
    box_labels = torch.tensor(
        [top_left_label, bottom_right_label], dtype=torch.int, device=device
    ).repeat(B)
    if noise > 0.0:
        if not isinstance(noise_bound, torch.Tensor):
            noise_bound = torch.tensor(noise_bound, device=device)
        bbox_w = box_coords[..., 2] - box_coords[..., 0]
        bbox_h = box_coords[..., 3] - box_coords[..., 1]
        # Per-corner jitter capped at both a fraction of the box size and an
        # absolute pixel bound.
        max_dx = torch.min(bbox_w * noise, noise_bound)
        max_dy = torch.min(bbox_h * noise, noise_bound)
        box_noise = 2 * torch.rand(B, 1, 4, device=device) - 1
        box_noise = box_noise * torch.stack((max_dx, max_dy, max_dx, max_dy), dim=-1)

        box_coords = box_coords + box_noise
        img_bounds = (
            torch.tensor([W, H, W, H], device=device) - 1
        )  # uncentered pixel coords
        box_coords.clamp_(torch.zeros_like(img_bounds), img_bounds)  # In place clamping

    box_coords = box_coords.reshape(-1, 2, 2)  # always 2 points
    box_labels = box_labels.reshape(-1, 2)
    return box_coords, box_labels
200
+
201
+
202
def sample_random_points_from_errors(gt_masks, pred_masks, num_pt=1):
    """
    Sample `num_pt` random points (along with their labels) independently from the error regions.

    Inputs:
    - gt_masks: [B, 1, H_im, W_im] masks, dtype=torch.bool
    - pred_masks: [B, 1, H_im, W_im] masks, dtype=torch.bool or None
    - num_pt: int, number of points to sample independently for each of the B error maps

    Outputs:
    - points: [B, num_pt, 2], dtype=torch.float, contains (x, y) coordinates of each sampled point
    - labels: [B, num_pt], dtype=torch.int32, where 1 means positive clicks and 0 means
      negative clicks
    """
    if pred_masks is None:  # if pred_masks is not provided, treat it as empty
        pred_masks = torch.zeros_like(gt_masks)
    assert gt_masks.dtype == torch.bool and gt_masks.size(1) == 1
    assert pred_masks.dtype == torch.bool and pred_masks.shape == gt_masks.shape
    assert num_pt >= 0

    B, _, H_im, W_im = gt_masks.shape
    device = gt_masks.device

    # false positive region, a new point sampled in this region should have
    # negative label to correct the FP error
    fp_masks = ~gt_masks & pred_masks
    # false negative region, a new point sampled in this region should have
    # positive label to correct the FN error
    fn_masks = gt_masks & ~pred_masks
    # whether the prediction completely match the ground-truth on each mask
    all_correct = torch.all((gt_masks == pred_masks).flatten(2), dim=2)
    all_correct = all_correct[..., None, None]

    # channel 0 is FP map, while channel 1 is FN map
    pts_noise = torch.rand(B, num_pt, H_im, W_im, 2, device=device)
    # sample a negative new click from FP region or a positive new click
    # from FN region, depend on where the maximum falls,
    # and in case the predictions are all correct (no FP or FN), we just
    # sample a negative click from the background region
    pts_noise[..., 0] *= fp_masks | (all_correct & ~gt_masks)
    pts_noise[..., 1] *= fn_masks
    # argmax over the flattened (H*W*2) map: even indices land in the FP
    # channel (negative clicks), odd indices in the FN channel (positive).
    pts_idx = pts_noise.flatten(2).argmax(dim=2)
    labels = (pts_idx % 2).to(torch.int32)
    pts_idx = pts_idx // 2
    # recover 2D pixel coordinates from the flattened index
    pts_x = pts_idx % W_im
    pts_y = pts_idx // W_im
    points = torch.stack([pts_x, pts_y], dim=2).to(torch.float)
    return points, labels
250
+
251
+
252
def sample_one_point_from_error_center(gt_masks, pred_masks, padding=True):
    """
    Sample 1 random point (along with its label) from the center of each error region,
    that is, the point with the largest distance to the boundary of each error region.
    This is the RITM sampling method from https://github.com/saic-vul/ritm_interactive_segmentation/blob/master/isegm/inference/clicker.py

    Inputs:
    - gt_masks: [B, 1, H_im, W_im] masks, dtype=torch.bool
    - pred_masks: [B, 1, H_im, W_im] masks, dtype=torch.bool or None
    - padding: if True, pad with boundary of 1 px for distance transform

    Outputs:
    - points: [B, 1, 2], dtype=torch.float, contains (x, y) coordinates of each sampled point
    - labels: [B, 1], dtype=torch.int32, where 1 means positive clicks and 0 means negative clicks
    """
    import cv2  # imported lazily so cv2 is only required by this sampler

    if pred_masks is None:
        pred_masks = torch.zeros_like(gt_masks)
    assert gt_masks.dtype == torch.bool and gt_masks.size(1) == 1
    assert pred_masks.dtype == torch.bool and pred_masks.shape == gt_masks.shape

    B, _, _, W_im = gt_masks.shape
    device = gt_masks.device

    # false positive region, a new point sampled in this region should have
    # negative label to correct the FP error
    fp_masks = ~gt_masks & pred_masks
    # false negative region, a new point sampled in this region should have
    # positive label to correct the FN error
    fn_masks = gt_masks & ~pred_masks

    # distance transforms run on CPU via OpenCV
    fp_masks = fp_masks.cpu().numpy()
    fn_masks = fn_masks.cpu().numpy()
    points = torch.zeros(B, 1, 2, dtype=torch.float)
    labels = torch.ones(B, 1, dtype=torch.int32)
    for b in range(B):
        fn_mask = fn_masks[b, 0]
        fp_mask = fp_masks[b, 0]
        if padding:
            # 1-px zero border so regions touching the image edge still get a
            # finite distance to "boundary"
            fn_mask = np.pad(fn_mask, ((1, 1), (1, 1)), "constant")
            fp_mask = np.pad(fp_mask, ((1, 1), (1, 1)), "constant")
        # compute the distance of each point in FN/FP region to its boundary
        fn_mask_dt = cv2.distanceTransform(fn_mask.astype(np.uint8), cv2.DIST_L2, 0)
        fp_mask_dt = cv2.distanceTransform(fp_mask.astype(np.uint8), cv2.DIST_L2, 0)
        if padding:
            fn_mask_dt = fn_mask_dt[1:-1, 1:-1]
            fp_mask_dt = fp_mask_dt[1:-1, 1:-1]

        # take the point in FN/FP region with the largest distance to its boundary
        fn_mask_dt_flat = fn_mask_dt.reshape(-1)
        fp_mask_dt_flat = fp_mask_dt.reshape(-1)
        fn_argmax = np.argmax(fn_mask_dt_flat)
        fp_argmax = np.argmax(fp_mask_dt_flat)
        # positive click if the deepest FN point is deeper than the deepest FP point
        is_positive = fn_mask_dt_flat[fn_argmax] > fp_mask_dt_flat[fp_argmax]
        pt_idx = fn_argmax if is_positive else fp_argmax
        points[b, 0, 0] = pt_idx % W_im  # x
        points[b, 0, 1] = pt_idx // W_im  # y
        labels[b, 0] = int(is_positive)

    points = points.to(device)
    labels = labels.to(device)
    return points, labels
315
+
316
+
317
def get_next_point(gt_masks, pred_masks, method):
    """Sample the next corrective click using the strategy named by `method`.

    `method` is either "uniform" (random point from the error regions) or
    "center" (RITM-style error-region center); anything else raises ValueError.
    """
    if method == "uniform":
        return sample_random_points_from_errors(gt_masks, pred_masks)
    if method == "center":
        return sample_one_point_from_error_center(gt_masks, pred_masks)
    raise ValueError(f"unknown sampling method {method}")
custom_nodes/comfyui-segment-anything-2/sam2/sam2_image_predictor.py ADDED
@@ -0,0 +1,446 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ import logging
8
+
9
+ from typing import List, Optional, Tuple, Union
10
+
11
+ import numpy as np
12
+ import torch
13
+ from PIL.Image import Image
14
+
15
+ from ..sam2.modeling.sam2_base import SAM2Base
16
+
17
+ from ..sam2.utils.transforms import SAM2Transforms
18
+
19
+
20
+ class SAM2ImagePredictor:
21
def __init__(
    self,
    sam_model: SAM2Base,
    mask_threshold=0.0,
    max_hole_area=0.0,
    max_sprinkle_area=0.0,
) -> None:
    """
    Uses SAM-2 to calculate the image embedding for an image, and then
    allow repeated, efficient mask prediction given prompts.

    Arguments:
      sam_model (SAM2Base): The model to use for mask prediction.
      mask_threshold (float): The threshold to use when converting mask logits
        to binary masks. Masks are thresholded at 0 by default.
      max_hole_area (float): If max_hole_area > 0, we fill small holes up to
        this maximum area in low_res_masks.
      max_sprinkle_area (float): If max_sprinkle_area > 0, we remove small
        sprinkles up to this maximum area in low_res_masks.
    """
    super().__init__()
    self.model = sam_model
    # Image + mask pre/post-processing pipeline sized to the model's input.
    self._transforms = SAM2Transforms(
        resolution=self.model.image_size,
        mask_threshold=mask_threshold,
        max_hole_area=max_hole_area,
        max_sprinkle_area=max_sprinkle_area,
    )

    # Predictor state
    self._is_image_set = False   # whether set_image / set_image_batch was called
    self._features = None        # cached backbone features of the current image(s)
    self._orig_hw = None         # original (H, W) of each input image
    # Whether the predictor is set for single image or a batch of images
    self._is_batch = False

    # Predictor config
    self.mask_threshold = mask_threshold

    # Spatial dim for backbone feature maps (high-res to low-res)
    self._bb_feat_sizes = [
        (256, 256),
        (128, 128),
        (64, 64),
    ]
+ ]
64
+
65
+ @torch.no_grad()
66
+ def set_image(
67
+ self,
68
+ image: Union[np.ndarray, Image],
69
+ ) -> None:
70
+ """
71
+ Calculates the image embeddings for the provided image, allowing
72
+ masks to be predicted with the 'predict' method.
73
+
74
+ Arguments:
75
+ image (np.ndarray or PIL Image): The input image to embed in RGB format. The image should be in HWC format if np.ndarray, or WHC format if PIL Image
76
+ with pixel values in [0, 255].
77
+ image_format (str): The color format of the image, in ['RGB', 'BGR'].
78
+ """
79
+ self.reset_predictor()
80
+ # Transform the image to the form expected by the model
81
+ if isinstance(image, np.ndarray):
82
+ #logging.info("For numpy array image, we assume (HxWxC) format")
83
+ self._orig_hw = [image.shape[:2]]
84
+ elif isinstance(image, Image):
85
+ w, h = image.size
86
+ self._orig_hw = [(h, w)]
87
+ else:
88
+ raise NotImplementedError("Image format not supported")
89
+
90
+ input_image = self._transforms(image)
91
+ input_image = input_image[None, ...].to(self.device)
92
+
93
+ assert (
94
+ len(input_image.shape) == 4 and input_image.shape[1] == 3
95
+ ), f"input_image must be of size 1x3xHxW, got {input_image.shape}"
96
+ #logging.info("Computing image embeddings for the provided image...")
97
+ backbone_out = self.model.forward_image(input_image)
98
+ _, vision_feats, _, _ = self.model._prepare_backbone_features(backbone_out)
99
+ # Add no_mem_embed, which is added to the lowest rest feat. map during training on videos
100
+ if self.model.directly_add_no_mem_embed:
101
+ vision_feats[-1] = vision_feats[-1] + self.model.no_mem_embed
102
+
103
+ feats = [
104
+ feat.permute(1, 2, 0).view(1, -1, *feat_size)
105
+ for feat, feat_size in zip(vision_feats[::-1], self._bb_feat_sizes[::-1])
106
+ ][::-1]
107
+ self._features = {"image_embed": feats[-1], "high_res_feats": feats[:-1]}
108
+ self._is_image_set = True
109
+ #logging.info("Image embeddings computed.")
110
+
111
+ @torch.no_grad()
112
+ def set_image_batch(
113
+ self,
114
+ image_list: List[Union[np.ndarray]],
115
+ ) -> None:
116
+ """
117
+ Calculates the image embeddings for the provided image batch, allowing
118
+ masks to be predicted with the 'predict_batch' method.
119
+
120
+ Arguments:
121
+ image_list (List[np.ndarray]): The input images to embed in RGB format. The image should be in HWC format if np.ndarray
122
+ with pixel values in [0, 255].
123
+ """
124
+ self.reset_predictor()
125
+ assert isinstance(image_list, list)
126
+ self._orig_hw = []
127
+ for image in image_list:
128
+ assert isinstance(
129
+ image, np.ndarray
130
+ ), "Images are expected to be an np.ndarray in RGB format, and of shape HWC"
131
+ self._orig_hw.append(image.shape[:2])
132
+ # Transform the image to the form expected by the model
133
+ img_batch = self._transforms.forward_batch(image_list)
134
+ img_batch = img_batch.to(self.device)
135
+ batch_size = img_batch.shape[0]
136
+ assert (
137
+ len(img_batch.shape) == 4 and img_batch.shape[1] == 3
138
+ ), f"img_batch must be of size Bx3xHxW, got {img_batch.shape}"
139
+ logging.info("Computing image embeddings for the provided images...")
140
+ backbone_out = self.model.forward_image(img_batch)
141
+ _, vision_feats, _, _ = self.model._prepare_backbone_features(backbone_out)
142
+ # Add no_mem_embed, which is added to the lowest rest feat. map during training on videos
143
+ if self.model.directly_add_no_mem_embed:
144
+ vision_feats[-1] = vision_feats[-1] + self.model.no_mem_embed
145
+
146
+ feats = [
147
+ feat.permute(1, 2, 0).view(batch_size, -1, *feat_size)
148
+ for feat, feat_size in zip(vision_feats[::-1], self._bb_feat_sizes[::-1])
149
+ ][::-1]
150
+ self._features = {"image_embed": feats[-1], "high_res_feats": feats[:-1]}
151
+ self._is_image_set = True
152
+ self._is_batch = True
153
+ logging.info("Image embeddings computed.")
154
+
155
+ def predict_batch(
156
+ self,
157
+ point_coords_batch: List[np.ndarray] = None,
158
+ point_labels_batch: List[np.ndarray] = None,
159
+ box_batch: List[np.ndarray] = None,
160
+ mask_input_batch: List[np.ndarray] = None,
161
+ multimask_output: bool = True,
162
+ return_logits: bool = False,
163
+ normalize_coords=True,
164
+ ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
165
+ """This function is very similar to predict(...), however it is used for batched mode, when the model is expected to generate predictions on multiple images.
166
+ It returns a tupele of lists of masks, ious, and low_res_masks_logits.
167
+ """
168
+ assert self._is_batch, "This function should only be used when in batched mode"
169
+ if not self._is_image_set:
170
+ raise RuntimeError(
171
+ "An image must be set with .set_image_batch(...) before mask prediction."
172
+ )
173
+ num_images = len(self._features["image_embed"])
174
+ all_masks = []
175
+ all_ious = []
176
+ all_low_res_masks = []
177
+ for img_idx in range(num_images):
178
+ # Transform input prompts
179
+ point_coords = (
180
+ point_coords_batch[img_idx] if point_coords_batch is not None else None
181
+ )
182
+ point_labels = (
183
+ point_labels_batch[img_idx] if point_labels_batch is not None else None
184
+ )
185
+ box = box_batch[img_idx] if box_batch is not None else None
186
+ mask_input = (
187
+ mask_input_batch[img_idx] if mask_input_batch is not None else None
188
+ )
189
+ mask_input, unnorm_coords, labels, unnorm_box = self._prep_prompts(
190
+ point_coords,
191
+ point_labels,
192
+ box,
193
+ mask_input,
194
+ normalize_coords,
195
+ img_idx=img_idx,
196
+ )
197
+ masks, iou_predictions, low_res_masks = self._predict(
198
+ unnorm_coords,
199
+ labels,
200
+ unnorm_box,
201
+ mask_input,
202
+ multimask_output,
203
+ return_logits=return_logits,
204
+ img_idx=img_idx,
205
+ )
206
+ masks_np = masks.squeeze(0).float().detach().cpu().numpy()
207
+ iou_predictions_np = (
208
+ iou_predictions.squeeze(0).float().detach().cpu().numpy()
209
+ )
210
+ low_res_masks_np = low_res_masks.squeeze(0).float().detach().cpu().numpy()
211
+ all_masks.append(masks_np)
212
+ all_ious.append(iou_predictions_np)
213
+ all_low_res_masks.append(low_res_masks_np)
214
+
215
+ return all_masks, all_ious, all_low_res_masks
216
+
217
+ def predict(
218
+ self,
219
+ point_coords: Optional[np.ndarray] = None,
220
+ point_labels: Optional[np.ndarray] = None,
221
+ box: Optional[np.ndarray] = None,
222
+ mask_input: Optional[np.ndarray] = None,
223
+ multimask_output: bool = True,
224
+ return_logits: bool = False,
225
+ normalize_coords=True,
226
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
227
+ """
228
+ Predict masks for the given input prompts, using the currently set image.
229
+
230
+ Arguments:
231
+ point_coords (np.ndarray or None): A Nx2 array of point prompts to the
232
+ model. Each point is in (X,Y) in pixels.
233
+ point_labels (np.ndarray or None): A length N array of labels for the
234
+ point prompts. 1 indicates a foreground point and 0 indicates a
235
+ background point.
236
+ box (np.ndarray or None): A length 4 array given a box prompt to the
237
+ model, in XYXY format.
238
+ mask_input (np.ndarray): A low resolution mask input to the model, typically
239
+ coming from a previous prediction iteration. Has form 1xHxW, where
240
+ for SAM, H=W=256.
241
+ multimask_output (bool): If true, the model will return three masks.
242
+ For ambiguous input prompts (such as a single click), this will often
243
+ produce better masks than a single prediction. If only a single
244
+ mask is needed, the model's predicted quality score can be used
245
+ to select the best mask. For non-ambiguous prompts, such as multiple
246
+ input prompts, multimask_output=False can give better results.
247
+ return_logits (bool): If true, returns un-thresholded masks logits
248
+ instead of a binary mask.
249
+ normalize_coords (bool): If true, the point coordinates will be normalized to the range [0,1] and point_coords is expected to be wrt. image dimensions.
250
+
251
+ Returns:
252
+ (np.ndarray): The output masks in CxHxW format, where C is the
253
+ number of masks, and (H, W) is the original image size.
254
+ (np.ndarray): An array of length C containing the model's
255
+ predictions for the quality of each mask.
256
+ (np.ndarray): An array of shape CxHxW, where C is the number
257
+ of masks and H=W=256. These low resolution logits can be passed to
258
+ a subsequent iteration as mask input.
259
+ """
260
+ if not self._is_image_set:
261
+ raise RuntimeError(
262
+ "An image must be set with .set_image(...) before mask prediction."
263
+ )
264
+
265
+ # Transform input prompts
266
+
267
+ mask_input, unnorm_coords, labels, unnorm_box = self._prep_prompts(
268
+ point_coords, point_labels, box, mask_input, normalize_coords
269
+ )
270
+
271
+ masks, iou_predictions, low_res_masks = self._predict(
272
+ unnorm_coords,
273
+ labels,
274
+ unnorm_box,
275
+ mask_input,
276
+ multimask_output,
277
+ return_logits=return_logits,
278
+ )
279
+
280
+ masks_np = masks.squeeze(0).float().detach().cpu().numpy()
281
+ iou_predictions_np = iou_predictions.squeeze(0).float().detach().cpu().numpy()
282
+ low_res_masks_np = low_res_masks.squeeze(0).float().detach().cpu().numpy()
283
+ return masks_np, iou_predictions_np, low_res_masks_np
284
+
285
+ def _prep_prompts(
286
+ self, point_coords, point_labels, box, mask_logits, normalize_coords, img_idx=-1
287
+ ):
288
+
289
+ unnorm_coords, labels, unnorm_box, mask_input = None, None, None, None
290
+ if point_coords is not None:
291
+ assert (
292
+ point_labels is not None
293
+ ), "point_labels must be supplied if point_coords is supplied."
294
+ point_coords = torch.as_tensor(
295
+ point_coords, dtype=torch.float, device=self.device
296
+ )
297
+ unnorm_coords = self._transforms.transform_coords(
298
+ point_coords, normalize=normalize_coords, orig_hw=self._orig_hw[img_idx]
299
+ )
300
+ labels = torch.as_tensor(point_labels, dtype=torch.int, device=self.device)
301
+ if len(unnorm_coords.shape) == 2:
302
+ unnorm_coords, labels = unnorm_coords[None, ...], labels[None, ...]
303
+ if box is not None:
304
+ box = torch.as_tensor(box, dtype=torch.float, device=self.device)
305
+ unnorm_box = self._transforms.transform_boxes(
306
+ box, normalize=normalize_coords, orig_hw=self._orig_hw[img_idx]
307
+ ) # Bx2x2
308
+ if mask_logits is not None:
309
+ mask_input = torch.as_tensor(
310
+ mask_logits, dtype=torch.float, device=self.device
311
+ )
312
+ if len(mask_input.shape) == 3:
313
+ mask_input = mask_input[None, :, :, :]
314
+ return mask_input, unnorm_coords, labels, unnorm_box
315
+
316
    @torch.no_grad()
    def _predict(
        self,
        point_coords: Optional[torch.Tensor],
        point_labels: Optional[torch.Tensor],
        boxes: Optional[torch.Tensor] = None,
        mask_input: Optional[torch.Tensor] = None,
        multimask_output: bool = True,
        return_logits: bool = False,
        img_idx: int = -1,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Predict masks for the given input prompts, using the currently set image.
        Input prompts are batched torch tensors and are expected to already be
        transformed to the input frame using SAM2Transforms.

        Arguments:
          point_coords (torch.Tensor or None): A BxNx2 array of point prompts to the
            model. Each point is in (X,Y) in pixels.
          point_labels (torch.Tensor or None): A BxN array of labels for the
            point prompts. 1 indicates a foreground point and 0 indicates a
            background point.
          boxes (np.ndarray or None): A Bx4 array given a box prompt to the
            model, in XYXY format.
          mask_input (np.ndarray): A low resolution mask input to the model, typically
            coming from a previous prediction iteration. Has form Bx1xHxW, where
            for SAM, H=W=256. Masks returned by a previous iteration of the
            predict method do not need further transformation.
          multimask_output (bool): If true, the model will return three masks.
            For ambiguous input prompts (such as a single click), this will often
            produce better masks than a single prediction. If only a single
            mask is needed, the model's predicted quality score can be used
            to select the best mask. For non-ambiguous prompts, such as multiple
            input prompts, multimask_output=False can give better results.
          return_logits (bool): If true, returns un-thresholded masks logits
            instead of a binary mask.
          img_idx (int): which image of a batch to predict on; -1 (the default)
            selects the last/only image set.

        Returns:
          (torch.Tensor): The output masks in BxCxHxW format, where C is the
            number of masks, and (H, W) is the original image size.
          (torch.Tensor): An array of shape BxC containing the model's
            predictions for the quality of each mask.
          (torch.Tensor): An array of shape BxCxHxW, where C is the number
            of masks and H=W=256. These low res logits can be passed to
            a subsequent iteration as mask input.
        """
        if not self._is_image_set:
            raise RuntimeError(
                "An image must be set with .set_image(...) before mask prediction."
            )

        if point_coords is not None:
            concat_points = (point_coords, point_labels)
        else:
            concat_points = None

        # Embed prompts
        if boxes is not None:
            box_coords = boxes.reshape(-1, 2, 2)
            # Labels 2 and 3 mark box top-left / bottom-right corners.
            box_labels = torch.tensor([[2, 3]], dtype=torch.int, device=boxes.device)
            box_labels = box_labels.repeat(boxes.size(0), 1)
            # we merge "boxes" and "points" into a single "concat_points" input (where
            # boxes are added at the beginning) to sam_prompt_encoder
            if concat_points is not None:
                concat_coords = torch.cat([box_coords, concat_points[0]], dim=1)
                concat_labels = torch.cat([box_labels, concat_points[1]], dim=1)
                concat_points = (concat_coords, concat_labels)
            else:
                concat_points = (box_coords, box_labels)

        sparse_embeddings, dense_embeddings = self.model.sam_prompt_encoder(
            points=concat_points,
            boxes=None,
            masks=mask_input,
        )

        # Predict masks
        batched_mode = (
            concat_points is not None and concat_points[0].shape[0] > 1
        )  # multi object prediction
        # Take this image's high-res feature slices (keeping a batch dim of 1).
        high_res_features = [
            feat_level[img_idx].unsqueeze(0)
            for feat_level in self._features["high_res_feats"]
        ]
        low_res_masks, iou_predictions, _, _ = self.model.sam_mask_decoder(
            image_embeddings=self._features["image_embed"][img_idx].unsqueeze(0),
            image_pe=self.model.sam_prompt_encoder.get_dense_pe(),
            sparse_prompt_embeddings=sparse_embeddings,
            dense_prompt_embeddings=dense_embeddings,
            multimask_output=multimask_output,
            repeat_image=batched_mode,
            high_res_features=high_res_features,
        )

        # Upscale the masks to the original image resolution
        masks = self._transforms.postprocess_masks(
            low_res_masks, self._orig_hw[img_idx]
        )
        # Keep logits bounded so they are numerically safe to feed back
        # as mask_input in a later prediction iteration.
        low_res_masks = torch.clamp(low_res_masks, -32.0, 32.0)
        if not return_logits:
            masks = masks > self.mask_threshold

        return masks, iou_predictions, low_res_masks
419
+
420
+ def get_image_embedding(self) -> torch.Tensor:
421
+ """
422
+ Returns the image embeddings for the currently set image, with
423
+ shape 1xCxHxW, where C is the embedding dimension and (H,W) are
424
+ the embedding spatial dimension of SAM (typically C=256, H=W=64).
425
+ """
426
+ if not self._is_image_set:
427
+ raise RuntimeError(
428
+ "An image must be set with .set_image(...) to generate an embedding."
429
+ )
430
+ assert (
431
+ self._features is not None
432
+ ), "Features must exist if an image has been set."
433
+ return self._features["image_embed"]
434
+
435
    @property
    def device(self) -> torch.device:
        """Device of the wrapped SAM 2 model (all inputs are moved here)."""
        return self.model.device
438
+
439
+ def reset_predictor(self) -> None:
440
+ """
441
+ Resets the image embeddings and other state variables.
442
+ """
443
+ self._is_image_set = False
444
+ self._features = None
445
+ self._orig_hw = None
446
+ self._is_batch = False
custom_nodes/comfyui-segment-anything-2/sam2/sam2_video_predictor.py ADDED
@@ -0,0 +1,1154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ import warnings
8
+ from collections import OrderedDict
9
+
10
+ import torch
11
+
12
+ from tqdm import tqdm
13
+
14
+ from ..sam2.modeling.sam2_base import NO_OBJ_SCORE, SAM2Base
15
+ from ..sam2.utils.misc import concat_points, fill_holes_in_mask_scores, load_video_frames
16
+
17
+
18
+ class SAM2VideoPredictor(SAM2Base):
19
+ """The predictor class to handle user interactions and manage inference states."""
20
+
21
    def __init__(
        self,
        fill_hole_area=0,
        # whether to apply non-overlapping constraints on the output object masks
        non_overlap_masks=False,
        # whether to clear non-conditioning memory of the surrounding frames (which may contain outdated information) after adding correction clicks;
        # note that this would only apply to *single-object tracking* unless `clear_non_cond_mem_for_multi_obj` is also set to True)
        clear_non_cond_mem_around_input=False,
        # whether to also clear non-conditioning memory of the surrounding frames (only effective when `clear_non_cond_mem_around_input` is True).
        clear_non_cond_mem_for_multi_obj=False,
        # if `add_all_frames_to_correct_as_cond` is True, we also append to the conditioning frame list any frame that receives a later correction click
        # if `add_all_frames_to_correct_as_cond` is False, we restrict the conditioning frame list to only the initial conditioning frames
        add_all_frames_to_correct_as_cond=False,
        **kwargs,
    ):
        """Configure video-tracking behavior; remaining kwargs go to SAM2Base."""
        super().__init__(**kwargs)
        self.fill_hole_area = fill_hole_area
        self.non_overlap_masks = non_overlap_masks
        self.clear_non_cond_mem_around_input = clear_non_cond_mem_around_input
        self.clear_non_cond_mem_for_multi_obj = clear_non_cond_mem_for_multi_obj
        self.add_all_frames_to_correct_as_cond = add_all_frames_to_correct_as_cond
42
+
43
    @torch.inference_mode()
    def init_state(
        self,
        images,
        video_height,
        video_width,
        device='cuda',
        offload_video_to_cpu=False,
        offload_state_to_cpu=False,
        async_loading_frames=False,
    ):
        """Initialize an inference state for a set of already-loaded video frames.

        Args:
            images: preprocessed video frames, indexable by frame index.
            video_height: original video height, used to resize final output masks.
            video_width: original video width, used to resize final output masks.
            device: device string/object the model runs on (e.g. 'cuda' or 'cpu').
            offload_video_to_cpu: keep the video frames in CPU memory.
            offload_state_to_cpu: keep the tracking state in CPU memory.
            async_loading_frames: unused in this variant; kept for API
                compatibility with the upstream video-path loader.

        Returns:
            A dict holding all per-session tracking state.
        """
        # NOTE: unlike upstream SAM 2, the frames are passed in directly rather
        # than loaded from a video path; the upstream loading call is kept here
        # for reference:
        # images, video_height, video_width = load_video_frames(
        #     video_path=video_path,
        #     image_size=self.image_size,
        #     offload_video_to_cpu=offload_video_to_cpu,
        #     async_loading_frames=async_loading_frames,
        # )
        inference_state = {}
        inference_state["images"] = images
        inference_state["num_frames"] = len(images)
        # whether to offload the video frames to CPU memory
        # turning on this option saves the GPU memory with only a very small overhead
        inference_state["offload_video_to_cpu"] = offload_video_to_cpu
        # whether to offload the inference state to CPU memory
        # turning on this option saves the GPU memory at the cost of a lower tracking fps
        # (e.g. in a test case of 768x768 model, fps dropped from 27 to 24 when tracking one object
        # and from 24 to 21 when tracking two objects)
        inference_state["offload_state_to_cpu"] = offload_state_to_cpu
        # the original video height and width, used for resizing final output scores
        inference_state["video_height"] = video_height
        inference_state["video_width"] = video_width
        inference_state["device"] = torch.device(device)
        if offload_state_to_cpu:
            inference_state["storage_device"] = torch.device("cpu")
        else:
            inference_state["storage_device"] = torch.device(device)
        # inputs on each frame
        inference_state["point_inputs_per_obj"] = {}
        inference_state["mask_inputs_per_obj"] = {}
        # visual features on a small number of recently visited frames for quick interactions
        inference_state["cached_features"] = {}
        # values that don't change across frames (so we only need to hold one copy of them)
        inference_state["constants"] = {}
        # mapping between client-side object id and model-side object index
        inference_state["obj_id_to_idx"] = OrderedDict()
        inference_state["obj_idx_to_id"] = OrderedDict()
        inference_state["obj_ids"] = []
        # A storage to hold the model's tracking results and states on each frame
        inference_state["output_dict"] = {
            "cond_frame_outputs": {},  # dict containing {frame_idx: <out>}
            "non_cond_frame_outputs": {},  # dict containing {frame_idx: <out>}
        }
        # Slice (view) of each object tracking results, sharing the same memory with "output_dict"
        inference_state["output_dict_per_obj"] = {}
        # A temporary storage to hold new outputs when user interact with a frame
        # to add clicks or mask (it's merged into "output_dict" before propagation starts)
        inference_state["temp_output_dict_per_obj"] = {}
        # Frames that already holds consolidated outputs from click or mask inputs
        # (we directly use their consolidated outputs during tracking)
        inference_state["consolidated_frame_inds"] = {
            "cond_frame_outputs": set(),  # set containing frame indices
            "non_cond_frame_outputs": set(),  # set containing frame indices
        }
        # metadata for each tracking frame (e.g. which direction it's tracked)
        inference_state["tracking_has_started"] = False
        inference_state["frames_already_tracked"] = {}
        # Warm up the visual backbone and cache the image feature on frame 0
        self._get_image_feature(inference_state, frame_idx=0, batch_size=1)
        return inference_state
113
+
114
+ def _obj_id_to_idx(self, inference_state, obj_id):
115
+ """Map client-side object id to model-side object index."""
116
+ obj_idx = inference_state["obj_id_to_idx"].get(obj_id, None)
117
+ if obj_idx is not None:
118
+ return obj_idx
119
+
120
+ # This is a new object id not sent to the server before. We only allow adding
121
+ # new objects *before* the tracking starts.
122
+ allow_new_object = not inference_state["tracking_has_started"]
123
+ if allow_new_object:
124
+ # get the next object slot
125
+ obj_idx = len(inference_state["obj_id_to_idx"])
126
+ inference_state["obj_id_to_idx"][obj_id] = obj_idx
127
+ inference_state["obj_idx_to_id"][obj_idx] = obj_id
128
+ inference_state["obj_ids"] = list(inference_state["obj_id_to_idx"])
129
+ # set up input and output structures for this object
130
+ inference_state["point_inputs_per_obj"][obj_idx] = {}
131
+ inference_state["mask_inputs_per_obj"][obj_idx] = {}
132
+ inference_state["output_dict_per_obj"][obj_idx] = {
133
+ "cond_frame_outputs": {}, # dict containing {frame_idx: <out>}
134
+ "non_cond_frame_outputs": {}, # dict containing {frame_idx: <out>}
135
+ }
136
+ inference_state["temp_output_dict_per_obj"][obj_idx] = {
137
+ "cond_frame_outputs": {}, # dict containing {frame_idx: <out>}
138
+ "non_cond_frame_outputs": {}, # dict containing {frame_idx: <out>}
139
+ }
140
+ return obj_idx
141
+ else:
142
+ raise RuntimeError(
143
+ f"Cannot add new object id {obj_id} after tracking starts. "
144
+ f"All existing object ids: {inference_state['obj_ids']}. "
145
+ f"Please call 'reset_state' to restart from scratch."
146
+ )
147
+
148
+ def _obj_idx_to_id(self, inference_state, obj_idx):
149
+ """Map model-side object index to client-side object id."""
150
+ return inference_state["obj_idx_to_id"][obj_idx]
151
+
152
+ def _get_obj_num(self, inference_state):
153
+ """Get the total number of unique object ids received so far in this session."""
154
+ return len(inference_state["obj_idx_to_id"])
155
+
156
    @torch.inference_mode()
    def add_new_points_or_box(
        self,
        inference_state,
        frame_idx,
        obj_id,
        points=None,
        labels=None,
        clear_old_points=True,
        normalize_coords=True,
        box=None,
    ):
        """Add new point (and optionally box) prompts for one object on a frame.

        Args:
            inference_state: state dict returned by `init_state`.
            frame_idx: index of the frame receiving the prompts.
            obj_id: client-side id of the object being prompted.
            points: Nx2 point coordinates (or None when only a box is given).
            labels: length-N point labels (1=foreground, 0=background).
            clear_old_points: drop previously added points on this frame.
            normalize_coords: treat `points` as pixel coordinates of the
                original video and normalize them before scaling to model size.
            box: optional XYXY box prompt, encoded as two extra points with
                labels 2 and 3.

        Returns:
            (frame_idx, obj_ids, video_res_masks): the frame index, the list of
            all object ids, and the consolidated masks at video resolution.
        """
        obj_idx = self._obj_id_to_idx(inference_state, obj_id)
        point_inputs_per_frame = inference_state["point_inputs_per_obj"][obj_idx]
        mask_inputs_per_frame = inference_state["mask_inputs_per_obj"][obj_idx]

        if (points is not None) != (labels is not None):
            raise ValueError("points and labels must be provided together")
        if points is None and box is None:
            raise ValueError("at least one of points or box must be provided as input")

        if points is None:
            points = torch.zeros(0, 2, dtype=torch.float32)
        elif not isinstance(points, torch.Tensor):
            points = torch.tensor(points, dtype=torch.float32)
        if labels is None:
            labels = torch.zeros(0, dtype=torch.int32)
        elif not isinstance(labels, torch.Tensor):
            labels = torch.tensor(labels, dtype=torch.int32)
        if points.dim() == 2:
            points = points.unsqueeze(0)  # add batch dimension
        if labels.dim() == 1:
            labels = labels.unsqueeze(0)  # add batch dimension

        # If `box` is provided, we add it as the first two points with labels 2 and 3
        # along with the user-provided points (consistent with how SAM 2 is trained).
        if box is not None:
            if not clear_old_points:
                raise ValueError(
                    "cannot add box without clearing old points, since "
                    "box prompt must be provided before any point prompt "
                    "(please use clear_old_points=True instead)"
                )
            if inference_state["tracking_has_started"]:
                warnings.warn(
                    "You are adding a box after tracking starts. SAM 2 may not always be "
                    "able to incorporate a box prompt for *refinement*. If you intend to "
                    "use box prompt as an *initial* input before tracking, please call "
                    "'reset_state' on the inference state to restart from scratch.",
                    category=UserWarning,
                    stacklevel=2,
                )
            if not isinstance(box, torch.Tensor):
                box = torch.tensor(box, dtype=torch.float32, device=points.device)
            box_coords = box.reshape(1, 2, 2)
            box_labels = torch.tensor([2, 3], dtype=torch.int32, device=labels.device)
            box_labels = box_labels.reshape(1, 2)
            points = torch.cat([box_coords, points], dim=1)
            labels = torch.cat([box_labels, labels], dim=1)

        if normalize_coords:
            video_H = inference_state["video_height"]
            video_W = inference_state["video_width"]
            points = points / torch.tensor([video_W, video_H]).to(points.device)
        # scale the (normalized) coordinates by the model's internal image size
        points = points * self.image_size
        points = points.to(inference_state["device"])
        labels = labels.to(inference_state["device"])

        if not clear_old_points:
            point_inputs = point_inputs_per_frame.get(frame_idx, None)
        else:
            point_inputs = None
        point_inputs = concat_points(point_inputs, points, labels)

        point_inputs_per_frame[frame_idx] = point_inputs
        # Point and mask prompts are mutually exclusive on a frame.
        mask_inputs_per_frame.pop(frame_idx, None)
        # If this frame hasn't been tracked before, we treat it as an initial conditioning
        # frame, meaning that the inputs points are to generate segments on this frame without
        # using any memory from other frames, like in SAM. Otherwise (if it has been tracked),
        # the input points will be used to correct the already tracked masks.
        is_init_cond_frame = frame_idx not in inference_state["frames_already_tracked"]
        # whether to track in reverse time order
        if is_init_cond_frame:
            reverse = False
        else:
            reverse = inference_state["frames_already_tracked"][frame_idx]["reverse"]
        obj_output_dict = inference_state["output_dict_per_obj"][obj_idx]
        obj_temp_output_dict = inference_state["temp_output_dict_per_obj"][obj_idx]
        # Add a frame to conditioning output if it's an initial conditioning frame or
        # if the model sees all frames receiving clicks/mask as conditioning frames.
        is_cond = is_init_cond_frame or self.add_all_frames_to_correct_as_cond
        storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs"

        # Get any previously predicted mask logits on this object and feed it along with
        # the new clicks into the SAM mask decoder.
        prev_sam_mask_logits = None
        # lookup temporary output dict first, which contains the most recent output
        # (if not found, then lookup conditioning and non-conditioning frame output)
        prev_out = obj_temp_output_dict[storage_key].get(frame_idx)
        if prev_out is None:
            prev_out = obj_output_dict["cond_frame_outputs"].get(frame_idx)
            if prev_out is None:
                prev_out = obj_output_dict["non_cond_frame_outputs"].get(frame_idx)

        if prev_out is not None and prev_out["pred_masks"] is not None:
            prev_sam_mask_logits = prev_out["pred_masks"].to(inference_state["device"], non_blocking=True)
            # Clamp the scale of prev_sam_mask_logits to avoid rare numerical issues.
            prev_sam_mask_logits = torch.clamp(prev_sam_mask_logits, -32.0, 32.0)
        current_out, _ = self._run_single_frame_inference(
            inference_state=inference_state,
            output_dict=obj_output_dict,  # run on the slice of a single object
            frame_idx=frame_idx,
            batch_size=1,  # run on the slice of a single object
            is_init_cond_frame=is_init_cond_frame,
            point_inputs=point_inputs,
            mask_inputs=None,
            reverse=reverse,
            # Skip the memory encoder when adding clicks or mask. We execute the memory encoder
            # at the beginning of `propagate_in_video` (after user finalize their clicks). This
            # allows us to enforce non-overlapping constraints on all objects before encoding
            # them into memory.
            run_mem_encoder=False,
            prev_sam_mask_logits=prev_sam_mask_logits,
        )
        # Add the output to the output dict (to be used as future memory)
        obj_temp_output_dict[storage_key][frame_idx] = current_out

        # Resize the output mask to the original video resolution
        obj_ids = inference_state["obj_ids"]
        consolidated_out = self._consolidate_temp_output_across_obj(
            inference_state,
            frame_idx,
            is_cond=is_cond,
            run_mem_encoder=False,
            consolidate_at_video_res=True,
        )
        _, video_res_masks = self._get_orig_video_res_output(
            inference_state, consolidated_out["pred_masks_video_res"]
        )
        return frame_idx, obj_ids, video_res_masks
298
+
299
+ def add_new_points(self, *args, **kwargs):
300
+ """Deprecated method. Please use `add_new_points_or_box` instead."""
301
+ return self.add_new_points_or_box(*args, **kwargs)
302
+
303
    @torch.inference_mode()
    def add_new_mask(
        self,
        inference_state,
        frame_idx,
        obj_id,
        mask,
    ):
        """Add a binary mask prompt for one object on a frame.

        Args:
            inference_state: state dict returned by `init_state`.
            frame_idx: index of the frame receiving the mask.
            obj_id: client-side id of the object being prompted.
            mask: HxW boolean mask (tensor or array-like); resized to the
                model's square input size if needed.

        Returns:
            (frame_idx, obj_ids, video_res_masks): the frame index, the list of
            all object ids, and the consolidated masks at video resolution.
        """
        obj_idx = self._obj_id_to_idx(inference_state, obj_id)
        point_inputs_per_frame = inference_state["point_inputs_per_obj"][obj_idx]
        mask_inputs_per_frame = inference_state["mask_inputs_per_obj"][obj_idx]

        if not isinstance(mask, torch.Tensor):
            mask = torch.tensor(mask, dtype=torch.bool)
        assert mask.dim() == 2
        mask_H, mask_W = mask.shape
        mask_inputs_orig = mask[None, None]  # add batch and channel dimension
        mask_inputs_orig = mask_inputs_orig.float().to(inference_state["device"])

        # resize the mask if it doesn't match the model's image size
        if mask_H != self.image_size or mask_W != self.image_size:
            mask_inputs = torch.nn.functional.interpolate(
                mask_inputs_orig,
                size=(self.image_size, self.image_size),
                align_corners=False,
                mode="bilinear",
                antialias=True,  # use antialias for downsampling
            )
            # Re-binarize after the bilinear resize.
            mask_inputs = (mask_inputs >= 0.5).float()
        else:
            mask_inputs = mask_inputs_orig

        mask_inputs_per_frame[frame_idx] = mask_inputs
        # Point and mask prompts are mutually exclusive on a frame.
        point_inputs_per_frame.pop(frame_idx, None)
        # If this frame hasn't been tracked before, we treat it as an initial conditioning
        # frame, meaning that the inputs points are to generate segments on this frame without
        # using any memory from other frames, like in SAM. Otherwise (if it has been tracked),
        # the input points will be used to correct the already tracked masks.
        is_init_cond_frame = frame_idx not in inference_state["frames_already_tracked"]
        # whether to track in reverse time order
        if is_init_cond_frame:
            reverse = False
        else:
            reverse = inference_state["frames_already_tracked"][frame_idx]["reverse"]
        obj_output_dict = inference_state["output_dict_per_obj"][obj_idx]
        obj_temp_output_dict = inference_state["temp_output_dict_per_obj"][obj_idx]
        # Add a frame to conditioning output if it's an initial conditioning frame or
        # if the model sees all frames receiving clicks/mask as conditioning frames.
        is_cond = is_init_cond_frame or self.add_all_frames_to_correct_as_cond
        storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs"

        current_out, _ = self._run_single_frame_inference(
            inference_state=inference_state,
            output_dict=obj_output_dict,  # run on the slice of a single object
            frame_idx=frame_idx,
            batch_size=1,  # run on the slice of a single object
            is_init_cond_frame=is_init_cond_frame,
            point_inputs=None,
            mask_inputs=mask_inputs,
            reverse=reverse,
            # Skip the memory encoder when adding clicks or mask. We execute the memory encoder
            # at the beginning of `propagate_in_video` (after user finalize their clicks). This
            # allows us to enforce non-overlapping constraints on all objects before encoding
            # them into memory.
            run_mem_encoder=False,
        )
        # Add the output to the output dict (to be used as future memory)
        obj_temp_output_dict[storage_key][frame_idx] = current_out

        # Resize the output mask to the original video resolution
        obj_ids = inference_state["obj_ids"]
        consolidated_out = self._consolidate_temp_output_across_obj(
            inference_state,
            frame_idx,
            is_cond=is_cond,
            run_mem_encoder=False,
            consolidate_at_video_res=True,
        )
        _, video_res_masks = self._get_orig_video_res_output(
            inference_state, consolidated_out["pred_masks_video_res"]
        )
        return frame_idx, obj_ids, video_res_masks
386
+
387
+ def _get_orig_video_res_output(self, inference_state, any_res_masks):
388
+ """
389
+ Resize the object scores to the original video resolution (video_res_masks)
390
+ and apply non-overlapping constraints for final output.
391
+ """
392
+ device = inference_state["device"]
393
+ video_H = inference_state["video_height"]
394
+ video_W = inference_state["video_width"]
395
+ any_res_masks = any_res_masks.to(device, non_blocking=True)
396
+ if any_res_masks.shape[-2:] == (video_H, video_W):
397
+ video_res_masks = any_res_masks
398
+ else:
399
+ video_res_masks = torch.nn.functional.interpolate(
400
+ any_res_masks,
401
+ size=(video_H, video_W),
402
+ mode="bilinear",
403
+ align_corners=False,
404
+ )
405
+ if self.non_overlap_masks:
406
+ video_res_masks = self._apply_non_overlapping_constraints(video_res_masks)
407
+ return any_res_masks, video_res_masks
408
+
409
+ def _consolidate_temp_output_across_obj(
410
+ self,
411
+ inference_state,
412
+ frame_idx,
413
+ is_cond,
414
+ run_mem_encoder,
415
+ consolidate_at_video_res=False,
416
+ ):
417
+ """
418
+ Consolidate the per-object temporary outputs in `temp_output_dict_per_obj` on
419
+ a frame into a single output for all objects, including
420
+ 1) fill any missing objects either from `output_dict_per_obj` (if they exist in
421
+ `output_dict_per_obj` for this frame) or leave them as placeholder values
422
+ (if they don't exist in `output_dict_per_obj` for this frame);
423
+ 2) if specified, rerun memory encoder after apply non-overlapping constraints
424
+ on the object scores.
425
+ """
426
+ batch_size = self._get_obj_num(inference_state)
427
+ storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs"
428
+ # Optionally, we allow consolidating the temporary outputs at the original
429
+ # video resolution (to provide a better editing experience for mask prompts).
430
+ if consolidate_at_video_res:
431
+ assert not run_mem_encoder, "memory encoder cannot run at video resolution"
432
+ consolidated_H = inference_state["video_height"]
433
+ consolidated_W = inference_state["video_width"]
434
+ consolidated_mask_key = "pred_masks_video_res"
435
+ else:
436
+ consolidated_H = consolidated_W = self.image_size // 4
437
+ consolidated_mask_key = "pred_masks"
438
+
439
+ # Initialize `consolidated_out`. Its "maskmem_features" and "maskmem_pos_enc"
440
+ # will be added when rerunning the memory encoder after applying non-overlapping
441
+ # constraints to object scores. Its "pred_masks" are prefilled with a large
442
+ # negative value (NO_OBJ_SCORE) to represent missing objects.
443
+ consolidated_out = {
444
+ "maskmem_features": None,
445
+ "maskmem_pos_enc": None,
446
+ consolidated_mask_key: torch.full(
447
+ size=(batch_size, 1, consolidated_H, consolidated_W),
448
+ fill_value=NO_OBJ_SCORE,
449
+ dtype=torch.float32,
450
+ device=inference_state["storage_device"],
451
+ ),
452
+ "obj_ptr": torch.full(
453
+ size=(batch_size, self.hidden_dim),
454
+ fill_value=NO_OBJ_SCORE,
455
+ dtype=torch.float32,
456
+ device=inference_state["device"],
457
+ ),
458
+ "object_score_logits": torch.full(
459
+ size=(batch_size, 1),
460
+ # default to 10.0 for object_score_logits, i.e. assuming the object is
461
+ # present as sigmoid(10)=1, same as in `predict_masks` of `MaskDecoder`
462
+ fill_value=10.0,
463
+ dtype=torch.float32,
464
+ device=inference_state["device"],
465
+ ),
466
+ }
467
+ empty_mask_ptr = None
468
+ for obj_idx in range(batch_size):
469
+ obj_temp_output_dict = inference_state["temp_output_dict_per_obj"][obj_idx]
470
+ obj_output_dict = inference_state["output_dict_per_obj"][obj_idx]
471
+ out = obj_temp_output_dict[storage_key].get(frame_idx, None)
472
+ # If the object doesn't appear in "temp_output_dict_per_obj" on this frame,
473
+ # we fall back and look up its previous output in "output_dict_per_obj".
474
+ # We look up both "cond_frame_outputs" and "non_cond_frame_outputs" in
475
+ # "output_dict_per_obj" to find a previous output for this object.
476
+ if out is None:
477
+ out = obj_output_dict["cond_frame_outputs"].get(frame_idx, None)
478
+ if out is None:
479
+ out = obj_output_dict["non_cond_frame_outputs"].get(frame_idx, None)
480
+ # If the object doesn't appear in "output_dict_per_obj" either, we skip it
481
+ # and leave its mask scores to the default scores (i.e. the NO_OBJ_SCORE
482
+ # placeholder above) and set its object pointer to be a dummy pointer.
483
+ if out is None:
484
+ # Fill in dummy object pointers for those objects without any inputs or
485
+ # tracking outcomes on this frame (only do it under `run_mem_encoder=True`,
486
+ # i.e. when we need to build the memory for tracking).
487
+ if run_mem_encoder:
488
+ if empty_mask_ptr is None:
489
+ empty_mask_ptr = self._get_empty_mask_ptr(
490
+ inference_state, frame_idx
491
+ )
492
+ # fill object pointer with a dummy pointer (based on an empty mask)
493
+ consolidated_out["obj_ptr"][obj_idx : obj_idx + 1] = empty_mask_ptr
494
+ continue
495
+ # Add the temporary object output mask to consolidated output mask
496
+ obj_mask = out["pred_masks"]
497
+ consolidated_pred_masks = consolidated_out[consolidated_mask_key]
498
+ if obj_mask.shape[-2:] == consolidated_pred_masks.shape[-2:]:
499
+ consolidated_pred_masks[obj_idx : obj_idx + 1] = obj_mask
500
+ else:
501
+ # Resize first if temporary object mask has a different resolution
502
+ resized_obj_mask = torch.nn.functional.interpolate(
503
+ obj_mask,
504
+ size=consolidated_pred_masks.shape[-2:],
505
+ mode="bilinear",
506
+ align_corners=False,
507
+ )
508
+ consolidated_pred_masks[obj_idx : obj_idx + 1] = resized_obj_mask
509
+ consolidated_out["obj_ptr"][obj_idx : obj_idx + 1] = out["obj_ptr"]
510
+ consolidated_out["object_score_logits"][obj_idx : obj_idx + 1] = out[
511
+ "object_score_logits"
512
+ ]
513
+
514
+ # Optionally, apply non-overlapping constraints on the consolidated scores
515
+ # and rerun the memory encoder
516
+ if run_mem_encoder:
517
+ device = inference_state["device"]
518
+ high_res_masks = torch.nn.functional.interpolate(
519
+ consolidated_out["pred_masks"].to(device, non_blocking=True),
520
+ size=(self.image_size, self.image_size),
521
+ mode="bilinear",
522
+ align_corners=False,
523
+ )
524
+ if self.non_overlap_masks_for_mem_enc:
525
+ high_res_masks = self._apply_non_overlapping_constraints(high_res_masks)
526
+ maskmem_features, maskmem_pos_enc = self._run_memory_encoder(
527
+ inference_state=inference_state,
528
+ frame_idx=frame_idx,
529
+ batch_size=batch_size,
530
+ high_res_masks=high_res_masks,
531
+ object_score_logits=consolidated_out["object_score_logits"],
532
+ is_mask_from_pts=True, # these frames are what the user interacted with
533
+ )
534
+ consolidated_out["maskmem_features"] = maskmem_features
535
+ consolidated_out["maskmem_pos_enc"] = maskmem_pos_enc
536
+
537
+ return consolidated_out
538
+
539
+ def _get_empty_mask_ptr(self, inference_state, frame_idx):
540
+ """Get a dummy object pointer based on an empty mask on the current frame."""
541
+ # A dummy (empty) mask with a single object
542
+ batch_size = 1
543
+ mask_inputs = torch.zeros(
544
+ (batch_size, 1, self.image_size, self.image_size),
545
+ dtype=torch.float32,
546
+ device=inference_state["device"],
547
+ )
548
+
549
+ # Retrieve correct image features
550
+ (
551
+ _,
552
+ _,
553
+ current_vision_feats,
554
+ current_vision_pos_embeds,
555
+ feat_sizes,
556
+ ) = self._get_image_feature(inference_state, frame_idx, batch_size)
557
+
558
+ # Feed the empty mask and image feature above to get a dummy object pointer
559
+ current_out = self.track_step(
560
+ frame_idx=frame_idx,
561
+ is_init_cond_frame=True,
562
+ current_vision_feats=current_vision_feats,
563
+ current_vision_pos_embeds=current_vision_pos_embeds,
564
+ feat_sizes=feat_sizes,
565
+ point_inputs=None,
566
+ mask_inputs=mask_inputs,
567
+ output_dict={},
568
+ num_frames=inference_state["num_frames"],
569
+ track_in_reverse=False,
570
+ run_mem_encoder=False,
571
+ prev_sam_mask_logits=None,
572
+ )
573
+ return current_out["obj_ptr"]
574
+
575
+ @torch.inference_mode()
576
+ def propagate_in_video_preflight(self, inference_state):
577
+ """Prepare inference_state and consolidate temporary outputs before tracking."""
578
+ # Tracking has started and we don't allow adding new objects until session is reset.
579
+ inference_state["tracking_has_started"] = True
580
+ batch_size = self._get_obj_num(inference_state)
581
+
582
+ # Consolidate per-object temporary outputs in "temp_output_dict_per_obj" and
583
+ # add them into "output_dict".
584
+ temp_output_dict_per_obj = inference_state["temp_output_dict_per_obj"]
585
+ output_dict = inference_state["output_dict"]
586
+ # "consolidated_frame_inds" contains indices of those frames where consolidated
587
+ # temporary outputs have been added (either in this call or any previous calls
588
+ # to `propagate_in_video_preflight`).
589
+ consolidated_frame_inds = inference_state["consolidated_frame_inds"]
590
+ for is_cond in [False, True]:
591
+ # Separately consolidate conditioning and non-conditioning temp outputs
592
+ storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs"
593
+ # Find all the frames that contain temporary outputs for any objects
594
+ # (these should be the frames that have just received clicks for mask inputs
595
+ # via `add_new_points_or_box` or `add_new_mask`)
596
+ temp_frame_inds = set()
597
+ for obj_temp_output_dict in temp_output_dict_per_obj.values():
598
+ temp_frame_inds.update(obj_temp_output_dict[storage_key].keys())
599
+ consolidated_frame_inds[storage_key].update(temp_frame_inds)
600
+ # consolidate the temporary output across all objects on this frame
601
+ for frame_idx in temp_frame_inds:
602
+ consolidated_out = self._consolidate_temp_output_across_obj(
603
+ inference_state, frame_idx, is_cond=is_cond, run_mem_encoder=True
604
+ )
605
+ # merge them into "output_dict" and also create per-object slices
606
+ output_dict[storage_key][frame_idx] = consolidated_out
607
+ self._add_output_per_object(
608
+ inference_state, frame_idx, consolidated_out, storage_key
609
+ )
610
+ clear_non_cond_mem = self.clear_non_cond_mem_around_input and (
611
+ self.clear_non_cond_mem_for_multi_obj or batch_size <= 1
612
+ )
613
+ if clear_non_cond_mem:
614
+ # clear non-conditioning memory of the surrounding frames
615
+ self._clear_non_cond_mem_around_input(inference_state, frame_idx)
616
+
617
+ # clear temporary outputs in `temp_output_dict_per_obj`
618
+ for obj_temp_output_dict in temp_output_dict_per_obj.values():
619
+ obj_temp_output_dict[storage_key].clear()
620
+
621
+ # edge case: if an output is added to "cond_frame_outputs", we remove any prior
622
+ # output on the same frame in "non_cond_frame_outputs"
623
+ for frame_idx in output_dict["cond_frame_outputs"]:
624
+ output_dict["non_cond_frame_outputs"].pop(frame_idx, None)
625
+ for obj_output_dict in inference_state["output_dict_per_obj"].values():
626
+ for frame_idx in obj_output_dict["cond_frame_outputs"]:
627
+ obj_output_dict["non_cond_frame_outputs"].pop(frame_idx, None)
628
+ for frame_idx in consolidated_frame_inds["cond_frame_outputs"]:
629
+ assert frame_idx in output_dict["cond_frame_outputs"]
630
+ consolidated_frame_inds["non_cond_frame_outputs"].discard(frame_idx)
631
+
632
+ # Make sure that the frame indices in "consolidated_frame_inds" are exactly those frames
633
+ # with either points or mask inputs (which should be true under a correct workflow).
634
+ all_consolidated_frame_inds = (
635
+ consolidated_frame_inds["cond_frame_outputs"]
636
+ | consolidated_frame_inds["non_cond_frame_outputs"]
637
+ )
638
+ input_frames_inds = set()
639
+ for point_inputs_per_frame in inference_state["point_inputs_per_obj"].values():
640
+ input_frames_inds.update(point_inputs_per_frame.keys())
641
+ for mask_inputs_per_frame in inference_state["mask_inputs_per_obj"].values():
642
+ input_frames_inds.update(mask_inputs_per_frame.keys())
643
+ assert all_consolidated_frame_inds == input_frames_inds
644
+
645
+ @torch.inference_mode()
646
+ def propagate_in_video(
647
+ self,
648
+ inference_state,
649
+ start_frame_idx=None,
650
+ max_frame_num_to_track=None,
651
+ reverse=False,
652
+ ):
653
+ """Propagate the input points across frames to track in the entire video."""
654
+ self.propagate_in_video_preflight(inference_state)
655
+
656
+ output_dict = inference_state["output_dict"]
657
+ consolidated_frame_inds = inference_state["consolidated_frame_inds"]
658
+ obj_ids = inference_state["obj_ids"]
659
+ num_frames = inference_state["num_frames"]
660
+ batch_size = self._get_obj_num(inference_state)
661
+ if len(output_dict["cond_frame_outputs"]) == 0:
662
+ raise RuntimeError("No points are provided; please add points first")
663
+ clear_non_cond_mem = self.clear_non_cond_mem_around_input and (
664
+ self.clear_non_cond_mem_for_multi_obj or batch_size <= 1
665
+ )
666
+
667
+ # set start index, end index, and processing order
668
+ if start_frame_idx is None:
669
+ # default: start from the earliest frame with input points
670
+ start_frame_idx = min(output_dict["cond_frame_outputs"])
671
+ if max_frame_num_to_track is None:
672
+ # default: track all the frames in the video
673
+ max_frame_num_to_track = num_frames
674
+ if reverse:
675
+ end_frame_idx = max(start_frame_idx - max_frame_num_to_track, 0)
676
+ if start_frame_idx > 0:
677
+ processing_order = range(start_frame_idx, end_frame_idx - 1, -1)
678
+ else:
679
+ processing_order = [] # skip reverse tracking if starting from frame 0
680
+ else:
681
+ end_frame_idx = min(
682
+ start_frame_idx + max_frame_num_to_track, num_frames - 1
683
+ )
684
+ processing_order = range(start_frame_idx, end_frame_idx + 1)
685
+
686
+ for frame_idx in tqdm(processing_order, desc="propagate in video"):
687
+ # We skip those frames already in consolidated outputs (these are frames
688
+ # that received input clicks or mask). Note that we cannot directly run
689
+ # batched forward on them via `_run_single_frame_inference` because the
690
+ # number of clicks on each object might be different.
691
+ if frame_idx in consolidated_frame_inds["cond_frame_outputs"]:
692
+ storage_key = "cond_frame_outputs"
693
+ current_out = output_dict[storage_key][frame_idx]
694
+ pred_masks = current_out["pred_masks"]
695
+ if clear_non_cond_mem:
696
+ # clear non-conditioning memory of the surrounding frames
697
+ self._clear_non_cond_mem_around_input(inference_state, frame_idx)
698
+ elif frame_idx in consolidated_frame_inds["non_cond_frame_outputs"]:
699
+ storage_key = "non_cond_frame_outputs"
700
+ current_out = output_dict[storage_key][frame_idx]
701
+ pred_masks = current_out["pred_masks"]
702
+ else:
703
+ storage_key = "non_cond_frame_outputs"
704
+ current_out, pred_masks = self._run_single_frame_inference(
705
+ inference_state=inference_state,
706
+ output_dict=output_dict,
707
+ frame_idx=frame_idx,
708
+ batch_size=batch_size,
709
+ is_init_cond_frame=False,
710
+ point_inputs=None,
711
+ mask_inputs=None,
712
+ reverse=reverse,
713
+ run_mem_encoder=True,
714
+ )
715
+ output_dict[storage_key][frame_idx] = current_out
716
+ # Create slices of per-object outputs for subsequent interaction with each
717
+ # individual object after tracking.
718
+ self._add_output_per_object(
719
+ inference_state, frame_idx, current_out, storage_key
720
+ )
721
+ inference_state["frames_already_tracked"][frame_idx] = {"reverse": reverse}
722
+
723
+ # Resize the output mask to the original video resolution (we directly use
724
+ # the mask scores on GPU for output to avoid any CPU conversion in between)
725
+ _, video_res_masks = self._get_orig_video_res_output(
726
+ inference_state, pred_masks
727
+ )
728
+ yield frame_idx, obj_ids, video_res_masks
729
+
730
+ def _add_output_per_object(
731
+ self, inference_state, frame_idx, current_out, storage_key
732
+ ):
733
+ """
734
+ Split a multi-object output into per-object output slices and add them into
735
+ `output_dict_per_obj`. The resulting slices share the same tensor storage.
736
+ """
737
+ maskmem_features = current_out["maskmem_features"]
738
+ assert maskmem_features is None or isinstance(maskmem_features, torch.Tensor)
739
+
740
+ maskmem_pos_enc = current_out["maskmem_pos_enc"]
741
+ assert maskmem_pos_enc is None or isinstance(maskmem_pos_enc, list)
742
+
743
+ output_dict_per_obj = inference_state["output_dict_per_obj"]
744
+ for obj_idx, obj_output_dict in output_dict_per_obj.items():
745
+ obj_slice = slice(obj_idx, obj_idx + 1)
746
+ obj_out = {
747
+ "maskmem_features": None,
748
+ "maskmem_pos_enc": None,
749
+ "pred_masks": current_out["pred_masks"][obj_slice],
750
+ "obj_ptr": current_out["obj_ptr"][obj_slice],
751
+ "object_score_logits": current_out["object_score_logits"][obj_slice],
752
+ }
753
+ if maskmem_features is not None:
754
+ obj_out["maskmem_features"] = maskmem_features[obj_slice]
755
+ if maskmem_pos_enc is not None:
756
+ obj_out["maskmem_pos_enc"] = [x[obj_slice] for x in maskmem_pos_enc]
757
+ obj_output_dict[storage_key][frame_idx] = obj_out
758
+
759
+ @torch.inference_mode()
760
+ def clear_all_prompts_in_frame(
761
+ self, inference_state, frame_idx, obj_id, need_output=True
762
+ ):
763
+ """Remove all input points or mask in a specific frame for a given object."""
764
+ obj_idx = self._obj_id_to_idx(inference_state, obj_id)
765
+
766
+ # Clear the conditioning information on the given frame
767
+ inference_state["point_inputs_per_obj"][obj_idx].pop(frame_idx, None)
768
+ inference_state["mask_inputs_per_obj"][obj_idx].pop(frame_idx, None)
769
+
770
+ temp_output_dict_per_obj = inference_state["temp_output_dict_per_obj"]
771
+ temp_output_dict_per_obj[obj_idx]["cond_frame_outputs"].pop(frame_idx, None)
772
+ temp_output_dict_per_obj[obj_idx]["non_cond_frame_outputs"].pop(frame_idx, None)
773
+
774
+ # Check and see if there are still any inputs left on this frame
775
+ batch_size = self._get_obj_num(inference_state)
776
+ frame_has_input = False
777
+ for obj_idx2 in range(batch_size):
778
+ if frame_idx in inference_state["point_inputs_per_obj"][obj_idx2]:
779
+ frame_has_input = True
780
+ break
781
+ if frame_idx in inference_state["mask_inputs_per_obj"][obj_idx2]:
782
+ frame_has_input = True
783
+ break
784
+
785
+ # If this frame has no remaining inputs for any objects, we further clear its
786
+ # conditioning frame status
787
+ if not frame_has_input:
788
+ output_dict = inference_state["output_dict"]
789
+ consolidated_frame_inds = inference_state["consolidated_frame_inds"]
790
+ consolidated_frame_inds["cond_frame_outputs"].discard(frame_idx)
791
+ consolidated_frame_inds["non_cond_frame_outputs"].discard(frame_idx)
792
+ # Remove the frame's conditioning output (possibly downgrading it to non-conditioning)
793
+ out = output_dict["cond_frame_outputs"].pop(frame_idx, None)
794
+ if out is not None:
795
+ # The frame is not a conditioning frame anymore since it's not receiving inputs,
796
+ # so we "downgrade" its output (if exists) to a non-conditioning frame output.
797
+ output_dict["non_cond_frame_outputs"][frame_idx] = out
798
+ inference_state["frames_already_tracked"].pop(frame_idx, None)
799
+ # Similarly, do it for the sliced output on each object.
800
+ for obj_idx2 in range(batch_size):
801
+ obj_output_dict = inference_state["output_dict_per_obj"][obj_idx2]
802
+ obj_out = obj_output_dict["cond_frame_outputs"].pop(frame_idx, None)
803
+ if obj_out is not None:
804
+ obj_output_dict["non_cond_frame_outputs"][frame_idx] = obj_out
805
+
806
+ # If all the conditioning frames have been removed, we also clear the tracking outputs
807
+ if len(output_dict["cond_frame_outputs"]) == 0:
808
+ self._reset_tracking_results(inference_state)
809
+
810
+ if not need_output:
811
+ return
812
+ # Finally, output updated masks per object (after removing the inputs above)
813
+ obj_ids = inference_state["obj_ids"]
814
+ is_cond = any(
815
+ frame_idx in obj_temp_output_dict["cond_frame_outputs"]
816
+ for obj_temp_output_dict in temp_output_dict_per_obj.values()
817
+ )
818
+ consolidated_out = self._consolidate_temp_output_across_obj(
819
+ inference_state,
820
+ frame_idx,
821
+ is_cond=is_cond,
822
+ run_mem_encoder=False,
823
+ consolidate_at_video_res=True,
824
+ )
825
+ _, video_res_masks = self._get_orig_video_res_output(
826
+ inference_state, consolidated_out["pred_masks_video_res"]
827
+ )
828
+ return frame_idx, obj_ids, video_res_masks
829
+
830
+ @torch.inference_mode()
831
+ def reset_state(self, inference_state):
832
+ """Remove all input points or mask in all frames throughout the video."""
833
+ self._reset_tracking_results(inference_state)
834
+ # Remove all object ids
835
+ inference_state["obj_id_to_idx"].clear()
836
+ inference_state["obj_idx_to_id"].clear()
837
+ inference_state["obj_ids"].clear()
838
+ inference_state["point_inputs_per_obj"].clear()
839
+ inference_state["mask_inputs_per_obj"].clear()
840
+ inference_state["output_dict_per_obj"].clear()
841
+ inference_state["temp_output_dict_per_obj"].clear()
842
+
843
+ def _reset_tracking_results(self, inference_state):
844
+ """Reset all tracking inputs and results across the videos."""
845
+ for v in inference_state["point_inputs_per_obj"].values():
846
+ v.clear()
847
+ for v in inference_state["mask_inputs_per_obj"].values():
848
+ v.clear()
849
+ for v in inference_state["output_dict_per_obj"].values():
850
+ v["cond_frame_outputs"].clear()
851
+ v["non_cond_frame_outputs"].clear()
852
+ for v in inference_state["temp_output_dict_per_obj"].values():
853
+ v["cond_frame_outputs"].clear()
854
+ v["non_cond_frame_outputs"].clear()
855
+ inference_state["output_dict"]["cond_frame_outputs"].clear()
856
+ inference_state["output_dict"]["non_cond_frame_outputs"].clear()
857
+ inference_state["consolidated_frame_inds"]["cond_frame_outputs"].clear()
858
+ inference_state["consolidated_frame_inds"]["non_cond_frame_outputs"].clear()
859
+ inference_state["tracking_has_started"] = False
860
+ inference_state["frames_already_tracked"].clear()
861
+
862
+ def _get_image_feature(self, inference_state, frame_idx, batch_size):
863
+ """Compute the image features on a given frame."""
864
+ # Look up in the cache first
865
+ image, backbone_out = inference_state["cached_features"].get(
866
+ frame_idx, (None, None)
867
+ )
868
+ if backbone_out is None:
869
+ # Cache miss -- we will run inference on a single image
870
+ image = inference_state["images"][frame_idx].to(inference_state["device"]).float().unsqueeze(0)
871
+ backbone_out = self.forward_image(image)
872
+ # Cache the most recent frame's feature (for repeated interactions with
873
+ # a frame; we can use an LRU cache for more frames in the future).
874
+ inference_state["cached_features"] = {frame_idx: (image, backbone_out)}
875
+
876
+ # expand the features to have the same dimension as the number of objects
877
+ expanded_image = image.expand(batch_size, -1, -1, -1)
878
+ expanded_backbone_out = {
879
+ "backbone_fpn": backbone_out["backbone_fpn"].copy(),
880
+ "vision_pos_enc": backbone_out["vision_pos_enc"].copy(),
881
+ }
882
+ for i, feat in enumerate(expanded_backbone_out["backbone_fpn"]):
883
+ expanded_backbone_out["backbone_fpn"][i] = feat.expand(
884
+ batch_size, -1, -1, -1
885
+ )
886
+ for i, pos in enumerate(expanded_backbone_out["vision_pos_enc"]):
887
+ pos = pos.expand(batch_size, -1, -1, -1)
888
+ expanded_backbone_out["vision_pos_enc"][i] = pos
889
+
890
+ features = self._prepare_backbone_features(expanded_backbone_out)
891
+ features = (expanded_image,) + features
892
+ return features
893
+
894
+ def _run_single_frame_inference(
895
+ self,
896
+ inference_state,
897
+ output_dict,
898
+ frame_idx,
899
+ batch_size,
900
+ is_init_cond_frame,
901
+ point_inputs,
902
+ mask_inputs,
903
+ reverse,
904
+ run_mem_encoder,
905
+ prev_sam_mask_logits=None,
906
+ ):
907
+ """Run tracking on a single frame based on current inputs and previous memory."""
908
+ # Retrieve correct image features
909
+ (
910
+ _,
911
+ _,
912
+ current_vision_feats,
913
+ current_vision_pos_embeds,
914
+ feat_sizes,
915
+ ) = self._get_image_feature(inference_state, frame_idx, batch_size)
916
+
917
+ # point and mask should not appear as input simultaneously on the same frame
918
+ assert point_inputs is None or mask_inputs is None
919
+ current_out = self.track_step(
920
+ frame_idx=frame_idx,
921
+ is_init_cond_frame=is_init_cond_frame,
922
+ current_vision_feats=current_vision_feats,
923
+ current_vision_pos_embeds=current_vision_pos_embeds,
924
+ feat_sizes=feat_sizes,
925
+ point_inputs=point_inputs,
926
+ mask_inputs=mask_inputs,
927
+ output_dict=output_dict,
928
+ num_frames=inference_state["num_frames"],
929
+ track_in_reverse=reverse,
930
+ run_mem_encoder=run_mem_encoder,
931
+ prev_sam_mask_logits=prev_sam_mask_logits,
932
+ )
933
+
934
+ # optionally offload the output to CPU memory to save GPU space
935
+ storage_device = inference_state["storage_device"]
936
+ maskmem_features = current_out["maskmem_features"]
937
+ if maskmem_features is not None:
938
+ maskmem_features = maskmem_features.to(torch.bfloat16)
939
+ maskmem_features = maskmem_features.to(storage_device, non_blocking=True)
940
+ pred_masks_gpu = current_out["pred_masks"]
941
+ # potentially fill holes in the predicted masks
942
+ if self.fill_hole_area > 0:
943
+ pred_masks_gpu = fill_holes_in_mask_scores(
944
+ pred_masks_gpu, self.fill_hole_area
945
+ )
946
+ pred_masks = pred_masks_gpu.to(storage_device, non_blocking=True)
947
+ # "maskmem_pos_enc" is the same across frames, so we only need to store one copy of it
948
+ maskmem_pos_enc = self._get_maskmem_pos_enc(inference_state, current_out)
949
+ # object pointer is a small tensor, so we always keep it on GPU memory for fast access
950
+ obj_ptr = current_out["obj_ptr"]
951
+ object_score_logits = current_out["object_score_logits"]
952
+ # make a compact version of this frame's output to reduce the state size
953
+ compact_current_out = {
954
+ "maskmem_features": maskmem_features,
955
+ "maskmem_pos_enc": maskmem_pos_enc,
956
+ "pred_masks": pred_masks,
957
+ "obj_ptr": obj_ptr,
958
+ "object_score_logits": object_score_logits,
959
+ }
960
+ return compact_current_out, pred_masks_gpu
961
+
962
+ def _run_memory_encoder(
963
+ self,
964
+ inference_state,
965
+ frame_idx,
966
+ batch_size,
967
+ high_res_masks,
968
+ object_score_logits,
969
+ is_mask_from_pts,
970
+ ):
971
+ """
972
+ Run the memory encoder on `high_res_masks`. This is usually after applying
973
+ non-overlapping constraints to object scores. Since their scores changed, their
974
+ memory also need to be computed again with the memory encoder.
975
+ """
976
+ # Retrieve correct image features
977
+ _, _, current_vision_feats, _, feat_sizes = self._get_image_feature(
978
+ inference_state, frame_idx, batch_size
979
+ )
980
+ maskmem_features, maskmem_pos_enc = self._encode_new_memory(
981
+ current_vision_feats=current_vision_feats,
982
+ feat_sizes=feat_sizes,
983
+ pred_masks_high_res=high_res_masks,
984
+ object_score_logits=object_score_logits,
985
+ is_mask_from_pts=is_mask_from_pts,
986
+ )
987
+
988
+ # optionally offload the output to CPU memory to save GPU space
989
+ storage_device = inference_state["storage_device"]
990
+ maskmem_features = maskmem_features.to(torch.bfloat16)
991
+ maskmem_features = maskmem_features.to(storage_device, non_blocking=True)
992
+ # "maskmem_pos_enc" is the same across frames, so we only need to store one copy of it
993
+ maskmem_pos_enc = self._get_maskmem_pos_enc(
994
+ inference_state, {"maskmem_pos_enc": maskmem_pos_enc}
995
+ )
996
+ return maskmem_features, maskmem_pos_enc
997
+
998
+ def _get_maskmem_pos_enc(self, inference_state, current_out):
999
+ """
1000
+ `maskmem_pos_enc` is the same across frames and objects, so we cache it as
1001
+ a constant in the inference session to reduce session storage size.
1002
+ """
1003
+ model_constants = inference_state["constants"]
1004
+ # "out_maskmem_pos_enc" should be either a list of tensors or None
1005
+ out_maskmem_pos_enc = current_out["maskmem_pos_enc"]
1006
+ if out_maskmem_pos_enc is not None:
1007
+ if "maskmem_pos_enc" not in model_constants:
1008
+ assert isinstance(out_maskmem_pos_enc, list)
1009
+ # only take the slice for one object, since it's same across objects
1010
+ maskmem_pos_enc = [x[0:1].clone() for x in out_maskmem_pos_enc]
1011
+ model_constants["maskmem_pos_enc"] = maskmem_pos_enc
1012
+ else:
1013
+ maskmem_pos_enc = model_constants["maskmem_pos_enc"]
1014
+ # expand the cached maskmem_pos_enc to the actual batch size
1015
+ batch_size = out_maskmem_pos_enc[0].size(0)
1016
+ expanded_maskmem_pos_enc = [
1017
+ x.expand(batch_size, -1, -1, -1) for x in maskmem_pos_enc
1018
+ ]
1019
+ else:
1020
+ expanded_maskmem_pos_enc = None
1021
+ return expanded_maskmem_pos_enc
1022
+
1023
+ @torch.inference_mode()
1024
+ def remove_object(self, inference_state, obj_id, strict=False, need_output=True):
1025
+ """
1026
+ Remove an object id from the tracking state. If strict is True, we check whether
1027
+ the object id actually exists and raise an error if it doesn't exist.
1028
+ """
1029
+ old_obj_idx_to_rm = inference_state["obj_id_to_idx"].get(obj_id, None)
1030
+ updated_frames = []
1031
+ # Check whether this object_id to remove actually exists and possibly raise an error.
1032
+ if old_obj_idx_to_rm is None:
1033
+ if not strict:
1034
+ return inference_state["obj_ids"], updated_frames
1035
+ raise RuntimeError(
1036
+ f"Cannot remove object id {obj_id} as it doesn't exist. "
1037
+ f"All existing object ids: {inference_state['obj_ids']}."
1038
+ )
1039
+
1040
+ # If this is the only remaining object id, we simply reset the state.
1041
+ if len(inference_state["obj_id_to_idx"]) == 1:
1042
+ self.reset_state(inference_state)
1043
+ return inference_state["obj_ids"], updated_frames
1044
+
1045
+ # There are still remaining objects after removing this object id. In this case,
1046
+ # we need to delete the object storage from inference state tensors.
1047
+ # Step 0: clear the input on those frames where this object id has point or mask input
1048
+ # (note that this step is required as it might downgrade conditioning frames to
1049
+ # non-conditioning ones)
1050
+ obj_input_frames_inds = set()
1051
+ obj_input_frames_inds.update(
1052
+ inference_state["point_inputs_per_obj"][old_obj_idx_to_rm]
1053
+ )
1054
+ obj_input_frames_inds.update(
1055
+ inference_state["mask_inputs_per_obj"][old_obj_idx_to_rm]
1056
+ )
1057
+ for frame_idx in obj_input_frames_inds:
1058
+ self.clear_all_prompts_in_frame(
1059
+ inference_state, frame_idx, obj_id, need_output=False
1060
+ )
1061
+
1062
+ # Step 1: Update the object id mapping (note that it must be done after Step 0,
1063
+ # since Step 0 still requires the old object id mappings in inference_state)
1064
+ old_obj_ids = inference_state["obj_ids"]
1065
+ old_obj_inds = list(range(len(old_obj_ids)))
1066
+ remain_old_obj_inds = old_obj_inds.copy()
1067
+ remain_old_obj_inds.remove(old_obj_idx_to_rm)
1068
+ new_obj_ids = [old_obj_ids[old_idx] for old_idx in remain_old_obj_inds]
1069
+ new_obj_inds = list(range(len(new_obj_ids)))
1070
+ # build new mappings
1071
+ old_idx_to_new_idx = dict(zip(remain_old_obj_inds, new_obj_inds))
1072
+ inference_state["obj_id_to_idx"] = dict(zip(new_obj_ids, new_obj_inds))
1073
+ inference_state["obj_idx_to_id"] = dict(zip(new_obj_inds, new_obj_ids))
1074
+ inference_state["obj_ids"] = new_obj_ids
1075
+
1076
+ # Step 2: For per-object tensor storage, we shift their obj_idx in the dict keys.
1077
+ # (note that "consolidated_frame_inds" doesn't need to be updated in this step as
1078
+ # it's already handled in Step 0)
1079
+ def _map_keys(container):
1080
+ new_kvs = []
1081
+ for k in old_obj_inds:
1082
+ v = container.pop(k)
1083
+ if k in old_idx_to_new_idx:
1084
+ new_kvs.append((old_idx_to_new_idx[k], v))
1085
+ container.update(new_kvs)
1086
+
1087
+ _map_keys(inference_state["point_inputs_per_obj"])
1088
+ _map_keys(inference_state["mask_inputs_per_obj"])
1089
+ _map_keys(inference_state["output_dict_per_obj"])
1090
+ _map_keys(inference_state["temp_output_dict_per_obj"])
1091
+
1092
+ # Step 3: For packed tensor storage, we index the remaining ids and rebuild the per-object slices.
1093
+ def _slice_state(output_dict, storage_key):
1094
+ for frame_idx, out in output_dict[storage_key].items():
1095
+ out["maskmem_features"] = out["maskmem_features"][remain_old_obj_inds]
1096
+ out["maskmem_pos_enc"] = [
1097
+ x[remain_old_obj_inds] for x in out["maskmem_pos_enc"]
1098
+ ]
1099
+ # "maskmem_pos_enc" is the same across frames, so we only need to store one copy of it
1100
+ out["maskmem_pos_enc"] = self._get_maskmem_pos_enc(inference_state, out)
1101
+ out["pred_masks"] = out["pred_masks"][remain_old_obj_inds]
1102
+ out["obj_ptr"] = out["obj_ptr"][remain_old_obj_inds]
1103
+ out["object_score_logits"] = out["object_score_logits"][
1104
+ remain_old_obj_inds
1105
+ ]
1106
+ # also update the per-object slices
1107
+ self._add_output_per_object(
1108
+ inference_state, frame_idx, out, storage_key
1109
+ )
1110
+
1111
+ _slice_state(inference_state["output_dict"], "cond_frame_outputs")
1112
+ _slice_state(inference_state["output_dict"], "non_cond_frame_outputs")
1113
+
1114
+ # Step 4: Further collect the outputs on those frames in `obj_input_frames_inds`, which
1115
+ # could show an updated mask for objects previously occluded by the object being removed
1116
+ if need_output:
1117
+ temp_output_dict_per_obj = inference_state["temp_output_dict_per_obj"]
1118
+ for frame_idx in obj_input_frames_inds:
1119
+ is_cond = any(
1120
+ frame_idx in obj_temp_output_dict["cond_frame_outputs"]
1121
+ for obj_temp_output_dict in temp_output_dict_per_obj.values()
1122
+ )
1123
+ consolidated_out = self._consolidate_temp_output_across_obj(
1124
+ inference_state,
1125
+ frame_idx,
1126
+ is_cond=is_cond,
1127
+ run_mem_encoder=False,
1128
+ consolidate_at_video_res=True,
1129
+ )
1130
+ _, video_res_masks = self._get_orig_video_res_output(
1131
+ inference_state, consolidated_out["pred_masks_video_res"]
1132
+ )
1133
+ updated_frames.append((frame_idx, video_res_masks))
1134
+
1135
+ return inference_state["obj_ids"], updated_frames
1136
+
1137
+ def _clear_non_cond_mem_around_input(self, inference_state, frame_idx):
1138
+ """
1139
+ Remove the non-conditioning memory around the input frame. When users provide
1140
+ correction clicks, the surrounding frames' non-conditioning memories can still
1141
+ contain outdated object appearance information and could confuse the model.
1142
+
1143
+ This method clears those non-conditioning memories surrounding the interacted
1144
+ frame to avoid giving the model both old and new information about the object.
1145
+ """
1146
+ r = self.memory_temporal_stride_for_eval
1147
+ frame_idx_begin = frame_idx - r * self.num_maskmem
1148
+ frame_idx_end = frame_idx + r * self.num_maskmem
1149
+ output_dict = inference_state["output_dict"]
1150
+ non_cond_frame_outputs = output_dict["non_cond_frame_outputs"]
1151
+ for t in range(frame_idx_begin, frame_idx_end + 1):
1152
+ non_cond_frame_outputs.pop(t, None)
1153
+ for obj_output_dict in inference_state["output_dict_per_obj"].values():
1154
+ obj_output_dict["non_cond_frame_outputs"].pop(t, None)
custom_nodes/comfyui-segment-anything-2/sam2/utils/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
custom_nodes/comfyui-segment-anything-2/sam2/utils/amg.py ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ import math
8
+ from copy import deepcopy
9
+ from itertools import product
10
+ from typing import Any, Dict, Generator, ItemsView, List, Tuple
11
+
12
+ import numpy as np
13
+ import torch
14
+
15
+ # Very lightly adapted from https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/utils/amg.py
16
+
17
+
18
class MaskData:
    """
    Batched storage for masks and their per-mask metadata.

    Values may be lists, numpy arrays, or torch tensors; supports basic
    row-wise filtering and concatenation across batches.
    """

    def __init__(self, **kwargs) -> None:
        for value in kwargs.values():
            assert isinstance(
                value, (list, np.ndarray, torch.Tensor)
            ), "MaskData only supports list, numpy arrays, and torch tensors."
        self._stats = dict(**kwargs)

    def __setitem__(self, key: str, item: Any) -> None:
        assert isinstance(
            item, (list, np.ndarray, torch.Tensor)
        ), "MaskData only supports list, numpy arrays, and torch tensors."
        self._stats[key] = item

    def __delitem__(self, key: str) -> None:
        del self._stats[key]

    def __getitem__(self, key: str) -> Any:
        return self._stats[key]

    def items(self) -> ItemsView[str, Any]:
        return self._stats.items()

    def filter(self, keep: torch.Tensor) -> None:
        """Keep only the rows selected by `keep` (bool mask or index tensor)."""
        for key, value in self._stats.items():
            if value is None:
                self._stats[key] = None
            elif isinstance(value, torch.Tensor):
                self._stats[key] = value[torch.as_tensor(keep, device=value.device)]
            elif isinstance(value, np.ndarray):
                self._stats[key] = value[keep.detach().cpu().numpy()]
            elif isinstance(value, list):
                if keep.dtype == torch.bool:
                    self._stats[key] = [x for i, x in enumerate(value) if keep[i]]
                else:
                    self._stats[key] = [value[i] for i in keep]
            else:
                raise TypeError(f"MaskData key {key} has an unsupported type {type(value)}.")

    def cat(self, new_stats: "MaskData") -> None:
        """Append the rows of `new_stats` to this structure, key by key."""
        for key, value in new_stats.items():
            if key not in self._stats or self._stats[key] is None:
                self._stats[key] = deepcopy(value)
            elif isinstance(value, torch.Tensor):
                self._stats[key] = torch.cat([self._stats[key], value], dim=0)
            elif isinstance(value, np.ndarray):
                self._stats[key] = np.concatenate([self._stats[key], value], axis=0)
            elif isinstance(value, list):
                self._stats[key] = self._stats[key] + deepcopy(value)
            else:
                raise TypeError(f"MaskData key {key} has an unsupported type {type(value)}.")

    def to_numpy(self) -> None:
        """Convert every torch tensor value to a float numpy array, in place."""
        for key, value in self._stats.items():
            if isinstance(value, torch.Tensor):
                self._stats[key] = value.float().detach().cpu().numpy()
78
+
79
+
80
def is_box_near_crop_edge(
    boxes: torch.Tensor, crop_box: List[int], orig_box: List[int], atol: float = 20.0
) -> torch.Tensor:
    """Filter masks at the edge of a crop, but not at the edge of the original image."""
    crop_edges = torch.as_tensor(crop_box, dtype=torch.float, device=boxes.device)
    image_edges = torch.as_tensor(orig_box, dtype=torch.float, device=boxes.device)
    uncropped = uncrop_boxes_xyxy(boxes, crop_box).float()
    # a side is flagged only if it touches the crop edge but not the image edge
    at_crop_edge = torch.isclose(uncropped, crop_edges[None, :], atol=atol, rtol=0)
    at_image_edge = torch.isclose(uncropped, image_edges[None, :], atol=atol, rtol=0)
    return torch.any(at_crop_edge & ~at_image_edge, dim=1)
91
+
92
+
93
def box_xyxy_to_xywh(box_xyxy: torch.Tensor) -> torch.Tensor:
    """Convert a box from (x0, y0, x1, y1) to (x0, y0, w, h) format."""
    box_xywh = deepcopy(box_xyxy)
    box_xywh[2] -= box_xywh[0]
    box_xywh[3] -= box_xywh[1]
    return box_xywh
98
+
99
+
100
def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
    """Yield successive same-index batches of size `batch_size` from equal-length sequences."""
    assert len(args) > 0 and all(
        len(a) == len(args[0]) for a in args
    ), "Batched iteration must have inputs of all the same size."
    total = len(args[0])
    n_batches = -(-total // batch_size)  # ceiling division
    for b in range(n_batches):
        start = b * batch_size
        yield [seq[start : start + batch_size] for seq in args]
107
+
108
+
109
def mask_to_rle_pytorch(tensor: torch.Tensor) -> List[Dict[str, Any]]:
    """
    Encodes masks to an uncompressed RLE, in the format expected by
    pycoco tools.
    """
    # Put in fortran order and flatten h,w
    b, h, w = tensor.shape
    flat = tensor.permute(0, 2, 1).flatten(1)

    # Positions where consecutive pixels differ mark run boundaries
    change_indices = (flat[:, 1:] ^ flat[:, :-1]).nonzero()

    out = []
    for i in range(b):
        idxs = change_indices[change_indices[:, 0] == i, 1]
        boundaries = torch.cat(
            [
                torch.tensor([0], dtype=idxs.dtype, device=idxs.device),
                idxs + 1,
                torch.tensor([h * w], dtype=idxs.dtype, device=idxs.device),
            ]
        )
        run_lengths = boundaries[1:] - boundaries[:-1]
        # RLE always starts with a background run; prepend 0 when the first pixel is set
        counts = [0] if flat[i, 0] != 0 else []
        counts.extend(run_lengths.detach().cpu().tolist())
        out.append({"size": [h, w], "counts": counts})
    return out
138
+
139
+
140
def rle_to_mask(rle: Dict[str, Any]) -> np.ndarray:
    """Compute a binary mask from an uncompressed RLE."""
    h, w = rle["size"]
    flat = np.empty(h * w, dtype=bool)
    pos = 0
    value = False  # RLE runs alternate starting from background
    for run in rle["counts"]:
        flat[pos : pos + run] = value
        pos += run
        value = not value
    # runs are stored in fortran (column-major) order; transpose back to C order
    return flat.reshape(w, h).transpose()
152
+
153
+
154
def area_from_rle(rle: Dict[str, Any]) -> int:
    """Return the foreground area of an uncompressed RLE (odd-indexed runs are foreground)."""
    counts = rle["counts"]
    return sum(counts[i] for i in range(1, len(counts), 2))
156
+
157
+
158
def calculate_stability_score(
    masks: torch.Tensor, mask_threshold: float, threshold_offset: float
) -> torch.Tensor:
    """
    Computes the stability score for a batch of masks. The stability
    score is the IoU between the binary masks obtained by thresholding
    the predicted mask logits at high and low values.
    """

    def _area(threshold: float) -> torch.Tensor:
        # One mask is always contained inside the other, so IoU reduces to
        # area(high) / area(low). Sum in int16 then int32 to save memory by
        # preventing an unnecessary cast to torch.int64.
        return (
            (masks > threshold)
            .sum(-1, dtype=torch.int16)
            .sum(-1, dtype=torch.int32)
        )

    intersections = _area(mask_threshold + threshold_offset)
    unions = _area(mask_threshold - threshold_offset)
    return intersections / unions
179
+
180
+
181
def build_point_grid(n_per_side: int) -> np.ndarray:
    """Generates a 2D grid of points evenly spaced in [0,1]x[0,1]."""
    # center points inside their cells: half a cell width from each border
    offset = 1 / (2 * n_per_side)
    coords = np.linspace(offset, 1 - offset, n_per_side)
    grid_x, grid_y = np.meshgrid(coords, coords)  # default 'xy' indexing
    return np.stack([grid_x, grid_y], axis=-1).reshape(-1, 2)
189
+
190
+
191
def build_all_layer_point_grids(
    n_per_side: int, n_layers: int, scale_per_layer: int
) -> List[np.ndarray]:
    """Generates point grids for all crop layers (layer i uses n_per_side / scale**i points)."""
    return [
        build_point_grid(int(n_per_side / (scale_per_layer**layer)))
        for layer in range(n_layers + 1)
    ]
200
+
201
+
202
def generate_crop_boxes(
    im_size: Tuple[int, ...], n_layers: int, overlap_ratio: float
) -> Tuple[List[List[int]], List[int]]:
    """
    Generates a list of crop boxes of different sizes. Each layer
    has (2**i)**2 boxes for the ith layer.
    """
    im_h, im_w = im_size
    short_side = min(im_h, im_w)

    # Layer 0 is always the full image
    crop_boxes: List[List[int]] = [[0, 0, im_w, im_h]]
    layer_idxs: List[int] = [0]

    def crop_len(orig_len, n_crops, overlap):
        # length such that n_crops crops with the given overlap tile orig_len
        return int(math.ceil((overlap * (n_crops - 1) + orig_len) / n_crops))

    for i_layer in range(n_layers):
        n_crops_per_side = 2 ** (i_layer + 1)
        overlap = int(overlap_ratio * short_side * (2 / n_crops_per_side))

        crop_w = crop_len(im_w, n_crops_per_side, overlap)
        crop_h = crop_len(im_h, n_crops_per_side, overlap)

        x0_coords = [int((crop_w - overlap) * i) for i in range(n_crops_per_side)]
        y0_coords = [int((crop_h - overlap) * i) for i in range(n_crops_per_side)]

        # Crops in XYXY format, clipped to the image bounds
        for x0, y0 in product(x0_coords, y0_coords):
            crop_boxes.append([x0, y0, min(x0 + crop_w, im_w), min(y0 + crop_h, im_h)])
            layer_idxs.append(i_layer + 1)

    return crop_boxes, layer_idxs
237
+
238
+
239
def uncrop_boxes_xyxy(boxes: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
    """Translate XYXY boxes from crop-local to original-image coordinates."""
    x0, y0 = crop_box[0], crop_box[1]
    offset = torch.tensor([[x0, y0, x0, y0]], device=boxes.device)
    if boxes.ndim == 3:
        # boxes has a channel dimension; broadcast over it
        offset = offset.unsqueeze(1)
    return boxes + offset
246
+
247
+
248
def uncrop_points(points: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
    """Translate (x, y) points from crop-local to original-image coordinates."""
    x0, y0 = crop_box[0], crop_box[1]
    offset = torch.tensor([[x0, y0]], device=points.device)
    if points.ndim == 3:
        # points has a channel dimension; broadcast over it
        offset = offset.unsqueeze(1)
    return points + offset
255
+
256
+
257
def uncrop_masks(
    masks: torch.Tensor, crop_box: List[int], orig_h: int, orig_w: int
) -> torch.Tensor:
    """Zero-pad crop-local masks back to the original image size."""
    x0, y0, x1, y1 = crop_box
    if x0 == 0 and y0 == 0 and x1 == orig_w and y1 == orig_h:
        return masks  # crop covers the whole image; nothing to pad
    # F.pad takes (left, right, top, bottom) for the last two dims
    pad_right = orig_w - x1
    pad_bottom = orig_h - y1
    return torch.nn.functional.pad(masks, (x0, pad_right, y0, pad_bottom), value=0)
267
+
268
+
269
def remove_small_regions(
    mask: np.ndarray, area_thresh: float, mode: str
) -> Tuple[np.ndarray, bool]:
    """
    Removes small disconnected regions and holes in a mask. Returns the
    mask and an indicator of if the mask has been modified.
    """
    import cv2  # type: ignore

    assert mode in ["holes", "islands"]
    correct_holes = mode == "holes"
    # invert the mask in "holes" mode so holes become foreground components
    working_mask = (correct_holes ^ mask).astype(np.uint8)
    n_labels, regions, stats, _ = cv2.connectedComponentsWithStats(working_mask, 8)
    sizes = stats[:, -1][1:]  # Row 0 is background label
    small_regions = [i + 1 for i, s in enumerate(sizes) if s < area_thresh]
    if not small_regions:
        return mask, False
    fill_labels = [0] + small_regions
    if not correct_holes:
        # keep (rather than fill) everything except the small islands
        fill_labels = [i for i in range(n_labels) if i not in fill_labels]
    # If every region is below threshold, keep largest
    if len(fill_labels) == 0:
        fill_labels = [int(np.argmax(sizes)) + 1]
    return np.isin(regions, fill_labels), True
294
+
295
+
296
def coco_encode_rle(uncompressed_rle: Dict[str, Any]) -> Dict[str, Any]:
    """Compress an uncompressed RLE into COCO's compressed string format."""
    from pycocotools import mask as mask_utils  # type: ignore

    height, width = uncompressed_rle["size"]
    encoded = mask_utils.frPyObjects(uncompressed_rle, height, width)
    # decode bytes -> str so the result can be serialized with json
    encoded["counts"] = encoded["counts"].decode("utf-8")
    return encoded
303
+
304
+
305
def batched_mask_to_box(masks: torch.Tensor) -> torch.Tensor:
    """
    Calculates boxes in XYXY format around masks. Return [0,0,0,0] for
    an empty mask. For input shape C1xC2x...xHxW, the output shape is C1xC2x...x4.
    """
    # torch.max below raises an error on empty inputs, just skip in this case
    if torch.numel(masks) == 0:
        return torch.zeros(*masks.shape[:-2], 4, device=masks.device)

    # Normalize shape to CxHxW
    shape = masks.shape
    h, w = shape[-2:]
    masks = masks.flatten(0, -3) if len(shape) > 2 else masks.unsqueeze(0)

    def _extent_edges(occupied: torch.Tensor, extent: int):
        # occupied: bool (C, extent) marking rows/cols that contain mask pixels.
        # Returns (lowest, highest) occupied coordinate per batch element.
        coords = occupied * torch.arange(extent, device=occupied.device)[None, :]
        highest, _ = torch.max(coords, dim=-1)
        # push unoccupied positions past the end so they never win the min
        coords = coords + extent * (~occupied)
        lowest, _ = torch.min(coords, dim=-1)
        return lowest, highest

    in_height, _ = torch.max(masks, dim=-1)
    top_edges, bottom_edges = _extent_edges(in_height, h)
    in_width, _ = torch.max(masks, dim=-2)
    left_edges, right_edges = _extent_edges(in_width, w)

    # If the mask is empty the right edge will be to the left of the left edge.
    # Replace these boxes with [0, 0, 0, 0]
    empty_filter = (right_edges < left_edges) | (bottom_edges < top_edges)
    out = torch.stack([left_edges, top_edges, right_edges, bottom_edges], dim=-1)
    out = out * (~empty_filter).unsqueeze(-1)

    # Return to original shape
    return out.reshape(*shape[:-2], 4) if len(shape) > 2 else out[0]
custom_nodes/comfyui-segment-anything-2/sam2/utils/misc.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ import os
8
+ import warnings
9
+ from threading import Thread
10
+
11
+ import numpy as np
12
+ import torch
13
+ from PIL import Image
14
+ from tqdm import tqdm
15
+ import platform
16
+
17
def get_sdpa_settings():
    """Determine scaled-dot-product-attention kernel settings for this machine.

    Returns a tuple (old_gpu, use_flash_attn, math_kernel_on).
    """
    if not torch.cuda.is_available():
        # no GPU: treat as old hardware and keep the math fallback kernel on
        return True, False, True

    device_major = torch.cuda.get_device_properties(0).major
    old_gpu = device_major < 7
    # only use Flash Attention on Ampere (8.0) or newer GPUs
    use_flash_attn = device_major >= 8 and platform.system() == 'Linux'
    # keep math kernel for PyTorch versions before 2.2 (Flash Attention v2 is only
    # available on PyTorch 2.2+, while Flash Attention v1 cannot handle all cases)
    pytorch_version = tuple(int(v) for v in torch.__version__.split(".")[:2])
    if pytorch_version < (2, 2):
        warnings.warn(
            f"You are using PyTorch {torch.__version__} without Flash Attention v2 support. "
            "Consider upgrading to PyTorch 2.2+ for Flash Attention v2 (which could be faster).",
            category=UserWarning,
            stacklevel=2,
        )
    math_kernel_on = pytorch_version < (2, 2) or not use_flash_attn

    return old_gpu, use_flash_attn, math_kernel_on
45
+
46
+
47
def get_connected_components(mask):
    """
    Get the connected components (8-connectivity) of binary masks of shape (N, 1, H, W).

    Inputs:
    - mask: A binary mask tensor of shape (N, 1, H, W), where 1 is foreground and 0 is
      background.

    Outputs:
    - labels: A tensor of shape (N, 1, H, W) containing the connected component labels
      for foreground pixels and 0 for background pixels.
    - counts: A tensor of shape (N, 1, H, W) containing the area of the connected
      components for foreground pixels and 0 for background pixels.
    """
    # Lazy relative import: the compiled extension may not be built in every install,
    # so importing it only here keeps module import from failing.
    from ...sam2 import _C

    # NOTE: "componnets" (sic) is the symbol name exported by the C extension.
    return _C.get_connected_componnets(mask.to(torch.uint8).contiguous())
64
+
65
+
66
def mask_to_box(masks: torch.Tensor):
    """
    compute bounding box given an input mask

    Inputs:
    - masks: [B, 1, H, W] masks, dtype=torch.Tensor

    Returns:
    - box_coords: [B, 1, 4], contains (x, y) coordinates of top left and bottom right box corners, dtype=torch.Tensor
    """
    B, _, h, w = masks.shape
    device = masks.device
    grid_xs, grid_ys = torch.meshgrid(
        torch.arange(w, device=device, dtype=torch.int32),
        torch.arange(h, device=device, dtype=torch.int32),
        indexing="xy",
    )
    grid_xs = grid_xs[None, None, ...].expand(B, 1, h, w)
    grid_ys = grid_ys[None, None, ...].expand(B, 1, h, w)

    def _masked_reduce(grid, sentinel, reduce_op):
        # replace off-mask pixels with a sentinel that can never win the reduction
        vals, _ = reduce_op(torch.where(masks, grid, sentinel).flatten(-2), dim=-1)
        return vals

    min_xs = _masked_reduce(grid_xs, w, torch.min)
    max_xs = _masked_reduce(grid_xs, -1, torch.max)
    min_ys = _masked_reduce(grid_ys, h, torch.min)
    max_ys = _masked_reduce(grid_ys, -1, torch.max)

    return torch.stack((min_xs, min_ys, max_xs, max_ys), dim=-1)
90
+
91
+
92
def _load_img_as_tensor(img_path, image_size):
    """Load one image as a (3, image_size, image_size) float tensor in [0, 1].

    Also returns the image's original (height, width).
    """
    img_pil = Image.open(img_path)
    img_np = np.array(img_pil.convert("RGB").resize((image_size, image_size)))
    if img_np.dtype != np.uint8:  # np.uint8 is expected for JPEG images
        raise RuntimeError(f"Unknown image dtype: {img_np.dtype} on {img_path}")
    img = torch.from_numpy(img_np / 255.0).permute(2, 0, 1)  # HWC -> CHW
    video_width, video_height = img_pil.size  # the original video size
    return img, video_height, video_width
102
+
103
+
104
class AsyncVideoFrameLoader:
    """
    A list of video frames to be loaded asynchronously without blocking session start.

    Frames are fetched lazily via `__getitem__`; a daemon thread pre-loads the
    remaining frames in the background after the first frame is loaded eagerly.
    """

    def __init__(
        self,
        img_paths,
        image_size,
        offload_video_to_cpu,
        img_mean,
        img_std,
        compute_device,
    ):
        # img_paths: per-frame image paths in frame order
        # image_size: target square size each frame is resized to
        # offload_video_to_cpu: if True, frames stay on CPU instead of compute_device
        # img_mean / img_std: per-channel normalization tensors applied to each frame
        self.img_paths = img_paths
        self.image_size = image_size
        self.offload_video_to_cpu = offload_video_to_cpu
        self.img_mean = img_mean
        self.img_std = img_std
        # items in `self.images` will be loaded asynchronously
        self.images = [None] * len(img_paths)
        # catch and raise any exceptions in the async loading thread
        self.exception = None
        # video_height and video_width will be filled when loading the first image
        self.video_height = None
        self.video_width = None
        self.compute_device = compute_device

        # load the first frame to fill video_height and video_width and also
        # to cache it (since it's most likely where the user will click)
        self.__getitem__(0)

        # load the rest of frames asynchronously without blocking the session start
        def _load_frames():
            try:
                for n in tqdm(range(len(self.images)), desc="frame loading (JPEG)"):
                    self.__getitem__(n)
            except Exception as e:
                # stash the exception; it is re-raised on the next __getitem__ call
                self.exception = e

        self.thread = Thread(target=_load_frames, daemon=True)
        self.thread.start()

    def __getitem__(self, index):
        # surface any failure that happened in the background loading thread
        if self.exception is not None:
            raise RuntimeError("Failure in frame loading thread") from self.exception

        # return the cached frame if it was already loaded
        img = self.images[index]
        if img is not None:
            return img

        img, video_height, video_width = _load_img_as_tensor(
            self.img_paths[index], self.image_size
        )
        self.video_height = video_height
        self.video_width = video_width
        # normalize by mean and std
        img -= self.img_mean
        img /= self.img_std
        if not self.offload_video_to_cpu:
            img = img.to(self.compute_device, non_blocking=True)
        self.images[index] = img
        return img

    def __len__(self):
        return len(self.images)
170
+
171
+
172
def load_video_frames(
    video_path,
    image_size,
    offload_video_to_cpu,
    img_mean=(0.485, 0.456, 0.406),
    img_std=(0.229, 0.224, 0.225),
    async_loading_frames=False,
    compute_device=torch.device("cuda"),
):
    """
    Load the video frames from video_path. The frames are resized to image_size as in
    the model and are loaded to GPU if offload_video_to_cpu=False. This is used by the demo.

    Dispatches to the MP4 loader for raw bytes / .mp4 paths, and to the JPEG-folder
    loader for directories.
    """
    if isinstance(video_path, bytes) or (
        isinstance(video_path, str)
        and os.path.splitext(video_path)[-1] in [".mp4", ".MP4"]
    ):
        return load_video_frames_from_video_file(
            video_path=video_path,
            image_size=image_size,
            offload_video_to_cpu=offload_video_to_cpu,
            img_mean=img_mean,
            img_std=img_std,
            compute_device=compute_device,
        )
    if isinstance(video_path, str) and os.path.isdir(video_path):
        return load_video_frames_from_jpg_images(
            video_path=video_path,
            image_size=image_size,
            offload_video_to_cpu=offload_video_to_cpu,
            img_mean=img_mean,
            img_std=img_std,
            async_loading_frames=async_loading_frames,
            compute_device=compute_device,
        )
    raise NotImplementedError(
        "Only MP4 video and JPEG folder are supported at this moment"
    )
211
+
212
+
213
def load_video_frames_from_jpg_images(
    video_path,
    image_size,
    offload_video_to_cpu,
    img_mean=(0.485, 0.456, 0.406),
    img_std=(0.229, 0.224, 0.225),
    async_loading_frames=False,
    compute_device=torch.device("cuda"),
):
    """
    Load the video frames from a directory of JPEG files ("<frame_index>.jpg" format).

    The frames are resized to image_size x image_size and are loaded to GPU if
    `offload_video_to_cpu` is `False` and to CPU if `offload_video_to_cpu` is `True`.

    You can load a frame asynchronously by setting `async_loading_frames` to `True`.

    Returns (images, video_height, video_width), where `images` is a float tensor of
    shape (num_frames, 3, image_size, image_size) — or an AsyncVideoFrameLoader when
    `async_loading_frames` is True — and (video_height, video_width) is the original
    frame size.
    """
    if isinstance(video_path, str) and os.path.isdir(video_path):
        jpg_folder = video_path
    else:
        raise NotImplementedError(
            "Only JPEG frames are supported at this moment. For video files, you may use "
            "ffmpeg (https://ffmpeg.org/) to extract frames into a folder of JPEG files, such as \n"
            "```\n"
            "ffmpeg -i <your_video>.mp4 -q:v 2 -start_number 0 <output_dir>/'%05d.jpg'\n"
            "```\n"
            "where `-q:v` generates high-quality JPEG frames and `-start_number 0` asks "
            "ffmpeg to start the JPEG file from 00000.jpg."
        )

    # frame files are named "<frame_index>.jpg"; sort numerically by index
    # (non-numeric stems will raise ValueError here)
    frame_names = [
        p
        for p in os.listdir(jpg_folder)
        if os.path.splitext(p)[-1] in [".jpg", ".jpeg", ".JPG", ".JPEG"]
    ]
    frame_names.sort(key=lambda p: int(os.path.splitext(p)[0]))
    num_frames = len(frame_names)
    if num_frames == 0:
        raise RuntimeError(f"no images found in {jpg_folder}")
    img_paths = [os.path.join(jpg_folder, frame_name) for frame_name in frame_names]
    # per-channel normalization constants, shaped (3, 1, 1) for broadcasting
    img_mean = torch.tensor(img_mean, dtype=torch.float32)[:, None, None]
    img_std = torch.tensor(img_std, dtype=torch.float32)[:, None, None]

    if async_loading_frames:
        # lazy path: the loader normalizes each frame itself as it is fetched
        lazy_images = AsyncVideoFrameLoader(
            img_paths,
            image_size,
            offload_video_to_cpu,
            img_mean,
            img_std,
            compute_device,
        )
        return lazy_images, lazy_images.video_height, lazy_images.video_width

    # eager path: load every frame up front into one preallocated tensor
    images = torch.zeros(num_frames, 3, image_size, image_size, dtype=torch.float32)
    for n, img_path in enumerate(tqdm(img_paths, desc="frame loading (JPEG)")):
        images[n], video_height, video_width = _load_img_as_tensor(img_path, image_size)
    if not offload_video_to_cpu:
        images = images.to(compute_device)
        img_mean = img_mean.to(compute_device)
        img_std = img_std.to(compute_device)
    # normalize by mean and std
    images -= img_mean
    images /= img_std
    return images, video_height, video_width
278
+
279
+
280
def load_video_frames_from_video_file(
    video_path,
    image_size,
    offload_video_to_cpu,
    img_mean=(0.485, 0.456, 0.406),
    img_std=(0.229, 0.224, 0.225),
    compute_device=torch.device("cuda"),
):
    """Load the video frames from a video file.

    Decodes every frame with decord at image_size x image_size and returns
    (images, video_height, video_width), where `images` is a normalized float
    tensor of shape (num_frames, 3, image_size, image_size) and
    (video_height, video_width) is the original frame size.
    """
    import decord

    # per-channel normalization constants, shaped (3, 1, 1) for broadcasting
    img_mean = torch.tensor(img_mean, dtype=torch.float32)[:, None, None]
    img_std = torch.tensor(img_std, dtype=torch.float32)[:, None, None]
    # Get the original video height and width
    decord.bridge.set_bridge("torch")
    video_height, video_width, _ = decord.VideoReader(video_path).next().shape
    # Iterate over all frames in the video
    images = []
    for frame in decord.VideoReader(video_path, width=image_size, height=image_size):
        images.append(frame.permute(2, 0, 1))  # HWC -> CHW

    images = torch.stack(images, dim=0).float() / 255.0
    if not offload_video_to_cpu:
        images = images.to(compute_device)
        img_mean = img_mean.to(compute_device)
        img_std = img_std.to(compute_device)
    # normalize by mean and std
    images -= img_mean
    images /= img_std
    return images, video_height, video_width
310
+
311
+
312
def fill_holes_in_mask_scores(mask, max_area):
    """
    A post processor to fill small holes in mask scores with area under `max_area`.
    """
    assert max_area > 0, "max_area must be positive"

    # Holes are connected components of the background (mask scores <= 0)
    # whose area is at most `max_area`.
    try:
        labels, areas = get_connected_components(mask <= 0)
        is_hole = (labels > 0) & (areas <= max_area)
        # Fill holes with a small positive mask score (0.1) to change them to foreground.
        return torch.where(is_hole, 0.1, mask)
    except Exception as e:
        # Skip the post-processing step on removing small holes if the CUDA kernel fails
        warnings.warn(
            f"{e}\n\nSkipping the post-processing step due to the error above. You can "
            "still use SAM 2 and it's OK to ignore the error above, although some post-processing "
            "functionality may be limited (which doesn't affect the results in most cases; see "
            "https://github.com/facebookresearch/sam2/blob/main/INSTALL.md).",
            category=UserWarning,
            stacklevel=2,
        )
        return mask
339
+
340
+
341
def concat_points(old_point_inputs, new_points, new_labels):
    """Add new points and labels to previous point inputs (add at the end)."""
    if old_point_inputs is None:
        return {"point_coords": new_points, "point_labels": new_labels}
    return {
        "point_coords": torch.cat([old_point_inputs["point_coords"], new_points], dim=1),
        "point_labels": torch.cat([old_point_inputs["point_labels"], new_labels], dim=1),
    }
custom_nodes/comfyui-segment-anything-2/sam2/utils/transforms.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.nn.functional as F
10
+ from torchvision.transforms import Normalize, Resize, ToTensor
11
+
12
+
13
+ class SAM2Transforms(nn.Module):
14
+ def __init__(
15
+ self, resolution, mask_threshold, max_hole_area=0.0, max_sprinkle_area=0.0
16
+ ):
17
+ """
18
+ Transforms for SAM2.
19
+ """
20
+ super().__init__()
21
+ self.resolution = resolution
22
+ self.mask_threshold = mask_threshold
23
+ self.max_hole_area = max_hole_area
24
+ self.max_sprinkle_area = max_sprinkle_area
25
+ self.mean = [0.485, 0.456, 0.406]
26
+ self.std = [0.229, 0.224, 0.225]
27
+ self.to_tensor = ToTensor()
28
+ try:
29
+ self.transforms = torch.jit.script(
30
+ nn.Sequential(
31
+ Resize((self.resolution, self.resolution)),
32
+ Normalize(self.mean, self.std),
33
+ )
34
+ )
35
+ except Exception as e:
36
+ print(f"Failed to torch jit script transforms: {e}, falling back to normal transforms")
37
+ self.transforms = nn.Sequential(
38
+ Resize((self.resolution, self.resolution)),
39
+ Normalize(self.mean, self.std),
40
+ )
41
+
42
+ def __call__(self, x):
43
+ x = self.to_tensor(x)
44
+ return self.transforms(x)
45
+
46
+ def forward_batch(self, img_list):
47
+ img_batch = [self.transforms(self.to_tensor(img)) for img in img_list]
48
+ img_batch = torch.stack(img_batch, dim=0)
49
+ return img_batch
50
+
51
+ def transform_coords(
52
+ self, coords: torch.Tensor, normalize=False, orig_hw=None
53
+ ) -> torch.Tensor:
54
+ """
55
+ Expects a torch tensor with length 2 in the last dimension. The coordinates can be in absolute image or normalized coordinates,
56
+ If the coords are in absolute image coordinates, normalize should be set to True and original image size is required.
57
+
58
+ Returns
59
+ Un-normalized coordinates in the range of [0, 1] which is expected by the SAM2 model.
60
+ """
61
+ if normalize:
62
+ assert orig_hw is not None
63
+ h, w = orig_hw
64
+ coords = coords.clone()
65
+ coords[..., 0] = coords[..., 0] / w
66
+ coords[..., 1] = coords[..., 1] / h
67
+
68
+ coords = coords * self.resolution # unnormalize coords
69
+ return coords
70
+
71
+ def transform_boxes(
72
+ self, boxes: torch.Tensor, normalize=False, orig_hw=None
73
+ ) -> torch.Tensor:
74
+ """
75
+ Expects a tensor of shape Bx4. The coordinates can be in absolute image or normalized coordinates,
76
+ if the coords are in absolute image coordinates, normalize should be set to True and original image size is required.
77
+ """
78
+ boxes = self.transform_coords(boxes.reshape(-1, 2, 2), normalize, orig_hw)
79
+ return boxes
80
+
81
+ def postprocess_masks(self, masks: torch.Tensor, orig_hw) -> torch.Tensor:
82
+ """
83
+ Perform PostProcessing on output masks.
84
+ """
85
+ #from ...sam2.utils.misc import get_connected_components
86
+
87
+ masks = masks.float()
88
+ # if self.max_hole_area > 0:
89
+ # # Holes are those connected components in background with area <= self.fill_hole_area
90
+ # # (background regions are those with mask scores <= self.mask_threshold)
91
+ # mask_flat = masks.flatten(0, 1).unsqueeze(1) # flatten as 1-channel image
92
+ # labels, areas = get_connected_components(mask_flat <= self.mask_threshold)
93
+ # is_hole = (labels > 0) & (areas <= self.max_hole_area)
94
+ # is_hole = is_hole.reshape_as(masks)
95
+ # # We fill holes with a small positive mask score (10.0) to change them to foreground.
96
+ # masks = torch.where(is_hole, self.mask_threshold + 10.0, masks)
97
+
98
+ # if self.max_sprinkle_area > 0:
99
+ # labels, areas = get_connected_components(mask_flat > self.mask_threshold)
100
+ # is_hole = (labels > 0) & (areas <= self.max_sprinkle_area)
101
+ # is_hole = is_hole.reshape_as(masks)
102
+ # # We fill holes with negative mask score (-10.0) to change them to background.
103
+ # masks = torch.where(is_hole, self.mask_threshold - 10.0, masks)
104
+
105
+ masks = F.interpolate(masks, orig_hw, mode="bilinear", align_corners=False)
106
+ return masks
custom_nodes/comfyui-segment-anything-2/sam2_configs/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
custom_nodes/comfyui-segment-anything-2/sam2_configs/sam2.1_hiera_b+.yaml ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # Model
4
+ model:
5
+ _target_: sam2.modeling.sam2_base.SAM2Base
6
+ image_encoder:
7
+ _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
8
+ scalp: 1
9
+ trunk:
10
+ _target_: sam2.modeling.backbones.hieradet.Hiera
11
+ embed_dim: 112
12
+ num_heads: 2
13
+ neck:
14
+ _target_: sam2.modeling.backbones.image_encoder.FpnNeck
15
+ position_encoding:
16
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
17
+ num_pos_feats: 256
18
+ normalize: true
19
+ scale: null
20
+ temperature: 10000
21
+ d_model: 256
22
+ backbone_channel_list: [896, 448, 224, 112]
23
+ fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features
24
+ fpn_interp_model: nearest
25
+
26
+ memory_attention:
27
+ _target_: sam2.modeling.memory_attention.MemoryAttention
28
+ d_model: 256
29
+ pos_enc_at_input: true
30
+ layer:
31
+ _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
32
+ activation: relu
33
+ dim_feedforward: 2048
34
+ dropout: 0.1
35
+ pos_enc_at_attn: false
36
+ self_attention:
37
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
38
+ rope_theta: 10000.0
39
+ feat_sizes: [32, 32]
40
+ embedding_dim: 256
41
+ num_heads: 1
42
+ downsample_rate: 1
43
+ dropout: 0.1
44
+ d_model: 256
45
+ pos_enc_at_cross_attn_keys: true
46
+ pos_enc_at_cross_attn_queries: false
47
+ cross_attention:
48
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
49
+ rope_theta: 10000.0
50
+ feat_sizes: [32, 32]
51
+ rope_k_repeat: True
52
+ embedding_dim: 256
53
+ num_heads: 1
54
+ downsample_rate: 1
55
+ dropout: 0.1
56
+ kv_in_dim: 64
57
+ num_layers: 4
58
+
59
+ memory_encoder:
60
+ _target_: sam2.modeling.memory_encoder.MemoryEncoder
61
+ out_dim: 64
62
+ position_encoding:
63
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
64
+ num_pos_feats: 64
65
+ normalize: true
66
+ scale: null
67
+ temperature: 10000
68
+ mask_downsampler:
69
+ _target_: sam2.modeling.memory_encoder.MaskDownSampler
70
+ kernel_size: 3
71
+ stride: 2
72
+ padding: 1
73
+ fuser:
74
+ _target_: sam2.modeling.memory_encoder.Fuser
75
+ layer:
76
+ _target_: sam2.modeling.memory_encoder.CXBlock
77
+ dim: 256
78
+ kernel_size: 7
79
+ padding: 3
80
+ layer_scale_init_value: 1e-6
81
+ use_dwconv: True # depth-wise convs
82
+ num_layers: 2
83
+
84
+ num_maskmem: 7
85
+ image_size: 1024
86
+ # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
87
+ sigmoid_scale_for_mem_enc: 20.0
88
+ sigmoid_bias_for_mem_enc: -10.0
89
+ use_mask_input_as_output_without_sam: true
90
+ # Memory
91
+ directly_add_no_mem_embed: true
92
+ no_obj_embed_spatial: true
93
+ # use high-resolution feature map in the SAM mask decoder
94
+ use_high_res_features_in_sam: true
95
+ # output 3 masks on the first click on initial conditioning frames
96
+ multimask_output_in_sam: true
97
+ # SAM heads
98
+ iou_prediction_use_sigmoid: True
99
+ # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
100
+ use_obj_ptrs_in_encoder: true
101
+ add_tpos_enc_to_obj_ptrs: true
102
+ proj_tpos_enc_in_obj_ptrs: true
103
+ use_signed_tpos_enc_to_obj_ptrs: true
104
+ only_obj_ptrs_in_the_past_for_eval: true
105
+ # object occlusion prediction
106
+ pred_obj_scores: true
107
+ pred_obj_scores_mlp: true
108
+ fixed_no_obj_ptr: true
109
+ # multimask tracking settings
110
+ multimask_output_for_tracking: true
111
+ use_multimask_token_for_obj_ptr: true
112
+ multimask_min_pt_num: 0
113
+ multimask_max_pt_num: 1
114
+ use_mlp_for_obj_ptr_proj: true
115
+ # Compilation flag
116
+ compile_image_encoder: False
custom_nodes/comfyui-segment-anything-2/sam2_configs/sam2.1_hiera_l.yaml ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # Model
4
+ model:
5
+ _target_: sam2.modeling.sam2_base.SAM2Base
6
+ image_encoder:
7
+ _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
8
+ scalp: 1
9
+ trunk:
10
+ _target_: sam2.modeling.backbones.hieradet.Hiera
11
+ embed_dim: 144
12
+ num_heads: 2
13
+ stages: [2, 6, 36, 4]
14
+ global_att_blocks: [23, 33, 43]
15
+ window_pos_embed_bkg_spatial_size: [7, 7]
16
+ window_spec: [8, 4, 16, 8]
17
+ neck:
18
+ _target_: sam2.modeling.backbones.image_encoder.FpnNeck
19
+ position_encoding:
20
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
21
+ num_pos_feats: 256
22
+ normalize: true
23
+ scale: null
24
+ temperature: 10000
25
+ d_model: 256
26
+ backbone_channel_list: [1152, 576, 288, 144]
27
+ fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features
28
+ fpn_interp_model: nearest
29
+
30
+ memory_attention:
31
+ _target_: sam2.modeling.memory_attention.MemoryAttention
32
+ d_model: 256
33
+ pos_enc_at_input: true
34
+ layer:
35
+ _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
36
+ activation: relu
37
+ dim_feedforward: 2048
38
+ dropout: 0.1
39
+ pos_enc_at_attn: false
40
+ self_attention:
41
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
42
+ rope_theta: 10000.0
43
+ feat_sizes: [32, 32]
44
+ embedding_dim: 256
45
+ num_heads: 1
46
+ downsample_rate: 1
47
+ dropout: 0.1
48
+ d_model: 256
49
+ pos_enc_at_cross_attn_keys: true
50
+ pos_enc_at_cross_attn_queries: false
51
+ cross_attention:
52
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
53
+ rope_theta: 10000.0
54
+ feat_sizes: [32, 32]
55
+ rope_k_repeat: True
56
+ embedding_dim: 256
57
+ num_heads: 1
58
+ downsample_rate: 1
59
+ dropout: 0.1
60
+ kv_in_dim: 64
61
+ num_layers: 4
62
+
63
+ memory_encoder:
64
+ _target_: sam2.modeling.memory_encoder.MemoryEncoder
65
+ out_dim: 64
66
+ position_encoding:
67
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
68
+ num_pos_feats: 64
69
+ normalize: true
70
+ scale: null
71
+ temperature: 10000
72
+ mask_downsampler:
73
+ _target_: sam2.modeling.memory_encoder.MaskDownSampler
74
+ kernel_size: 3
75
+ stride: 2
76
+ padding: 1
77
+ fuser:
78
+ _target_: sam2.modeling.memory_encoder.Fuser
79
+ layer:
80
+ _target_: sam2.modeling.memory_encoder.CXBlock
81
+ dim: 256
82
+ kernel_size: 7
83
+ padding: 3
84
+ layer_scale_init_value: 1e-6
85
+ use_dwconv: True # depth-wise convs
86
+ num_layers: 2
87
+
88
+ num_maskmem: 7
89
+ image_size: 1024
90
+ # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
91
+ sigmoid_scale_for_mem_enc: 20.0
92
+ sigmoid_bias_for_mem_enc: -10.0
93
+ use_mask_input_as_output_without_sam: true
94
+ # Memory
95
+ directly_add_no_mem_embed: true
96
+ no_obj_embed_spatial: true
97
+ # use high-resolution feature map in the SAM mask decoder
98
+ use_high_res_features_in_sam: true
99
+ # output 3 masks on the first click on initial conditioning frames
100
+ multimask_output_in_sam: true
101
+ # SAM heads
102
+ iou_prediction_use_sigmoid: True
103
+ # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
104
+ use_obj_ptrs_in_encoder: true
105
+ add_tpos_enc_to_obj_ptrs: true
106
+ proj_tpos_enc_in_obj_ptrs: true
107
+ use_signed_tpos_enc_to_obj_ptrs: true
108
+ only_obj_ptrs_in_the_past_for_eval: true
109
+ # object occlusion prediction
110
+ pred_obj_scores: true
111
+ pred_obj_scores_mlp: true
112
+ fixed_no_obj_ptr: true
113
+ # multimask tracking settings
114
+ multimask_output_for_tracking: true
115
+ use_multimask_token_for_obj_ptr: true
116
+ multimask_min_pt_num: 0
117
+ multimask_max_pt_num: 1
118
+ use_mlp_for_obj_ptr_proj: true
119
+ # Compilation flag
120
+ compile_image_encoder: False
custom_nodes/comfyui-segment-anything-2/sam2_configs/sam2.1_hiera_s.yaml ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # Model
4
+ model:
5
+ _target_: sam2.modeling.sam2_base.SAM2Base
6
+ image_encoder:
7
+ _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
8
+ scalp: 1
9
+ trunk:
10
+ _target_: sam2.modeling.backbones.hieradet.Hiera
11
+ embed_dim: 96
12
+ num_heads: 1
13
+ stages: [1, 2, 11, 2]
14
+ global_att_blocks: [7, 10, 13]
15
+ window_pos_embed_bkg_spatial_size: [7, 7]
16
+ neck:
17
+ _target_: sam2.modeling.backbones.image_encoder.FpnNeck
18
+ position_encoding:
19
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
20
+ num_pos_feats: 256
21
+ normalize: true
22
+ scale: null
23
+ temperature: 10000
24
+ d_model: 256
25
+ backbone_channel_list: [768, 384, 192, 96]
26
+ fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features
27
+ fpn_interp_model: nearest
28
+
29
+ memory_attention:
30
+ _target_: sam2.modeling.memory_attention.MemoryAttention
31
+ d_model: 256
32
+ pos_enc_at_input: true
33
+ layer:
34
+ _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
35
+ activation: relu
36
+ dim_feedforward: 2048
37
+ dropout: 0.1
38
+ pos_enc_at_attn: false
39
+ self_attention:
40
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
41
+ rope_theta: 10000.0
42
+ feat_sizes: [32, 32]
43
+ embedding_dim: 256
44
+ num_heads: 1
45
+ downsample_rate: 1
46
+ dropout: 0.1
47
+ d_model: 256
48
+ pos_enc_at_cross_attn_keys: true
49
+ pos_enc_at_cross_attn_queries: false
50
+ cross_attention:
51
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
52
+ rope_theta: 10000.0
53
+ feat_sizes: [32, 32]
54
+ rope_k_repeat: True
55
+ embedding_dim: 256
56
+ num_heads: 1
57
+ downsample_rate: 1
58
+ dropout: 0.1
59
+ kv_in_dim: 64
60
+ num_layers: 4
61
+
62
+ memory_encoder:
63
+ _target_: sam2.modeling.memory_encoder.MemoryEncoder
64
+ out_dim: 64
65
+ position_encoding:
66
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
67
+ num_pos_feats: 64
68
+ normalize: true
69
+ scale: null
70
+ temperature: 10000
71
+ mask_downsampler:
72
+ _target_: sam2.modeling.memory_encoder.MaskDownSampler
73
+ kernel_size: 3
74
+ stride: 2
75
+ padding: 1
76
+ fuser:
77
+ _target_: sam2.modeling.memory_encoder.Fuser
78
+ layer:
79
+ _target_: sam2.modeling.memory_encoder.CXBlock
80
+ dim: 256
81
+ kernel_size: 7
82
+ padding: 3
83
+ layer_scale_init_value: 1e-6
84
+ use_dwconv: True # depth-wise convs
85
+ num_layers: 2
86
+
87
+ num_maskmem: 7
88
+ image_size: 1024
89
+ # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
90
+ sigmoid_scale_for_mem_enc: 20.0
91
+ sigmoid_bias_for_mem_enc: -10.0
92
+ use_mask_input_as_output_without_sam: true
93
+ # Memory
94
+ directly_add_no_mem_embed: true
95
+ no_obj_embed_spatial: true
96
+ # use high-resolution feature map in the SAM mask decoder
97
+ use_high_res_features_in_sam: true
98
+ # output 3 masks on the first click on initial conditioning frames
99
+ multimask_output_in_sam: true
100
+ # SAM heads
101
+ iou_prediction_use_sigmoid: True
102
+ # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
103
+ use_obj_ptrs_in_encoder: true
104
+ add_tpos_enc_to_obj_ptrs: true
105
+ proj_tpos_enc_in_obj_ptrs: true
106
+ use_signed_tpos_enc_to_obj_ptrs: true
107
+ only_obj_ptrs_in_the_past_for_eval: true
108
+ # object occlusion prediction
109
+ pred_obj_scores: true
110
+ pred_obj_scores_mlp: true
111
+ fixed_no_obj_ptr: true
112
+ # multimask tracking settings
113
+ multimask_output_for_tracking: true
114
+ use_multimask_token_for_obj_ptr: true
115
+ multimask_min_pt_num: 0
116
+ multimask_max_pt_num: 1
117
+ use_mlp_for_obj_ptr_proj: true
118
+ # Compilation flag
119
+ compile_image_encoder: False
custom_nodes/comfyui-segment-anything-2/sam2_configs/sam2.1_hiera_t.yaml ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # Model
4
+ model:
5
+ _target_: sam2.modeling.sam2_base.SAM2Base
6
+ image_encoder:
7
+ _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
8
+ scalp: 1
9
+ trunk:
10
+ _target_: sam2.modeling.backbones.hieradet.Hiera
11
+ embed_dim: 96
12
+ num_heads: 1
13
+ stages: [1, 2, 7, 2]
14
+ global_att_blocks: [5, 7, 9]
15
+ window_pos_embed_bkg_spatial_size: [7, 7]
16
+ neck:
17
+ _target_: sam2.modeling.backbones.image_encoder.FpnNeck
18
+ position_encoding:
19
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
20
+ num_pos_feats: 256
21
+ normalize: true
22
+ scale: null
23
+ temperature: 10000
24
+ d_model: 256
25
+ backbone_channel_list: [768, 384, 192, 96]
26
+ fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features
27
+ fpn_interp_model: nearest
28
+
29
+ memory_attention:
30
+ _target_: sam2.modeling.memory_attention.MemoryAttention
31
+ d_model: 256
32
+ pos_enc_at_input: true
33
+ layer:
34
+ _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
35
+ activation: relu
36
+ dim_feedforward: 2048
37
+ dropout: 0.1
38
+ pos_enc_at_attn: false
39
+ self_attention:
40
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
41
+ rope_theta: 10000.0
42
+ feat_sizes: [32, 32]
43
+ embedding_dim: 256
44
+ num_heads: 1
45
+ downsample_rate: 1
46
+ dropout: 0.1
47
+ d_model: 256
48
+ pos_enc_at_cross_attn_keys: true
49
+ pos_enc_at_cross_attn_queries: false
50
+ cross_attention:
51
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
52
+ rope_theta: 10000.0
53
+ feat_sizes: [32, 32]
54
+ rope_k_repeat: True
55
+ embedding_dim: 256
56
+ num_heads: 1
57
+ downsample_rate: 1
58
+ dropout: 0.1
59
+ kv_in_dim: 64
60
+ num_layers: 4
61
+
62
+ memory_encoder:
63
+ _target_: sam2.modeling.memory_encoder.MemoryEncoder
64
+ out_dim: 64
65
+ position_encoding:
66
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
67
+ num_pos_feats: 64
68
+ normalize: true
69
+ scale: null
70
+ temperature: 10000
71
+ mask_downsampler:
72
+ _target_: sam2.modeling.memory_encoder.MaskDownSampler
73
+ kernel_size: 3
74
+ stride: 2
75
+ padding: 1
76
+ fuser:
77
+ _target_: sam2.modeling.memory_encoder.Fuser
78
+ layer:
79
+ _target_: sam2.modeling.memory_encoder.CXBlock
80
+ dim: 256
81
+ kernel_size: 7
82
+ padding: 3
83
+ layer_scale_init_value: 1e-6
84
+ use_dwconv: True # depth-wise convs
85
+ num_layers: 2
86
+
87
+ num_maskmem: 7
88
+ image_size: 1024
89
+ # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
90
+ # SAM decoder
91
+ sigmoid_scale_for_mem_enc: 20.0
92
+ sigmoid_bias_for_mem_enc: -10.0
93
+ use_mask_input_as_output_without_sam: true
94
+ # Memory
95
+ directly_add_no_mem_embed: true
96
+ no_obj_embed_spatial: true
97
+ # use high-resolution feature map in the SAM mask decoder
98
+ use_high_res_features_in_sam: true
99
+ # output 3 masks on the first click on initial conditioning frames
100
+ multimask_output_in_sam: true
101
+ # SAM heads
102
+ iou_prediction_use_sigmoid: True
103
+ # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
104
+ use_obj_ptrs_in_encoder: true
105
+ add_tpos_enc_to_obj_ptrs: true
106
+ proj_tpos_enc_in_obj_ptrs: true
107
+ use_signed_tpos_enc_to_obj_ptrs: true
108
+ only_obj_ptrs_in_the_past_for_eval: true
109
+ # object occlusion prediction
110
+ pred_obj_scores: true
111
+ pred_obj_scores_mlp: true
112
+ fixed_no_obj_ptr: true
113
+ # multimask tracking settings
114
+ multimask_output_for_tracking: true
115
+ use_multimask_token_for_obj_ptr: true
116
+ multimask_min_pt_num: 0
117
+ multimask_max_pt_num: 1
118
+ use_mlp_for_obj_ptr_proj: true
119
+ # Compilation flag
120
+ # HieraT does not currently support compilation, should always be set to False
121
+ compile_image_encoder: False
custom_nodes/comfyui-segment-anything-2/sam2_configs/sam2_hiera_b+.yaml ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # Model
4
+ model:
5
+ _target_: sam2.modeling.sam2_base.SAM2Base
6
+ image_encoder:
7
+ _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
8
+ scalp: 1
9
+ trunk:
10
+ _target_: sam2.modeling.backbones.hieradet.Hiera
11
+ embed_dim: 112
12
+ num_heads: 2
13
+ stages: [2, 3, 16, 3]
14
+ global_att_blocks: [12, 16, 20]
15
+ window_pos_embed_bkg_spatial_size: [14, 14]
16
+ neck:
17
+ _target_: sam2.modeling.backbones.image_encoder.FpnNeck
18
+ position_encoding:
19
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
20
+ num_pos_feats: 256
21
+ normalize: true
22
+ scale: null
23
+ temperature: 10000
24
+ d_model: 256
25
+ backbone_channel_list: [896, 448, 224, 112]
26
+ fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features
27
+ fpn_interp_model: nearest
28
+
29
+ memory_attention:
30
+ _target_: sam2.modeling.memory_attention.MemoryAttention
31
+ d_model: 256
32
+ pos_enc_at_input: true
33
+ layer:
34
+ _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
35
+ activation: relu
36
+ dim_feedforward: 2048
37
+ dropout: 0.1
38
+ pos_enc_at_attn: false
39
+ self_attention:
40
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
41
+ rope_theta: 10000.0
42
+ feat_sizes: [32, 32]
43
+ embedding_dim: 256
44
+ num_heads: 1
45
+ downsample_rate: 1
46
+ dropout: 0.1
47
+ d_model: 256
48
+ pos_enc_at_cross_attn_keys: true
49
+ pos_enc_at_cross_attn_queries: false
50
+ cross_attention:
51
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
52
+ rope_theta: 10000.0
53
+ feat_sizes: [32, 32]
54
+ rope_k_repeat: True
55
+ embedding_dim: 256
56
+ num_heads: 1
57
+ downsample_rate: 1
58
+ dropout: 0.1
59
+ kv_in_dim: 64
60
+ num_layers: 4
61
+
62
+ memory_encoder:
63
+ _target_: sam2.modeling.memory_encoder.MemoryEncoder
64
+ out_dim: 64
65
+ position_encoding:
66
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
67
+ num_pos_feats: 64
68
+ normalize: true
69
+ scale: null
70
+ temperature: 10000
71
+ mask_downsampler:
72
+ _target_: sam2.modeling.memory_encoder.MaskDownSampler
73
+ kernel_size: 3
74
+ stride: 2
75
+ padding: 1
76
+ fuser:
77
+ _target_: sam2.modeling.memory_encoder.Fuser
78
+ layer:
79
+ _target_: sam2.modeling.memory_encoder.CXBlock
80
+ dim: 256
81
+ kernel_size: 7
82
+ padding: 3
83
+ layer_scale_init_value: 1e-6
84
+ use_dwconv: True # depth-wise convs
85
+ num_layers: 2
86
+
87
+ num_maskmem: 7
88
+ image_size: 1024
89
+ # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
90
+ sigmoid_scale_for_mem_enc: 20.0
91
+ sigmoid_bias_for_mem_enc: -10.0
92
+ use_mask_input_as_output_without_sam: true
93
+ # Memory
94
+ directly_add_no_mem_embed: true
95
+ no_obj_embed_spatial: false
96
+ # use high-resolution feature map in the SAM mask decoder
97
+ use_high_res_features_in_sam: true
98
+ # output 3 masks on the first click on initial conditioning frames
99
+ multimask_output_in_sam: true
100
+ # SAM heads
101
+ iou_prediction_use_sigmoid: True
102
+ # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
103
+ use_obj_ptrs_in_encoder: true
104
+ add_tpos_enc_to_obj_ptrs: false
105
+ proj_tpos_enc_in_obj_ptrs: false
106
+ use_signed_tpos_enc_to_obj_ptrs: false
107
+ only_obj_ptrs_in_the_past_for_eval: true
108
+ # object occlusion prediction
109
+ pred_obj_scores: true
110
+ pred_obj_scores_mlp: true
111
+ fixed_no_obj_ptr: true
112
+ # multimask tracking settings
113
+ multimask_output_for_tracking: true
114
+ use_multimask_token_for_obj_ptr: true
115
+ multimask_min_pt_num: 0
116
+ multimask_max_pt_num: 1
117
+ use_mlp_for_obj_ptr_proj: true
118
+ # Compilation flag
119
+ compile_image_encoder: False
custom_nodes/comfyui-segment-anything-2/sam2_configs/sam2_hiera_l.yaml ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # Model
4
+ model:
5
+ _target_: sam2.modeling.sam2_base.SAM2Base
6
+ image_encoder:
7
+ _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
8
+ scalp: 1
9
+ trunk:
10
+ _target_: sam2.modeling.backbones.hieradet.Hiera
11
+ embed_dim: 144
12
+ num_heads: 2
13
+ stages: [2, 6, 36, 4]
14
+ global_att_blocks: [23, 33, 43]
15
+ window_pos_embed_bkg_spatial_size: [7, 7]
16
+ window_spec: [8, 4, 16, 8]
17
+ neck:
18
+ _target_: sam2.modeling.backbones.image_encoder.FpnNeck
19
+ position_encoding:
20
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
21
+ num_pos_feats: 256
22
+ normalize: true
23
+ scale: null
24
+ temperature: 10000
25
+ d_model: 256
26
+ backbone_channel_list: [1152, 576, 288, 144]
27
+ fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features
28
+ fpn_interp_model: nearest
29
+
30
+ memory_attention:
31
+ _target_: sam2.modeling.memory_attention.MemoryAttention
32
+ d_model: 256
33
+ pos_enc_at_input: true
34
+ layer:
35
+ _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
36
+ activation: relu
37
+ dim_feedforward: 2048
38
+ dropout: 0.1
39
+ pos_enc_at_attn: false
40
+ self_attention:
41
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
42
+ rope_theta: 10000.0
43
+ feat_sizes: [32, 32]
44
+ embedding_dim: 256
45
+ num_heads: 1
46
+ downsample_rate: 1
47
+ dropout: 0.1
48
+ d_model: 256
49
+ pos_enc_at_cross_attn_keys: true
50
+ pos_enc_at_cross_attn_queries: false
51
+ cross_attention:
52
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
53
+ rope_theta: 10000.0
54
+ feat_sizes: [32, 32]
55
+ rope_k_repeat: True
56
+ embedding_dim: 256
57
+ num_heads: 1
58
+ downsample_rate: 1
59
+ dropout: 0.1
60
+ kv_in_dim: 64
61
+ num_layers: 4
62
+
63
+ memory_encoder:
64
+ _target_: sam2.modeling.memory_encoder.MemoryEncoder
65
+ out_dim: 64
66
+ position_encoding:
67
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
68
+ num_pos_feats: 64
69
+ normalize: true
70
+ scale: null
71
+ temperature: 10000
72
+ mask_downsampler:
73
+ _target_: sam2.modeling.memory_encoder.MaskDownSampler
74
+ kernel_size: 3
75
+ stride: 2
76
+ padding: 1
77
+ fuser:
78
+ _target_: sam2.modeling.memory_encoder.Fuser
79
+ layer:
80
+ _target_: sam2.modeling.memory_encoder.CXBlock
81
+ dim: 256
82
+ kernel_size: 7
83
+ padding: 3
84
+ layer_scale_init_value: 1e-6
85
+ use_dwconv: True # depth-wise convs
86
+ num_layers: 2
87
+
88
+ num_maskmem: 7
89
+ image_size: 1024
90
+ # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
91
+ sigmoid_scale_for_mem_enc: 20.0
92
+ sigmoid_bias_for_mem_enc: -10.0
93
+ use_mask_input_as_output_without_sam: true
94
+ # Memory
95
+ directly_add_no_mem_embed: true
96
+ no_obj_embed_spatial: false
97
+ # use high-resolution feature map in the SAM mask decoder
98
+ use_high_res_features_in_sam: true
99
+ # output 3 masks on the first click on initial conditioning frames
100
+ multimask_output_in_sam: true
101
+ # SAM heads
102
+ iou_prediction_use_sigmoid: True
103
+ # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
104
+ use_obj_ptrs_in_encoder: true
105
+ add_tpos_enc_to_obj_ptrs: false
106
+ proj_tpos_enc_in_obj_ptrs: false
107
+ use_signed_tpos_enc_to_obj_ptrs: false
108
+ only_obj_ptrs_in_the_past_for_eval: true
109
+ # object occlusion prediction
110
+ pred_obj_scores: true
111
+ pred_obj_scores_mlp: true
112
+ fixed_no_obj_ptr: true
113
+ # multimask tracking settings
114
+ multimask_output_for_tracking: true
115
+ use_multimask_token_for_obj_ptr: true
116
+ multimask_min_pt_num: 0
117
+ multimask_max_pt_num: 1
118
+ use_mlp_for_obj_ptr_proj: true
119
+ # Compilation flag
120
+ compile_image_encoder: False
custom_nodes/comfyui-segment-anything-2/sam2_configs/sam2_hiera_s.yaml ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # Model
4
+ model:
5
+ _target_: sam2.modeling.sam2_base.SAM2Base
6
+ image_encoder:
7
+ _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
8
+ scalp: 1
9
+ trunk:
10
+ _target_: sam2.modeling.backbones.hieradet.Hiera
11
+ embed_dim: 96
12
+ num_heads: 1
13
+ stages: [1, 2, 11, 2]
14
+ global_att_blocks: [7, 10, 13]
15
+ window_pos_embed_bkg_spatial_size: [7, 7]
16
+ neck:
17
+ _target_: sam2.modeling.backbones.image_encoder.FpnNeck
18
+ position_encoding:
19
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
20
+ num_pos_feats: 256
21
+ normalize: true
22
+ scale: null
23
+ temperature: 10000
24
+ d_model: 256
25
+ backbone_channel_list: [768, 384, 192, 96]
26
+ fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features
27
+ fpn_interp_model: nearest
28
+
29
+ memory_attention:
30
+ _target_: sam2.modeling.memory_attention.MemoryAttention
31
+ d_model: 256
32
+ pos_enc_at_input: true
33
+ layer:
34
+ _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
35
+ activation: relu
36
+ dim_feedforward: 2048
37
+ dropout: 0.1
38
+ pos_enc_at_attn: false
39
+ self_attention:
40
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
41
+ rope_theta: 10000.0
42
+ feat_sizes: [32, 32]
43
+ embedding_dim: 256
44
+ num_heads: 1
45
+ downsample_rate: 1
46
+ dropout: 0.1
47
+ d_model: 256
48
+ pos_enc_at_cross_attn_keys: true
49
+ pos_enc_at_cross_attn_queries: false
50
+ cross_attention:
51
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
52
+ rope_theta: 10000.0
53
+ feat_sizes: [32, 32]
54
+ rope_k_repeat: True
55
+ embedding_dim: 256
56
+ num_heads: 1
57
+ downsample_rate: 1
58
+ dropout: 0.1
59
+ kv_in_dim: 64
60
+ num_layers: 4
61
+
62
+ memory_encoder:
63
+ _target_: sam2.modeling.memory_encoder.MemoryEncoder
64
+ out_dim: 64
65
+ position_encoding:
66
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
67
+ num_pos_feats: 64
68
+ normalize: true
69
+ scale: null
70
+ temperature: 10000
71
+ mask_downsampler:
72
+ _target_: sam2.modeling.memory_encoder.MaskDownSampler
73
+ kernel_size: 3
74
+ stride: 2
75
+ padding: 1
76
+ fuser:
77
+ _target_: sam2.modeling.memory_encoder.Fuser
78
+ layer:
79
+ _target_: sam2.modeling.memory_encoder.CXBlock
80
+ dim: 256
81
+ kernel_size: 7
82
+ padding: 3
83
+ layer_scale_init_value: 1e-6
84
+ use_dwconv: True # depth-wise convs
85
+ num_layers: 2
86
+
87
+ num_maskmem: 7
88
+ image_size: 1024
89
+ # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
90
+ sigmoid_scale_for_mem_enc: 20.0
91
+ sigmoid_bias_for_mem_enc: -10.0
92
+ use_mask_input_as_output_without_sam: true
93
+ # Memory
94
+ directly_add_no_mem_embed: true
95
+ no_obj_embed_spatial: false
96
+ # use high-resolution feature map in the SAM mask decoder
97
+ use_high_res_features_in_sam: true
98
+ # output 3 masks on the first click on initial conditioning frames
99
+ multimask_output_in_sam: true
100
+ # SAM heads
101
+ iou_prediction_use_sigmoid: True
102
+ # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
103
+ use_obj_ptrs_in_encoder: true
104
+ add_tpos_enc_to_obj_ptrs: false
105
+ proj_tpos_enc_in_obj_ptrs: false
106
+ use_signed_tpos_enc_to_obj_ptrs: false
107
+ only_obj_ptrs_in_the_past_for_eval: true
108
+ # object occlusion prediction
109
+ pred_obj_scores: true
110
+ pred_obj_scores_mlp: true
111
+ fixed_no_obj_ptr: true
112
+ # multimask tracking settings
113
+ multimask_output_for_tracking: true
114
+ use_multimask_token_for_obj_ptr: true
115
+ multimask_min_pt_num: 0
116
+ multimask_max_pt_num: 1
117
+ use_mlp_for_obj_ptr_proj: true
118
+ # Compilation flag
119
+ compile_image_encoder: False
custom_nodes/comfyui-segment-anything-2/sam2_configs/sam2_hiera_t.yaml ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # Model
4
+ model:
5
+ _target_: sam2.modeling.sam2_base.SAM2Base
6
+ image_encoder:
7
+ _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
8
+ scalp: 1
9
+ trunk:
10
+ _target_: sam2.modeling.backbones.hieradet.Hiera
11
+ embed_dim: 96
12
+ num_heads: 1
13
+ stages: [1, 2, 7, 2]
14
+ global_att_blocks: [5, 7, 9]
15
+ window_pos_embed_bkg_spatial_size: [7, 7]
16
+ neck:
17
+ _target_: sam2.modeling.backbones.image_encoder.FpnNeck
18
+ position_encoding:
19
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
20
+ num_pos_feats: 256
21
+ normalize: true
22
+ scale: null
23
+ temperature: 10000
24
+ d_model: 256
25
+ backbone_channel_list: [768, 384, 192, 96]
26
+ fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features
27
+ fpn_interp_model: nearest
28
+
29
+ memory_attention:
30
+ _target_: sam2.modeling.memory_attention.MemoryAttention
31
+ d_model: 256
32
+ pos_enc_at_input: true
33
+ layer:
34
+ _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
35
+ activation: relu
36
+ dim_feedforward: 2048
37
+ dropout: 0.1
38
+ pos_enc_at_attn: false
39
+ self_attention:
40
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
41
+ rope_theta: 10000.0
42
+ feat_sizes: [32, 32]
43
+ embedding_dim: 256
44
+ num_heads: 1
45
+ downsample_rate: 1
46
+ dropout: 0.1
47
+ d_model: 256
48
+ pos_enc_at_cross_attn_keys: true
49
+ pos_enc_at_cross_attn_queries: false
50
+ cross_attention:
51
+ _target_: sam2.modeling.sam.transformer.RoPEAttention
52
+ rope_theta: 10000.0
53
+ feat_sizes: [32, 32]
54
+ rope_k_repeat: True
55
+ embedding_dim: 256
56
+ num_heads: 1
57
+ downsample_rate: 1
58
+ dropout: 0.1
59
+ kv_in_dim: 64
60
+ num_layers: 4
61
+
62
+ memory_encoder:
63
+ _target_: sam2.modeling.memory_encoder.MemoryEncoder
64
+ out_dim: 64
65
+ position_encoding:
66
+ _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
67
+ num_pos_feats: 64
68
+ normalize: true
69
+ scale: null
70
+ temperature: 10000
71
+ mask_downsampler:
72
+ _target_: sam2.modeling.memory_encoder.MaskDownSampler
73
+ kernel_size: 3
74
+ stride: 2
75
+ padding: 1
76
+ fuser:
77
+ _target_: sam2.modeling.memory_encoder.Fuser
78
+ layer:
79
+ _target_: sam2.modeling.memory_encoder.CXBlock
80
+ dim: 256
81
+ kernel_size: 7
82
+ padding: 3
83
+ layer_scale_init_value: 1e-6
84
+ use_dwconv: True # depth-wise convs
85
+ num_layers: 2
86
+
87
+ num_maskmem: 7
88
+ image_size: 1024
89
+ # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
90
+ # SAM decoder
91
+ sigmoid_scale_for_mem_enc: 20.0
92
+ sigmoid_bias_for_mem_enc: -10.0
93
+ use_mask_input_as_output_without_sam: true
94
+ # Memory
95
+ directly_add_no_mem_embed: true
96
+ no_obj_embed_spatial: false
97
+ # use high-resolution feature map in the SAM mask decoder
98
+ use_high_res_features_in_sam: true
99
+ # output 3 masks on the first click on initial conditioning frames
100
+ multimask_output_in_sam: true
101
+ # SAM heads
102
+ iou_prediction_use_sigmoid: True
103
+ # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
104
+ use_obj_ptrs_in_encoder: true
105
+ add_tpos_enc_to_obj_ptrs: false
106
+ proj_tpos_enc_in_obj_ptrs: false
107
+ use_signed_tpos_enc_to_obj_ptrs: false
108
+ only_obj_ptrs_in_the_past_for_eval: true
109
+ # object occlusion prediction
110
+ pred_obj_scores: true
111
+ pred_obj_scores_mlp: true
112
+ fixed_no_obj_ptr: true
113
+ # multimask tracking settings
114
+ multimask_output_for_tracking: true
115
+ use_multimask_token_for_obj_ptr: true
116
+ multimask_min_pt_num: 0
117
+ multimask_max_pt_num: 1
118
+ use_mlp_for_obj_ptr_proj: true
119
+ # Compilation flag
120
+ # HieraT does not currently support compilation, should always be set to False
121
+ compile_image_encoder: False
custom_nodes/comfyui-tensorops/.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Auto detect text files and perform LF normalization
2
+ * text=auto
custom_nodes/comfyui-tensorops/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ __pycache__/
2
+ config_.py
custom_nodes/comfyui-tensorops/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS
2
+
3
+ __all__ = ['NODE_CLASS_MAPPINGS', 'NODE_DISPLAY_NAME_MAPPINGS']
custom_nodes/comfyui-tensorops/nodes/__init__.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from .channel_select import ChannelSelector
from .mask_image import MaskImage
from .save_surreal import SaveJsonToSurreal, SaveTextToSurreal
from .fetch_surreal import FetchJsonFromSurreal
from .foreground_mask import ForegroundMask
from .save_to_s3 import SaveImageToS3
from .redis import SaveToRedis, FetchFromRedis
from .fal import FalDifferentialDiffusion, FalDiffusion
from .background_select import BackgroundSelect
from .layer_mask import GetLayerMask
from .stream import SendImageOnWebSocket, SendJsonOnWebSocket
from .separate_mask import SeparateMask
from .face_swap import FaceSwap

# Registry consumed by ComfyUI: node identifier -> implementing class.
NODE_CLASS_MAPPINGS = {
    "ChannelSelector": ChannelSelector,
    "MaskImage": MaskImage,
    "SaveImageToS3": SaveImageToS3,
    "SaveJsonToSurreal": SaveJsonToSurreal,
    "SaveTextToSurreal": SaveTextToSurreal,
    "FetchJsonFromSurreal": FetchJsonFromSurreal,
    "ForegroundMask": ForegroundMask,
    "SaveToRedis": SaveToRedis,
    "FetchFromRedis": FetchFromRedis,
    "FalDifferentialDiffusion": FalDifferentialDiffusion,
    "FalDiffusion": FalDiffusion,
    "BackgroundSelect": BackgroundSelect,
    "GetLayerMask": GetLayerMask,
    "SendImageOnWebSocket": SendImageOnWebSocket,
    "SendJsonOnWebSocket": SendJsonOnWebSocket,
    "SeparateMask": SeparateMask,
    "FaceSwap": FaceSwap,
}

# Human-readable titles shown in the ComfyUI node picker. Every node here
# is displayed under its own identifier, so derive the map instead of
# maintaining a second hand-written copy.
NODE_DISPLAY_NAME_MAPPINGS = {name: name for name in NODE_CLASS_MAPPINGS}
custom_nodes/comfyui-tensorops/nodes/background_select.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+
4
def get_background_mask(tensor: torch.Tensor):
    """Select the most likely background mask from a batch of masks.

    Heuristic: the background is usually the mask with the largest area.
    If that mask does not touch any image border, fall back to the first
    other mask (in batch order) that does reach a border; if none does,
    the largest-area mask is returned anyway.

    Args:
        tensor (torch.Tensor): Mask batch of shape (B, H, W). Nonzero
            pixels are treated as part of a mask.
            (The original docstring claimed shape (B, H, W, 1), which
            contradicted the 3-way unpack of ``tensor.shape``.)

    Returns:
        torch.Tensor: The selected background mask of shape (H, W).
    """
    batch_size = tensor.shape[0]

    def _touches_border(mask: torch.Tensor) -> bool:
        # A background region is expected to reach at least one image edge.
        return bool(
            torch.any(mask[0, :])
            or torch.any(mask[-1, :])
            or torch.any(mask[:, 0])
            or torch.any(mask[:, -1])
        )

    # Area of each mask; for soft (float) masks this is the summed activation.
    areas = tensor.sum(dim=(1, 2))  # shape (B,)

    largest_idx = torch.argmax(areas)
    background_mask = tensor[largest_idx]

    # If the largest mask is fully interior, prefer the first other mask
    # that does touch a border (scanning in batch order).
    if not _touches_border(background_mask):
        for i in range(batch_size):
            if i != largest_idx and _touches_border(tensor[i]):
                background_mask = tensor[i]
                break

    return background_mask
48
+
49
class BackgroundSelect:
    """ComfyUI node that reduces a batch of masks to its background mask.

    Delegates the selection heuristic to the module-level
    ``get_background_mask`` helper.
    """

    @classmethod
    def INPUT_TYPES(s):
        required = {"mask": ("MASK",)}
        return {"required": required}

    RETURN_TYPES = ("MASK",)
    FUNCTION = "main"
    CATEGORY = "tensorops"

    def main(self, mask: torch.Tensor):
        # Pick the single mask most likely to be the background and hand it
        # back as the node's MASK output tuple.
        selected = get_background_mask(mask)
        return (selected,)
70
+ return (background_mask,)
71
+