Delete sd-webui-stablesr
Browse files- sd-webui-stablesr/.gitignore +0 -7
- sd-webui-stablesr/LICENSE +0 -35
- sd-webui-stablesr/LICENSE2 +0 -437
- sd-webui-stablesr/README.md +0 -156
- sd-webui-stablesr/README_CN.md +0 -150
- sd-webui-stablesr/scripts/__pycache__/stablesr.cpython-310.pyc +0 -0
- sd-webui-stablesr/scripts/stablesr.py +0 -276
- sd-webui-stablesr/srmodule/__pycache__/attn.cpython-310.pyc +0 -0
- sd-webui-stablesr/srmodule/__pycache__/colorfix.cpython-310.pyc +0 -0
- sd-webui-stablesr/srmodule/__pycache__/spade.cpython-310.pyc +0 -0
- sd-webui-stablesr/srmodule/__pycache__/struct_cond.cpython-310.pyc +0 -0
- sd-webui-stablesr/srmodule/attn.py +0 -111
- sd-webui-stablesr/srmodule/colorfix.py +0 -114
- sd-webui-stablesr/srmodule/spade.py +0 -206
- sd-webui-stablesr/srmodule/struct_cond.py +0 -353
- sd-webui-stablesr/tools/extract_srmodule.py +0 -20
- sd-webui-stablesr/tools/extract_vaecfw.py +0 -20
sd-webui-stablesr/.gitignore
DELETED
|
@@ -1,7 +0,0 @@
|
|
| 1 |
-
# meta
|
| 2 |
-
.vscode/
|
| 3 |
-
__pycache__/
|
| 4 |
-
.DS_Store
|
| 5 |
-
|
| 6 |
-
# settings
|
| 7 |
-
models/*.ckpt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sd-webui-stablesr/LICENSE
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
S-Lab License 1.0
|
| 2 |
-
|
| 3 |
-
Copyright 2022 S-Lab
|
| 4 |
-
|
| 5 |
-
Redistribution and use for non-commercial purpose in source and
|
| 6 |
-
binary forms, with or without modification, are permitted provided
|
| 7 |
-
that the following conditions are met:
|
| 8 |
-
|
| 9 |
-
1. Redistributions of source code must retain the above copyright
|
| 10 |
-
notice, this list of conditions and the following disclaimer.
|
| 11 |
-
|
| 12 |
-
2. Redistributions in binary form must reproduce the above copyright
|
| 13 |
-
notice, this list of conditions and the following disclaimer in
|
| 14 |
-
the documentation and/or other materials provided with the
|
| 15 |
-
distribution.
|
| 16 |
-
|
| 17 |
-
3. Neither the name of the copyright holder nor the names of its
|
| 18 |
-
contributors may be used to endorse or promote products derived
|
| 19 |
-
from this software without specific prior written permission.
|
| 20 |
-
|
| 21 |
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 22 |
-
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 23 |
-
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 24 |
-
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 25 |
-
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| 26 |
-
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| 27 |
-
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| 28 |
-
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| 29 |
-
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 30 |
-
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 31 |
-
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 32 |
-
|
| 33 |
-
In the event that redistribution and/or use for commercial purpose in
|
| 34 |
-
source or binary forms, with or without modification is required,
|
| 35 |
-
please contact the contributor(s) of the work.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sd-webui-stablesr/LICENSE2
DELETED
|
@@ -1,437 +0,0 @@
|
|
| 1 |
-
Attribution-NonCommercial-ShareAlike 4.0 International
|
| 2 |
-
|
| 3 |
-
=======================================================================
|
| 4 |
-
|
| 5 |
-
Creative Commons Corporation ("Creative Commons") is not a law firm and
|
| 6 |
-
does not provide legal services or legal advice. Distribution of
|
| 7 |
-
Creative Commons public licenses does not create a lawyer-client or
|
| 8 |
-
other relationship. Creative Commons makes its licenses and related
|
| 9 |
-
information available on an "as-is" basis. Creative Commons gives no
|
| 10 |
-
warranties regarding its licenses, any material licensed under their
|
| 11 |
-
terms and conditions, or any related information. Creative Commons
|
| 12 |
-
disclaims all liability for damages resulting from their use to the
|
| 13 |
-
fullest extent possible.
|
| 14 |
-
|
| 15 |
-
Using Creative Commons Public Licenses
|
| 16 |
-
|
| 17 |
-
Creative Commons public licenses provide a standard set of terms and
|
| 18 |
-
conditions that creators and other rights holders may use to share
|
| 19 |
-
original works of authorship and other material subject to copyright
|
| 20 |
-
and certain other rights specified in the public license below. The
|
| 21 |
-
following considerations are for informational purposes only, are not
|
| 22 |
-
exhaustive, and do not form part of our licenses.
|
| 23 |
-
|
| 24 |
-
Considerations for licensors: Our public licenses are
|
| 25 |
-
intended for use by those authorized to give the public
|
| 26 |
-
permission to use material in ways otherwise restricted by
|
| 27 |
-
copyright and certain other rights. Our licenses are
|
| 28 |
-
irrevocable. Licensors should read and understand the terms
|
| 29 |
-
and conditions of the license they choose before applying it.
|
| 30 |
-
Licensors should also secure all rights necessary before
|
| 31 |
-
applying our licenses so that the public can reuse the
|
| 32 |
-
material as expected. Licensors should clearly mark any
|
| 33 |
-
material not subject to the license. This includes other CC-
|
| 34 |
-
licensed material, or material used under an exception or
|
| 35 |
-
limitation to copyright. More considerations for licensors:
|
| 36 |
-
wiki.creativecommons.org/Considerations_for_licensors
|
| 37 |
-
|
| 38 |
-
Considerations for the public: By using one of our public
|
| 39 |
-
licenses, a licensor grants the public permission to use the
|
| 40 |
-
licensed material under specified terms and conditions. If
|
| 41 |
-
the licensor's permission is not necessary for any reason--for
|
| 42 |
-
example, because of any applicable exception or limitation to
|
| 43 |
-
copyright--then that use is not regulated by the license. Our
|
| 44 |
-
licenses grant only permissions under copyright and certain
|
| 45 |
-
other rights that a licensor has authority to grant. Use of
|
| 46 |
-
the licensed material may still be restricted for other
|
| 47 |
-
reasons, including because others have copyright or other
|
| 48 |
-
rights in the material. A licensor may make special requests,
|
| 49 |
-
such as asking that all changes be marked or described.
|
| 50 |
-
Although not required by our licenses, you are encouraged to
|
| 51 |
-
respect those requests where reasonable. More considerations
|
| 52 |
-
for the public:
|
| 53 |
-
wiki.creativecommons.org/Considerations_for_licensees
|
| 54 |
-
|
| 55 |
-
=======================================================================
|
| 56 |
-
|
| 57 |
-
Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International
|
| 58 |
-
Public License
|
| 59 |
-
|
| 60 |
-
By exercising the Licensed Rights (defined below), You accept and agree
|
| 61 |
-
to be bound by the terms and conditions of this Creative Commons
|
| 62 |
-
Attribution-NonCommercial-ShareAlike 4.0 International Public License
|
| 63 |
-
("Public License"). To the extent this Public License may be
|
| 64 |
-
interpreted as a contract, You are granted the Licensed Rights in
|
| 65 |
-
consideration of Your acceptance of these terms and conditions, and the
|
| 66 |
-
Licensor grants You such rights in consideration of benefits the
|
| 67 |
-
Licensor receives from making the Licensed Material available under
|
| 68 |
-
these terms and conditions.
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
Section 1 -- Definitions.
|
| 72 |
-
|
| 73 |
-
a. Adapted Material means material subject to Copyright and Similar
|
| 74 |
-
Rights that is derived from or based upon the Licensed Material
|
| 75 |
-
and in which the Licensed Material is translated, altered,
|
| 76 |
-
arranged, transformed, or otherwise modified in a manner requiring
|
| 77 |
-
permission under the Copyright and Similar Rights held by the
|
| 78 |
-
Licensor. For purposes of this Public License, where the Licensed
|
| 79 |
-
Material is a musical work, performance, or sound recording,
|
| 80 |
-
Adapted Material is always produced where the Licensed Material is
|
| 81 |
-
synched in timed relation with a moving image.
|
| 82 |
-
|
| 83 |
-
b. Adapter's License means the license You apply to Your Copyright
|
| 84 |
-
and Similar Rights in Your contributions to Adapted Material in
|
| 85 |
-
accordance with the terms and conditions of this Public License.
|
| 86 |
-
|
| 87 |
-
c. BY-NC-SA Compatible License means a license listed at
|
| 88 |
-
creativecommons.org/compatiblelicenses, approved by Creative
|
| 89 |
-
Commons as essentially the equivalent of this Public License.
|
| 90 |
-
|
| 91 |
-
d. Copyright and Similar Rights means copyright and/or similar rights
|
| 92 |
-
closely related to copyright including, without limitation,
|
| 93 |
-
performance, broadcast, sound recording, and Sui Generis Database
|
| 94 |
-
Rights, without regard to how the rights are labeled or
|
| 95 |
-
categorized. For purposes of this Public License, the rights
|
| 96 |
-
specified in Section 2(b)(1)-(2) are not Copyright and Similar
|
| 97 |
-
Rights.
|
| 98 |
-
|
| 99 |
-
e. Effective Technological Measures means those measures that, in the
|
| 100 |
-
absence of proper authority, may not be circumvented under laws
|
| 101 |
-
fulfilling obligations under Article 11 of the WIPO Copyright
|
| 102 |
-
Treaty adopted on December 20, 1996, and/or similar international
|
| 103 |
-
agreements.
|
| 104 |
-
|
| 105 |
-
f. Exceptions and Limitations means fair use, fair dealing, and/or
|
| 106 |
-
any other exception or limitation to Copyright and Similar Rights
|
| 107 |
-
that applies to Your use of the Licensed Material.
|
| 108 |
-
|
| 109 |
-
g. License Elements means the license attributes listed in the name
|
| 110 |
-
of a Creative Commons Public License. The License Elements of this
|
| 111 |
-
Public License are Attribution, NonCommercial, and ShareAlike.
|
| 112 |
-
|
| 113 |
-
h. Licensed Material means the artistic or literary work, database,
|
| 114 |
-
or other material to which the Licensor applied this Public
|
| 115 |
-
License.
|
| 116 |
-
|
| 117 |
-
i. Licensed Rights means the rights granted to You subject to the
|
| 118 |
-
terms and conditions of this Public License, which are limited to
|
| 119 |
-
all Copyright and Similar Rights that apply to Your use of the
|
| 120 |
-
Licensed Material and that the Licensor has authority to license.
|
| 121 |
-
|
| 122 |
-
j. Licensor means the individual(s) or entity(ies) granting rights
|
| 123 |
-
under this Public License.
|
| 124 |
-
|
| 125 |
-
k. NonCommercial means not primarily intended for or directed towards
|
| 126 |
-
commercial advantage or monetary compensation. For purposes of
|
| 127 |
-
this Public License, the exchange of the Licensed Material for
|
| 128 |
-
other material subject to Copyright and Similar Rights by digital
|
| 129 |
-
file-sharing or similar means is NonCommercial provided there is
|
| 130 |
-
no payment of monetary compensation in connection with the
|
| 131 |
-
exchange.
|
| 132 |
-
|
| 133 |
-
l. Share means to provide material to the public by any means or
|
| 134 |
-
process that requires permission under the Licensed Rights, such
|
| 135 |
-
as reproduction, public display, public performance, distribution,
|
| 136 |
-
dissemination, communication, or importation, and to make material
|
| 137 |
-
available to the public including in ways that members of the
|
| 138 |
-
public may access the material from a place and at a time
|
| 139 |
-
individually chosen by them.
|
| 140 |
-
|
| 141 |
-
m. Sui Generis Database Rights means rights other than copyright
|
| 142 |
-
resulting from Directive 96/9/EC of the European Parliament and of
|
| 143 |
-
the Council of 11 March 1996 on the legal protection of databases,
|
| 144 |
-
as amended and/or succeeded, as well as other essentially
|
| 145 |
-
equivalent rights anywhere in the world.
|
| 146 |
-
|
| 147 |
-
n. You means the individual or entity exercising the Licensed Rights
|
| 148 |
-
under this Public License. Your has a corresponding meaning.
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
Section 2 -- Scope.
|
| 152 |
-
|
| 153 |
-
a. License grant.
|
| 154 |
-
|
| 155 |
-
1. Subject to the terms and conditions of this Public License,
|
| 156 |
-
the Licensor hereby grants You a worldwide, royalty-free,
|
| 157 |
-
non-sublicensable, non-exclusive, irrevocable license to
|
| 158 |
-
exercise the Licensed Rights in the Licensed Material to:
|
| 159 |
-
|
| 160 |
-
a. reproduce and Share the Licensed Material, in whole or
|
| 161 |
-
in part, for NonCommercial purposes only; and
|
| 162 |
-
|
| 163 |
-
b. produce, reproduce, and Share Adapted Material for
|
| 164 |
-
NonCommercial purposes only.
|
| 165 |
-
|
| 166 |
-
2. Exceptions and Limitations. For the avoidance of doubt, where
|
| 167 |
-
Exceptions and Limitations apply to Your use, this Public
|
| 168 |
-
License does not apply, and You do not need to comply with
|
| 169 |
-
its terms and conditions.
|
| 170 |
-
|
| 171 |
-
3. Term. The term of this Public License is specified in Section
|
| 172 |
-
6(a).
|
| 173 |
-
|
| 174 |
-
4. Media and formats; technical modifications allowed. The
|
| 175 |
-
Licensor authorizes You to exercise the Licensed Rights in
|
| 176 |
-
all media and formats whether now known or hereafter created,
|
| 177 |
-
and to make technical modifications necessary to do so. The
|
| 178 |
-
Licensor waives and/or agrees not to assert any right or
|
| 179 |
-
authority to forbid You from making technical modifications
|
| 180 |
-
necessary to exercise the Licensed Rights, including
|
| 181 |
-
technical modifications necessary to circumvent Effective
|
| 182 |
-
Technological Measures. For purposes of this Public License,
|
| 183 |
-
simply making modifications authorized by this Section 2(a)
|
| 184 |
-
(4) never produces Adapted Material.
|
| 185 |
-
|
| 186 |
-
5. Downstream recipients.
|
| 187 |
-
|
| 188 |
-
a. Offer from the Licensor -- Licensed Material. Every
|
| 189 |
-
recipient of the Licensed Material automatically
|
| 190 |
-
receives an offer from the Licensor to exercise the
|
| 191 |
-
Licensed Rights under the terms and conditions of this
|
| 192 |
-
Public License.
|
| 193 |
-
|
| 194 |
-
b. Additional offer from the Licensor -- Adapted Material.
|
| 195 |
-
Every recipient of Adapted Material from You
|
| 196 |
-
automatically receives an offer from the Licensor to
|
| 197 |
-
exercise the Licensed Rights in the Adapted Material
|
| 198 |
-
under the conditions of the Adapter's License You apply.
|
| 199 |
-
|
| 200 |
-
c. No downstream restrictions. You may not offer or impose
|
| 201 |
-
any additional or different terms or conditions on, or
|
| 202 |
-
apply any Effective Technological Measures to, the
|
| 203 |
-
Licensed Material if doing so restricts exercise of the
|
| 204 |
-
Licensed Rights by any recipient of the Licensed
|
| 205 |
-
Material.
|
| 206 |
-
|
| 207 |
-
6. No endorsement. Nothing in this Public License constitutes or
|
| 208 |
-
may be construed as permission to assert or imply that You
|
| 209 |
-
are, or that Your use of the Licensed Material is, connected
|
| 210 |
-
with, or sponsored, endorsed, or granted official status by,
|
| 211 |
-
the Licensor or others designated to receive attribution as
|
| 212 |
-
provided in Section 3(a)(1)(A)(i).
|
| 213 |
-
|
| 214 |
-
b. Other rights.
|
| 215 |
-
|
| 216 |
-
1. Moral rights, such as the right of integrity, are not
|
| 217 |
-
licensed under this Public License, nor are publicity,
|
| 218 |
-
privacy, and/or other similar personality rights; however, to
|
| 219 |
-
the extent possible, the Licensor waives and/or agrees not to
|
| 220 |
-
assert any such rights held by the Licensor to the limited
|
| 221 |
-
extent necessary to allow You to exercise the Licensed
|
| 222 |
-
Rights, but not otherwise.
|
| 223 |
-
|
| 224 |
-
2. Patent and trademark rights are not licensed under this
|
| 225 |
-
Public License.
|
| 226 |
-
|
| 227 |
-
3. To the extent possible, the Licensor waives any right to
|
| 228 |
-
collect royalties from You for the exercise of the Licensed
|
| 229 |
-
Rights, whether directly or through a collecting society
|
| 230 |
-
under any voluntary or waivable statutory or compulsory
|
| 231 |
-
licensing scheme. In all other cases the Licensor expressly
|
| 232 |
-
reserves any right to collect such royalties, including when
|
| 233 |
-
the Licensed Material is used other than for NonCommercial
|
| 234 |
-
purposes.
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
Section 3 -- License Conditions.
|
| 238 |
-
|
| 239 |
-
Your exercise of the Licensed Rights is expressly made subject to the
|
| 240 |
-
following conditions.
|
| 241 |
-
|
| 242 |
-
a. Attribution.
|
| 243 |
-
|
| 244 |
-
1. If You Share the Licensed Material (including in modified
|
| 245 |
-
form), You must:
|
| 246 |
-
|
| 247 |
-
a. retain the following if it is supplied by the Licensor
|
| 248 |
-
with the Licensed Material:
|
| 249 |
-
|
| 250 |
-
i. identification of the creator(s) of the Licensed
|
| 251 |
-
Material and any others designated to receive
|
| 252 |
-
attribution, in any reasonable manner requested by
|
| 253 |
-
the Licensor (including by pseudonym if
|
| 254 |
-
designated);
|
| 255 |
-
|
| 256 |
-
ii. a copyright notice;
|
| 257 |
-
|
| 258 |
-
iii. a notice that refers to this Public License;
|
| 259 |
-
|
| 260 |
-
iv. a notice that refers to the disclaimer of
|
| 261 |
-
warranties;
|
| 262 |
-
|
| 263 |
-
v. a URI or hyperlink to the Licensed Material to the
|
| 264 |
-
extent reasonably practicable;
|
| 265 |
-
|
| 266 |
-
b. indicate if You modified the Licensed Material and
|
| 267 |
-
retain an indication of any previous modifications; and
|
| 268 |
-
|
| 269 |
-
c. indicate the Licensed Material is licensed under this
|
| 270 |
-
Public License, and include the text of, or the URI or
|
| 271 |
-
hyperlink to, this Public License.
|
| 272 |
-
|
| 273 |
-
2. You may satisfy the conditions in Section 3(a)(1) in any
|
| 274 |
-
reasonable manner based on the medium, means, and context in
|
| 275 |
-
which You Share the Licensed Material. For example, it may be
|
| 276 |
-
reasonable to satisfy the conditions by providing a URI or
|
| 277 |
-
hyperlink to a resource that includes the required
|
| 278 |
-
information.
|
| 279 |
-
3. If requested by the Licensor, You must remove any of the
|
| 280 |
-
information required by Section 3(a)(1)(A) to the extent
|
| 281 |
-
reasonably practicable.
|
| 282 |
-
|
| 283 |
-
b. ShareAlike.
|
| 284 |
-
|
| 285 |
-
In addition to the conditions in Section 3(a), if You Share
|
| 286 |
-
Adapted Material You produce, the following conditions also apply.
|
| 287 |
-
|
| 288 |
-
1. The Adapter's License You apply must be a Creative Commons
|
| 289 |
-
license with the same License Elements, this version or
|
| 290 |
-
later, or a BY-NC-SA Compatible License.
|
| 291 |
-
|
| 292 |
-
2. You must include the text of, or the URI or hyperlink to, the
|
| 293 |
-
Adapter's License You apply. You may satisfy this condition
|
| 294 |
-
in any reasonable manner based on the medium, means, and
|
| 295 |
-
context in which You Share Adapted Material.
|
| 296 |
-
|
| 297 |
-
3. You may not offer or impose any additional or different terms
|
| 298 |
-
or conditions on, or apply any Effective Technological
|
| 299 |
-
Measures to, Adapted Material that restrict exercise of the
|
| 300 |
-
rights granted under the Adapter's License You apply.
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
Section 4 -- Sui Generis Database Rights.
|
| 304 |
-
|
| 305 |
-
Where the Licensed Rights include Sui Generis Database Rights that
|
| 306 |
-
apply to Your use of the Licensed Material:
|
| 307 |
-
|
| 308 |
-
a. for the avoidance of doubt, Section 2(a)(1) grants You the right
|
| 309 |
-
to extract, reuse, reproduce, and Share all or a substantial
|
| 310 |
-
portion of the contents of the database for NonCommercial purposes
|
| 311 |
-
only;
|
| 312 |
-
|
| 313 |
-
b. if You include all or a substantial portion of the database
|
| 314 |
-
contents in a database in which You have Sui Generis Database
|
| 315 |
-
Rights, then the database in which You have Sui Generis Database
|
| 316 |
-
Rights (but not its individual contents) is Adapted Material,
|
| 317 |
-
including for purposes of Section 3(b); and
|
| 318 |
-
|
| 319 |
-
c. You must comply with the conditions in Section 3(a) if You Share
|
| 320 |
-
all or a substantial portion of the contents of the database.
|
| 321 |
-
|
| 322 |
-
For the avoidance of doubt, this Section 4 supplements and does not
|
| 323 |
-
replace Your obligations under this Public License where the Licensed
|
| 324 |
-
Rights include other Copyright and Similar Rights.
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
Section 5 -- Disclaimer of Warranties and Limitation of Liability.
|
| 328 |
-
|
| 329 |
-
a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
|
| 330 |
-
EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
|
| 331 |
-
AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
|
| 332 |
-
ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
|
| 333 |
-
IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
|
| 334 |
-
WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
| 335 |
-
PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
|
| 336 |
-
ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
|
| 337 |
-
KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
|
| 338 |
-
ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
|
| 339 |
-
|
| 340 |
-
b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
|
| 341 |
-
TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
|
| 342 |
-
NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
|
| 343 |
-
INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
|
| 344 |
-
COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
|
| 345 |
-
USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
|
| 346 |
-
ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
|
| 347 |
-
DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
|
| 348 |
-
IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
|
| 349 |
-
|
| 350 |
-
c. The disclaimer of warranties and limitation of liability provided
|
| 351 |
-
above shall be interpreted in a manner that, to the extent
|
| 352 |
-
possible, most closely approximates an absolute disclaimer and
|
| 353 |
-
waiver of all liability.
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
Section 6 -- Term and Termination.
|
| 357 |
-
|
| 358 |
-
a. This Public License applies for the term of the Copyright and
|
| 359 |
-
Similar Rights licensed here. However, if You fail to comply with
|
| 360 |
-
this Public License, then Your rights under this Public License
|
| 361 |
-
terminate automatically.
|
| 362 |
-
|
| 363 |
-
b. Where Your right to use the Licensed Material has terminated under
|
| 364 |
-
Section 6(a), it reinstates:
|
| 365 |
-
|
| 366 |
-
1. automatically as of the date the violation is cured, provided
|
| 367 |
-
it is cured within 30 days of Your discovery of the
|
| 368 |
-
violation; or
|
| 369 |
-
|
| 370 |
-
2. upon express reinstatement by the Licensor.
|
| 371 |
-
|
| 372 |
-
For the avoidance of doubt, this Section 6(b) does not affect any
|
| 373 |
-
right the Licensor may have to seek remedies for Your violations
|
| 374 |
-
of this Public License.
|
| 375 |
-
|
| 376 |
-
c. For the avoidance of doubt, the Licensor may also offer the
|
| 377 |
-
Licensed Material under separate terms or conditions or stop
|
| 378 |
-
distributing the Licensed Material at any time; however, doing so
|
| 379 |
-
will not terminate this Public License.
|
| 380 |
-
|
| 381 |
-
d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
|
| 382 |
-
License.
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
Section 7 -- Other Terms and Conditions.
|
| 386 |
-
|
| 387 |
-
a. The Licensor shall not be bound by any additional or different
|
| 388 |
-
terms or conditions communicated by You unless expressly agreed.
|
| 389 |
-
|
| 390 |
-
b. Any arrangements, understandings, or agreements regarding the
|
| 391 |
-
Licensed Material not stated herein are separate from and
|
| 392 |
-
independent of the terms and conditions of this Public License.
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
Section 8 -- Interpretation.
|
| 396 |
-
|
| 397 |
-
a. For the avoidance of doubt, this Public License does not, and
|
| 398 |
-
shall not be interpreted to, reduce, limit, restrict, or impose
|
| 399 |
-
conditions on any use of the Licensed Material that could lawfully
|
| 400 |
-
be made without permission under this Public License.
|
| 401 |
-
|
| 402 |
-
b. To the extent possible, if any provision of this Public License is
|
| 403 |
-
deemed unenforceable, it shall be automatically reformed to the
|
| 404 |
-
minimum extent necessary to make it enforceable. If the provision
|
| 405 |
-
cannot be reformed, it shall be severed from this Public License
|
| 406 |
-
without affecting the enforceability of the remaining terms and
|
| 407 |
-
conditions.
|
| 408 |
-
|
| 409 |
-
c. No term or condition of this Public License will be waived and no
|
| 410 |
-
failure to comply consented to unless expressly agreed to by the
|
| 411 |
-
Licensor.
|
| 412 |
-
|
| 413 |
-
d. Nothing in this Public License constitutes or may be interpreted
|
| 414 |
-
as a limitation upon, or waiver of, any privileges and immunities
|
| 415 |
-
that apply to the Licensor or You, including from the legal
|
| 416 |
-
processes of any jurisdiction or authority.
|
| 417 |
-
|
| 418 |
-
=======================================================================
|
| 419 |
-
|
| 420 |
-
Creative Commons is not a party to its public
|
| 421 |
-
licenses. Notwithstanding, Creative Commons may elect to apply one of
|
| 422 |
-
its public licenses to material it publishes and in those instances
|
| 423 |
-
will be considered the “Licensor.” The text of the Creative Commons
|
| 424 |
-
public licenses is dedicated to the public domain under the CC0 Public
|
| 425 |
-
Domain Dedication. Except for the limited purpose of indicating that
|
| 426 |
-
material is shared under a Creative Commons public license or as
|
| 427 |
-
otherwise permitted by the Creative Commons policies published at
|
| 428 |
-
creativecommons.org/policies, Creative Commons does not authorize the
|
| 429 |
-
use of the trademark "Creative Commons" or any other trademark or logo
|
| 430 |
-
of Creative Commons without its prior written consent including,
|
| 431 |
-
without limitation, in connection with any unauthorized modifications
|
| 432 |
-
to any of its public licenses or any other arrangements,
|
| 433 |
-
understandings, or agreements concerning use of licensed material. For
|
| 434 |
-
the avoidance of doubt, this paragraph does not form part of the
|
| 435 |
-
public licenses.
|
| 436 |
-
|
| 437 |
-
Creative Commons may be contacted at creativecommons.org.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sd-webui-stablesr/README.md
DELETED
|
@@ -1,156 +0,0 @@
|
|
| 1 |
-
# StableSR for Stable Diffusion WebUI
|
| 2 |
-
|
| 3 |
-
Licensed under S-Lab License 1.0
|
| 4 |
-
|
| 5 |
-
[![CC BY-NC-SA 4.0][cc-by-nc-sa-shield]][cc-by-nc-sa]
|
| 6 |
-
|
| 7 |
-
English|[中文](README_CN.md)
|
| 8 |
-
|
| 9 |
-
- StableSR is a competitive super-resolution method originally proposed by Jianyi Wang et al.
|
| 10 |
-
- This repository is a migration of the StableSR project to the Automatic1111 WebUI.
|
| 11 |
-
|
| 12 |
-
Relevant Links
|
| 13 |
-
|
| 14 |
-
> Click to view high-quality official examples!
|
| 15 |
-
|
| 16 |
-
- [Project Page](https://iceclear.github.io/projects/stablesr/)
|
| 17 |
-
- [Official Repository](https://github.com/IceClear/StableSR)
|
| 18 |
-
- [Paper on arXiv](https://arxiv.org/abs/2305.07015)
|
| 19 |
-
|
| 20 |
-
> If you find this project useful, please give me & Jianyi Wang a star! ⭐
|
| 21 |
-
|
| 22 |
-
***
|
| 23 |
-
|
| 24 |
-
## Features
|
| 25 |
-
|
| 26 |
-
1. **High-fidelity detailed image upscaling**:
|
| 27 |
-
- Being very detailed while keeping the face identity of your characters.
|
| 28 |
-
- Suitable for most images (Realistic or Anime, Photography or AIGC, SD 1.5 or Midjourney images...) [Official Examples](https://iceclear.github.io/projects/stablesr/)
|
| 29 |
-
2. **Less VRAM consumption**
|
| 30 |
-
- I remove the VRAM-expensive modules in the official implementation.
|
| 31 |
-
- The remaining model is much smaller than ControlNet Tile model and requires less VRAM.
|
| 32 |
-
- When combined with Tiled Diffusion & VAE, you can do 4k image super-resolution with limited VRAM (e.g., < 12 GB).
|
| 33 |
-
> Please be aware that sdp may lead to OOM for some unknown reasons. You may use xformers instead.
|
| 34 |
-
3. **Wavelet Color Fix**
|
| 35 |
-
- The official StableSR will significantly change the color of the generated image. The problem will be even more prominent when upscaling in tiles.
|
| 36 |
-
- I implement a powerful post-processing technique that effectively matches the color of the upscaled image to the original. See [Wavelet Color Fix Example](https://imgsli.com/MTgwNDg2/).
|
| 37 |
-
|
| 38 |
-
***
|
| 39 |
-
|
| 40 |
-
## Usage
|
| 41 |
-
|
| 42 |
-
### 1. Installation
|
| 43 |
-
|
| 44 |
-
⚪ Method 1: Official Market
|
| 45 |
-
|
| 46 |
-
- Open Automatic1111 WebUI -> Click Tab "Extensions" -> Click Tab "Available" -> Find "StableSR" -> Click "Install"
|
| 47 |
-
|
| 48 |
-
⚪ Method 2: URL Install
|
| 49 |
-
|
| 50 |
-
- Open Automatic1111 WebUI -> Click Tab "Extensions" -> Click Tab "Install from URL" -> type in https://github.com/pkuliyi2015/sd-webui-stablesr.git -> Click "Install"
|
| 51 |
-
|
| 52 |
-

|
| 53 |
-
|
| 54 |
-
### 2. Download the main components
|
| 55 |
-
|
| 56 |
-
- You MUST use the Stable Diffusion V2.1 512 **EMA** checkpoint (~5.21GB) from StabilityAI
|
| 57 |
-
- You can download it from [HuggingFace](https://huggingface.co/stabilityai/stable-diffusion-2-1-base)
|
| 58 |
-
- Put into stable-diffusion-webui/models/Stable-Diffusion/
|
| 59 |
-
|
| 60 |
-
> While it requires a SD2.1 checkpoint, you can still upscale ANY image (even from SD1.5 or NSFW). Your image won't be censored and the output quality won't be affected.
|
| 61 |
-
|
| 62 |
-
- Download the extracted StableSR module
|
| 63 |
-
- Official resources: [HuggingFace](https://huggingface.co/Iceclear/StableSR/resolve/main/weibu_models.zip) (~1.2 G). Note that this is a zip file containing both the StableSR module and the VQVAE.
|
| 64 |
-
- My resources: <[GoogleDrive](https://drive.google.com/file/d/1tWjkZQhfj07sHDR4r9Ta5Fk4iMp1t3Qw/view?usp=sharing)> <[百度网盘-提取码aguq](https://pan.baidu.com/s/1Nq_6ciGgKnTu0W14QcKKWg?pwd=aguq)>
|
| 65 |
-
- Put the StableSR module (~400MB) into your stable-diffusion-webui/extensions/sd-webui-stablesr/models/
|
| 66 |
-
|
| 67 |
-
### 3. Optional components
|
| 68 |
-
|
| 69 |
-
- Install [Tiled Diffusion & VAE](https://github.com/pkuliyi2015/multidiffusion-upscaler-for-automatic1111) extension
|
| 70 |
-
- The original StableSR easily gets OOM for large images > 512.
|
| 71 |
-
- For better quality and less VRAM usage, we recommend Tiled Diffusion & VAE.
|
| 72 |
-
- Use the Official VQGAN VAE
|
| 73 |
-
- Official resources: See the link in 2.
|
| 74 |
-
- My resources: <[GoogleDrive](https://drive.google.com/file/d/1ARtDMia3_CbwNsGxxGcZ5UP75W4PeIEI/view?usp=share_link)> <[百度网盘-提取码83u9](https://pan.baidu.com/s/1YCYmGBethR9JZ8-eypoIiQ?pwd=83u9)>
|
| 75 |
-
- Put the VQVAE (~700MB) into your stable-diffusion-webui/models/VAE
|
| 76 |
-
|
| 77 |
-
### 4. Extension Usage
|
| 78 |
-
|
| 79 |
-
- At the top of the WebUI, select the v2-1_512-ema-pruned checkpoint you downloaded.
|
| 80 |
-
- Switch to the img2img tab. Find the "Scripts" dropdown at the bottom of the page.
|
| 81 |
-
- Select the StableSR script.
|
| 82 |
-
- Click the refresh button and select the StableSR checkpoint you have downloaded.
|
| 83 |
-
- Choose a scale factor.
|
| 84 |
-
- Upload your image and start generation (can work without prompts).
|
| 85 |
-
- Euler a sampler is recommended. CFG Scale<=2, Steps >= 20.
|
| 86 |
-
- For output image size > 512, we recommend using Tiled Diffusion & VAE, otherwise, the image quality may not be ideal, and the VRAM usage will be huge.
|
| 87 |
-
- Here are the official Tiled Diffusion settings:
|
| 88 |
-
- Method = Mixture of Diffusers
|
| 89 |
-
- Latent tile size = 64, Latent tile overlap = 32
|
| 90 |
-
- Latent tile batch size as large as possible before Out of Memory.
|
| 91 |
-
- Upscaler MUST be None (will not upscale here; instead, upscale in StableSR).
|
| 92 |
-
- The following figure shows the recommended settings for 24GB VRAM.
|
| 93 |
-
- For a 6GB device, **just change Tiled Diffusion Latent tile batch size to 1, Tiled VAE Encoder Tile Size to 1024, Decoder Tile Size to 128.**
|
| 94 |
-
- SDP attention optimization may lead to OOM. Please use xformers in that case.
|
| 95 |
-
- You DON'T need to change other settings in Tiled Diffusion & Tiled VAE unless you have a very deep understanding. **These params are almost optimal for StableSR.**
|
| 96 |
-

|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
### 5. Options Explained
|
| 101 |
-
|
| 102 |
-
- What is "Pure Noise"?
|
| 103 |
-
- Pure Noise refers to starting from a fully random noise tensor instead of your image. **This is the default behavior in the StableSR paper.**
|
| 104 |
-
- When enabling it, the script ignores your denoising strength and gives you much more detailed images, but also changes the color & sharpness significantly
|
| 105 |
-
- When disabling it, the script starts by adding some noise to your image. The result will not be fully detailed, even if you set denoising strength = 1 (but it may be aesthetically good). See [Comparison](https://imgsli.com/MTgwMTMx).
|
| 106 |
-
- If you disable Pure Noise, we recommend denoising strength=1
|
| 107 |
-
- What is "Color Fix"?
|
| 108 |
-
- This is to mitigate the color shift problem from StableSR and the tiling process.
|
| 109 |
-
- AdaIN simply adjusts the color statistics between the original and the outcome images. This is the official algorithm but ineffective in many cases.
|
| 110 |
-
- Wavelet decomposes the original and the outcome images into low- and high-frequency components, and then replaces the outcome image's low-frequency part (colors) with the original image's. This is very powerful for uneven color shifting. The algorithm comes from GIMP and Krita, and it takes several seconds for each image.
|
| 111 |
-
- When enabling color fix, the original image will also show up in your preview window, but will NOT be saved automatically.
|
| 112 |
-
|
| 113 |
-
### 6. Important Notice
|
| 114 |
-
|
| 115 |
-
> Why are my results different from the official examples?
|
| 116 |
-
|
| 117 |
-
- It is not your or our fault.
|
| 118 |
-
- This extension has the same UNet model weights as the StableSR if installed correctly.
|
| 119 |
-
- If you install the optional VQVAE, the whole model weights will be the same as the official model with fusion weights=0.
|
| 120 |
-
- However, your result will be **not as good as** the official results, because:
|
| 121 |
-
- Sampler Difference:
|
| 122 |
-
- The official repo does 100 or 200 steps of legacy DDPM sampling with a custom timestep scheduler, and samples without negative prompts.
|
| 123 |
-
- However, WebUI doesn't offer such a sampler, and it must sample with negative prompts. **This is the main difference.**
|
| 124 |
-
- VQVAE Decoder Difference:
|
| 125 |
-
- The official VQVAE Decoder takes some Encoder features as input.
|
| 126 |
-
- However, in practice, I found these features are astonishingly huge for large images. (>10G for 4k images even in float16!)
|
| 127 |
-
- Hence, **I removed the CFW component in VAE Decoder**. As this led to inferior fidelity in details, I will try to add it back later as an option.
|
| 128 |
-
|
| 129 |
-
***
|
| 130 |
-
## License
|
| 131 |
-
|
| 132 |
-
This project is licensed under:
|
| 133 |
-
|
| 134 |
-
- S-Lab License 1.0.
|
| 135 |
-
- [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License][cc-by-nc-sa], due to the use of the NVIDIA SPADE module.
|
| 136 |
-
|
| 137 |
-
[![CC BY-NC-SA 4.0][cc-by-nc-sa-image]][cc-by-nc-sa]
|
| 138 |
-
|
| 139 |
-
[cc-by-nc-sa]: http://creativecommons.org/licenses/by-nc-sa/4.0/
|
| 140 |
-
[cc-by-nc-sa-image]: https://licensebuttons.net/l/by-nc-sa/4.0/88x31.png
|
| 141 |
-
[cc-by-nc-sa-shield]: https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey.svg
|
| 142 |
-
|
| 143 |
-
### Disclaimer
|
| 144 |
-
|
| 145 |
-
- All code in this extension is for research purposes only.
|
| 146 |
-
- The commercial use of the code and checkpoint is **strictly prohibited**.
|
| 147 |
-
|
| 148 |
-
### Important Notice for Outcome Images
|
| 149 |
-
|
| 150 |
-
- Please note that the CC BY-NC-SA 4.0 license in the NVIDIA SPADE module also prohibits the commercial use of outcome images.
|
| 151 |
-
- Jianyi Wang may change the SPADE module to a commercial-friendly one but he is busy.
|
| 152 |
-
- If you wish to *speed up* his process for commercial purposes, please contact him through email: iceclearwjy@gmail.com
|
| 153 |
-
|
| 154 |
-
## Acknowledgments
|
| 155 |
-
|
| 156 |
-
I would like to thank Jianyi Wang et al. for the original StableSR method.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sd-webui-stablesr/README_CN.md
DELETED
|
@@ -1,150 +0,0 @@
|
|
| 1 |
-
# StableSR - Stable Diffusion WebUI
|
| 2 |
-
|
| 3 |
-
Licensed under S-Lab License 1.0
|
| 4 |
-
|
| 5 |
-
[![CC BY-NC-SA 4.0][cc-by-nc-sa-shield]][cc-by-nc-sa]
|
| 6 |
-
|
| 7 |
-
[English](README.md) | 中文
|
| 8 |
-
|
| 9 |
-
- StableSR 是由 Jianyi Wang 等人提出的强力超分辨率项目。
|
| 10 |
-
- 本仓库将 StableSR 项目迁移到 Automatic1111 WebUI。
|
| 11 |
-
|
| 12 |
-
相关链接
|
| 13 |
-
|
| 14 |
-
> 点击查看大量官方示例!
|
| 15 |
-
|
| 16 |
-
- [项目页面](https://iceclear.github.io/projects/stablesr/)
|
| 17 |
-
- [官方仓库](https://github.com/IceClear/StableSR)
|
| 18 |
-
- [论文](https://arxiv.org/abs/2305.07015)
|
| 19 |
-
|
| 20 |
-
> 如果你觉得这个项目有帮助,请给我和 Jianyi Wang 的仓库点个星!⭐
|
| 21 |
-
***
|
| 22 |
-
|
| 23 |
-
## 功能
|
| 24 |
-
|
| 25 |
-
1. **高保真图像放大**:
|
| 26 |
-
- 不修改人物脸部的同时添加非常细致的细节和纹理
|
| 27 |
-
- 适合大多数图片(真实或动漫,摄影作品或AIGC,SD 1.5或Midjourney图片...)
|
| 28 |
-
2. **较少的显存消耗**:
|
| 29 |
-
- 我移除了官方实现中显存消耗高的模块。
|
| 30 |
-
- 剩下的模型比ControlNet Tile模型小得多,需要的显存也少得多。
|
| 31 |
-
- 当结合Tiled Diffusion & VAE时,你可以在有限的显存(例如,<12GB)中进行4k图像放大。
|
| 32 |
-
> 注意,sdp可能会不明原因炸显存。建议使用xformers。
|
| 33 |
-
3. **小波分解颜色修正**:
|
| 34 |
-
- StableSR官方实现有明显的颜色偏移,这一问题在分块放大时更加明显。
|
| 35 |
-
- 我实现了一个强大的后处理技术,有效地匹配放大图像与原图的颜色。请看[小波分解颜色修正例子](https://imgsli.com/MTgwNDg2/)。
|
| 36 |
-
|
| 37 |
-
***
|
| 38 |
-
## 使用
|
| 39 |
-
|
| 40 |
-
### 1. 安装
|
| 41 |
-
|
| 42 |
-
⚪ 方法 1: 官方市场
|
| 43 |
-
|
| 44 |
-
- 打开Automatic1111 WebUI -> 点击“扩展”选项卡 -> 点击“可用”选项卡 -> 找到“StableSR” -> 点击“安装”
|
| 45 |
-
|
| 46 |
-
⚪ 方法 2: URL 安装
|
| 47 |
-
|
| 48 |
-
- 打开 Automatic1111 WebUI -> 点击 "Extensions" 标签页 -> 点击 "Install from URL" 标签页 -> 输入 https://github.com/pkuliyi2015/sd-webui-stablesr.git -> 点击 "Install"
|
| 49 |
-
|
| 50 |
-

|
| 51 |
-
|
| 52 |
-
### 2. 必须模型
|
| 53 |
-
|
| 54 |
-
- 你必须使用 StabilityAI 官方的 Stable Diffusion V2.1 512 **EMA** 模型(约 5.21GB)
|
| 55 |
-
- 你可以从 [HuggingFace](https://huggingface.co/stabilityai/stable-diffusion-2-1-base) 下载
|
| 56 |
-
- 放入 stable-diffusion-webui/models/Stable-Diffusion/ 文件夹
|
| 57 |
-
> 虽然StableSR需要一个SD2.1的模型权重,但你仍然可以放大来自SD1.5的图片。NSFW图片不会被模型扭曲,输出质量也不会受到影响。
|
| 58 |
-
- 下载 StableSR 模块
|
| 59 |
-
- 官方资源:[HuggingFace](https://huggingface.co/Iceclear/StableSR/resolve/main/weibu_models.zip) (约1.2G)。请注意这是一个zip文件,同时包含StableSR模块和可选组件VQVAE.
|
| 60 |
-
- 我的资源:<[GoogleDrive](https://drive.google.com/file/d/1tWjkZQhfj07sHDR4r9Ta5Fk4iMp1t3Qw/view?usp=sharing)> <[百度网盘-提取码aguq](https://pan.baidu.com/s/1Nq_6ciGgKnTu0W14QcKKWg?pwd=aguq)>
|
| 61 |
-
- 把StableSR模块(约400M大小)放入 stable-diffusion-webui/extensions/sd-webui-stablesr/models/ 文件夹
|
| 62 |
-
|
| 63 |
-
### 3. 可选组件
|
| 64 |
-
|
| 65 |
-
- 安装 [Tiled Diffusion & VAE](https://github.com/pkuliyi2015/multidiffusion-upscaler-for-automatic1111) 扩展
|
| 66 |
-
- 原始的 StableSR 对大于 512 的大图像容易出现 OOM。
|
| 67 |
-
- 为了获得更好的质量和更少的 VRAM 使用,我们建议使用 Tiled Diffusion & VAE。
|
| 68 |
-
- 使用官方 VQGAN VAE
|
| 69 |
-
- 官方资源:同2中的链接
|
| 70 |
-
- 我的资源:<[GoogleDrive](https://drive.google.com/file/d/1ARtDMia3_CbwNsGxxGcZ5UP75W4PeIEI/view?usp=share_link)> <[百度网盘-提取码83u9](https://pan.baidu.com/s/1YCYmGBethR9JZ8-eypoIiQ?pwd=83u9)>
|
| 71 |
-
- 把VQVAE(约750MB大小)放在你的 stable-diffusion-webui/models/VAE 中
|
| 72 |
-
|
| 73 |
-
### 4. 扩展使用
|
| 74 |
-
|
| 75 |
-
- 在 WebUI 的顶部,选择你下载的 v2-1_512-ema-pruned 模型。
|
| 76 |
-
- 切换到 img2img 标签。在页面底部找到 "Scripts" 下拉列表。
|
| 77 |
-
- 选择 StableSR 脚本。
|
| 78 |
-
- 点击刷新按钮,选择你已下载的 StableSR 检查点。
|
| 79 |
-
- 选择一个放大因子。
|
| 80 |
-
- 上传你的图像并开始生成(无需提示也能工作)。
|
| 81 |
-
- 推荐使用 Euler a 采样器,CFG值<=2,步数 >= 20。
|
| 82 |
-
- 如果生成图像尺寸 > 512,我们推荐使用 Tiled Diffusion & VAE,否则,图像质量可能不理想,VRAM 使用量也会很大。
|
| 83 |
-
- 这里是官方推荐的 Tiled Diffusion 设置。
|
| 84 |
-
- 方法 = Mixture of Diffusers
|
| 85 |
-
- 隐空间Tile大小 = 64,隐空间Tile重叠 = 32
|
| 86 |
-
- Tile批大小尽可能大,直到差一点点就炸显存为止。
|
| 87 |
-
- Upscaler**必须**选择None。
|
| 88 |
-
- 下图是24GB显存的推荐设置。
|
| 89 |
-
- 对于4GB的设备,**只需将Tiled Diffusion Latent tile批处理大小改为1,Tiled VAE编码器Tile大小改为1024,解码器Tile大小改为128。**
|
| 90 |
-
- SDP注意力优化可能会导致OOM(内存不足),因此推荐使用xformers。
|
| 91 |
-
- 除非你有深入的理解,否则你**不要**改变Tiled Diffusion & Tiled VAE中的其他设置。**这些参数对于StableSR基本上是最优的。**
|
| 92 |
-

|
| 93 |
-
|
| 94 |
-
### 5. 参数解释
|
| 95 |
-
|
| 96 |
-
- 什么是 "Pure Noise"?
|
| 97 |
-
- Pure Noise也就是纯噪声,指的是从完全随机的噪声张量开始,而不是从你的图像开始。**这是 StableSR 论文中的默认做法。**
|
| 98 |
-
- 启用这个选项时,脚本会忽略你的重绘幅度设置。产出将会是更详细的图像,但也会显著改变颜色和锐度。
|
| 99 |
-
- 禁用这个选项时,脚本会开始添加一些噪声到你的图像。即使你将去噪强度设为1,结果也不会那么的细节(但可能更和谐好看)。参见 [对比图](https://imgsli.com/MTgwMTMx)。
|
| 100 |
-
- 如果禁用Pure Noise,推荐重绘幅度设置为1
|
| 101 |
-
- 什么是"颜色修正"?
|
| 102 |
-
- 这是为了缓解来自StableSR和Tile处理过程中的颜色偏移问题。
|
| 103 |
-
- AdaIN简单地匹配原图和结果图的颜色统计信息。这是StableSR官方算法,但常常效果不佳。
|
| 104 |
-
- Wavelet将原图和结果图分解为低频和高频,然后用原图的低频信息(颜色)替换掉结果图的低频信息。该算法对于不均匀的颜色偏移非常强力。算法来自GIMP和Krita,对每张图像需要几秒钟的时间。
|
| 105 |
-
- 启用颜色修正时,原图也会出现在您的预览窗口中,但不会被自动保存。
|
| 106 |
-
|
| 107 |
-
### 6. 重要问题
|
| 108 |
-
|
| 109 |
-
> 为什么我的结果和官方示例不同?
|
| 110 |
-
|
| 111 |
-
- 这不是你或我们的错。
|
| 112 |
-
- 如果正确安装,这个扩展有与 StableSR 相同的 UNet 模型权重。
|
| 113 |
-
- 如果你安装了可选的 VQVAE,整个模型权重将与融合权重为 0 的官方模型相同。
|
| 114 |
-
- 但是,你的结果将**不如**官方结果,因为:
|
| 115 |
-
- 采样器差异:
|
| 116 |
-
- 官方仓库进行 100 或 200 步的 legacy DDPM 采样,并使用自定义的时间步调度器,采样时不使用负提示。
|
| 117 |
-
- 然而,WebUI 不提供这样的采样器,必须带有负提示进行采样。**这是主要的差异。**
|
| 118 |
-
- VQVAE 解码器差异:
|
| 119 |
-
- 官方 VQVAE 解码器将一些编码器特征作为输入。
|
| 120 |
-
- 然而,在实践中,我发现这些特征对于大图像来说非常大。 (>10G 用于 4k 图像,即使是在 float16!)
|
| 121 |
-
- 因此,**我移除了 VAE 解码器中的 CFW 组件**。由于这导致了对细节的较低保真度,我将尝试将它作为一个选项添加回去。
|
| 122 |
-
|
| 123 |
-
***
|
| 124 |
-
## 许可
|
| 125 |
-
|
| 126 |
-
此项目在以下许可下授权:
|
| 127 |
-
|
| 128 |
-
- S-Lab License 1.0.
|
| 129 |
-
- [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License][cc-by-nc-sa],由于使用了 NVIDIA SPADE 模块。
|
| 130 |
-
|
| 131 |
-
[![CC BY-NC-SA 4.0][cc-by-nc-sa-image]][cc-by-nc-sa]
|
| 132 |
-
|
| 133 |
-
[cc-by-nc-sa]: http://creativecommons.org/licenses/by-nc-sa/4.0/
|
| 134 |
-
[cc-by-nc-sa-image]: https://licensebuttons.net/l/by-nc-sa/4.0/88x31.png
|
| 135 |
-
[cc-by-nc-sa-shield]: https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey.svg
|
| 136 |
-
|
| 137 |
-
### 免责声明
|
| 138 |
-
|
| 139 |
-
- 此扩展中的所有代码仅供研究目的。
|
| 140 |
-
- 严禁贩售代码和权重
|
| 141 |
-
|
| 142 |
-
### 产出图像的重要通知
|
| 143 |
-
|
| 144 |
-
- 请注意,NVIDIA SPADE 模块中的 CC BY-NC-SA 4.0 许可也禁止把产生的图像用于商业用途。
|
| 145 |
-
- Jianyi Wang 可能会将 SPADE 模块更改为商业友好的一个,但他很忙。
|
| 146 |
-
- 如果你希望**加快**他的进度,请通过电子邮件与他联系:iceclearwjy@gmail.com
|
| 147 |
-
|
| 148 |
-
## 致谢
|
| 149 |
-
|
| 150 |
-
感谢 Jianyi Wang 等人提出的 StableSR 方法
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sd-webui-stablesr/scripts/__pycache__/stablesr.cpython-310.pyc
DELETED
|
Binary file (10.2 kB)
|
|
|
sd-webui-stablesr/scripts/stablesr.py
DELETED
|
@@ -1,276 +0,0 @@
|
|
| 1 |
-
'''
|
| 2 |
-
# --------------------------------------------------------------------------------
|
| 3 |
-
#
|
| 4 |
-
# StableSR for Automatic1111 WebUI
|
| 5 |
-
#
|
| 6 |
-
# Introducing the state-of-the-art super-resolution method: StableSR!
|
| 7 |
-
# The technique was originally proposed by my schoolmate Jianyi Wang et al.
|
| 8 |
-
#
|
| 9 |
-
# Project Page: https://iceclear.github.io/projects/stablesr/
|
| 10 |
-
# Official Repo: https://github.com/IceClear/StableSR
|
| 11 |
-
# Paper: https://arxiv.org/abs/2305.07015
|
| 12 |
-
#
|
| 13 |
-
# @original author: Jianyi Wang et, al.
|
| 14 |
-
# @migration: LI YI
|
| 15 |
-
# @organization: Nanyang Technological University - Singapore
|
| 16 |
-
# @date: 2023-05-20
|
| 17 |
-
# @license:
|
| 18 |
-
# S-Lab License 1.0 (see LICENSE file)
|
| 19 |
-
# CC BY-NC-SA 4.0 (required by NVIDIA SPADE module)
|
| 20 |
-
#
|
| 21 |
-
# @disclaimer:
|
| 22 |
-
# All code in this extension is for research purpose only.
|
| 23 |
-
# The commercial use of the code & checkpoint is strictly prohibited.
|
| 24 |
-
#
|
| 25 |
-
# --------------------------------------------------------------------------------
|
| 26 |
-
#
|
| 27 |
-
# IMPORTANT NOTICE FOR OUTCOME IMAGES:
|
| 28 |
-
# - Please be aware that the CC BY-NC-SA 4.0 license in SPADE module
|
| 29 |
-
# also prohibits the commercial use of outcome images.
|
| 30 |
-
# - Jianyi Wang may change the SPADE module to a commercial-friendly one.
|
| 31 |
-
# If you want to use the outcome images for commercial purposes, please
|
| 32 |
-
# contact Jianyi Wang for more information.
|
| 33 |
-
#
|
| 34 |
-
# Please give me a star (and also Jianyi's repo) if you like this project!
|
| 35 |
-
#
|
| 36 |
-
# --------------------------------------------------------------------------------
|
| 37 |
-
'''
|
| 38 |
-
|
| 39 |
-
import os
|
| 40 |
-
import torch
|
| 41 |
-
import gradio as gr
|
| 42 |
-
import numpy as np
|
| 43 |
-
import PIL.Image as Image
|
| 44 |
-
|
| 45 |
-
from pathlib import Path
|
| 46 |
-
from torch import Tensor
|
| 47 |
-
from tqdm import tqdm
|
| 48 |
-
|
| 49 |
-
from modules import scripts, processing, sd_samplers, devices, images, shared
|
| 50 |
-
from modules.processing import StableDiffusionProcessingImg2Img, Processed
|
| 51 |
-
from modules.shared import opts
|
| 52 |
-
from ldm.modules.diffusionmodules.openaimodel import UNetModel
|
| 53 |
-
|
| 54 |
-
from srmodule.spade import SPADELayers
|
| 55 |
-
from srmodule.struct_cond import EncoderUNetModelWT, build_unetwt
|
| 56 |
-
from srmodule.colorfix import adain_color_fix, wavelet_color_fix
|
| 57 |
-
|
| 58 |
-
SD_WEBUI_PATH = Path.cwd()
|
| 59 |
-
ME_PATH = SD_WEBUI_PATH / 'extensions' / 'sd-webui-stablesr'
|
| 60 |
-
MODEL_PATH = ME_PATH / 'models'
|
| 61 |
-
FORWARD_CACHE_NAME = 'org_forward_stablesr'
|
| 62 |
-
|
| 63 |
-
class StableSR:
|
| 64 |
-
def __init__(self, path, dtype, device):
|
| 65 |
-
state_dict = torch.load(path, map_location='cpu')
|
| 66 |
-
self.struct_cond_model: EncoderUNetModelWT = build_unetwt()
|
| 67 |
-
self.spade_layers: SPADELayers = SPADELayers()
|
| 68 |
-
self.struct_cond_model.load_from_dict(state_dict)
|
| 69 |
-
self.spade_layers.load_from_dict(state_dict)
|
| 70 |
-
del state_dict
|
| 71 |
-
self.struct_cond_model.apply(lambda x: x.to(dtype=dtype, device=device))
|
| 72 |
-
self.spade_layers.apply(lambda x: x.to(dtype=dtype, device=device))
|
| 73 |
-
|
| 74 |
-
self.latent_image: Tensor = None
|
| 75 |
-
self.set_image_hooks = {}
|
| 76 |
-
self.struct_cond: Tensor = None
|
| 77 |
-
|
| 78 |
-
def set_latent_image(self, latent_image):
|
| 79 |
-
self.latent_image = latent_image
|
| 80 |
-
for hook in self.set_image_hooks.values():
|
| 81 |
-
hook(latent_image)
|
| 82 |
-
|
| 83 |
-
def hook(self, unet: UNetModel):
|
| 84 |
-
# hook unet to set the struct_cond
|
| 85 |
-
if not hasattr(unet, FORWARD_CACHE_NAME):
|
| 86 |
-
setattr(unet, FORWARD_CACHE_NAME, unet.forward)
|
| 87 |
-
|
| 88 |
-
def unet_forward(x, timesteps=None, context=None, y=None,**kwargs):
|
| 89 |
-
self.latent_image = self.latent_image.to(x.device)
|
| 90 |
-
# Ensure the device of all modules layers is the same as the unet
|
| 91 |
-
# This will fix the issue when user use --medvram or --lowvram
|
| 92 |
-
self.spade_layers.to(x.device)
|
| 93 |
-
self.struct_cond_model.to(x.device)
|
| 94 |
-
timesteps = timesteps.to(x.device)
|
| 95 |
-
self.struct_cond = None # mitigate vram peak
|
| 96 |
-
self.struct_cond = self.struct_cond_model(self.latent_image, timesteps[:self.latent_image.shape[0]])
|
| 97 |
-
return getattr(unet, FORWARD_CACHE_NAME)(x, timesteps, context, y, **kwargs)
|
| 98 |
-
|
| 99 |
-
unet.forward = unet_forward
|
| 100 |
-
|
| 101 |
-
self.spade_layers.hook(unet, lambda: self.struct_cond)
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
def unhook(self, unet: UNetModel):
|
| 105 |
-
# clean up cache
|
| 106 |
-
self.latent_image = None
|
| 107 |
-
self.struct_cond = None
|
| 108 |
-
self.set_image_hooks = {}
|
| 109 |
-
# unhook unet forward
|
| 110 |
-
if hasattr(unet, FORWARD_CACHE_NAME):
|
| 111 |
-
unet.forward = getattr(unet, FORWARD_CACHE_NAME)
|
| 112 |
-
delattr(unet, FORWARD_CACHE_NAME)
|
| 113 |
-
|
| 114 |
-
# unhook spade layers
|
| 115 |
-
self.spade_layers.unhook()
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
class Script(scripts.Script):
|
| 119 |
-
def __init__(self) -> None:
|
| 120 |
-
self.model_list = {}
|
| 121 |
-
self.load_model_list()
|
| 122 |
-
self.last_path = None
|
| 123 |
-
self.stablesr_model: StableSR = None
|
| 124 |
-
|
| 125 |
-
def load_model_list(self):
|
| 126 |
-
# traverse the CFG_PATH and add all files to the model list
|
| 127 |
-
self.model_list = {}
|
| 128 |
-
if not MODEL_PATH.exists():
|
| 129 |
-
MODEL_PATH.mkdir()
|
| 130 |
-
for file in MODEL_PATH.iterdir():
|
| 131 |
-
if file.is_file():
|
| 132 |
-
# save tha absolute path
|
| 133 |
-
self.model_list[file.name] = str(file.absolute())
|
| 134 |
-
self.model_list['None'] = None
|
| 135 |
-
|
| 136 |
-
def title(self):
|
| 137 |
-
return "StableSR"
|
| 138 |
-
|
| 139 |
-
def show(self, is_img2img):
|
| 140 |
-
return is_img2img
|
| 141 |
-
|
| 142 |
-
def ui(self, is_img2img):
|
| 143 |
-
with gr.Row():
|
| 144 |
-
model = gr.Dropdown(list(self.model_list.keys()), label="SR Model")
|
| 145 |
-
refresh = gr.Button(value='↻', variant='tool')
|
| 146 |
-
def refresh_fn(selected):
|
| 147 |
-
self.load_model_list()
|
| 148 |
-
if selected not in self.model_list:
|
| 149 |
-
selected = 'None'
|
| 150 |
-
return gr.Dropdown.update(value=selected, choices=list(self.model_list.keys()))
|
| 151 |
-
refresh.click(fn=refresh_fn,inputs=model, outputs=model)
|
| 152 |
-
with gr.Row():
|
| 153 |
-
scale_factor = gr.Slider(minimum=1, maximum=16, step=0.1, value=2, label='Scale Factor', elem_id=f'StableSR-scale')
|
| 154 |
-
with gr.Row():
|
| 155 |
-
color_fix = gr.Dropdown(['None', 'Wavelet', 'AdaIN'], label="Color Fix", value='Wavelet', elem_id=f'StableSR-color-fix')
|
| 156 |
-
save_original = gr.Checkbox(label='Save Original', value=False, elem_id=f'StableSR-save-original', visible=color_fix.value != 'None')
|
| 157 |
-
color_fix.change(fn=lambda selected: gr.Checkbox.update(visible=selected != 'None'), inputs=color_fix, outputs=save_original, show_progress=False)
|
| 158 |
-
pure_noise = gr.Checkbox(label='Pure Noise', value=True, elem_id=f'StableSR-pure-noise')
|
| 159 |
-
unload_model= gr.Button(value='Unload Model', variant='tool')
|
| 160 |
-
def unload_model_fn():
|
| 161 |
-
if self.stablesr_model is not None:
|
| 162 |
-
self.stablesr_model = None
|
| 163 |
-
devices.torch_gc()
|
| 164 |
-
print('[StableSR] Model unloaded!')
|
| 165 |
-
else:
|
| 166 |
-
print('[StableSR] No model loaded.')
|
| 167 |
-
unload_model.click(fn=unload_model_fn)
|
| 168 |
-
return [model, scale_factor, pure_noise, color_fix, save_original]
|
| 169 |
-
|
| 170 |
-
def run(self, p: StableDiffusionProcessingImg2Img, model: str, scale_factor:float, pure_noise: bool, color_fix:str, save_original:bool) -> Processed:
|
| 171 |
-
|
| 172 |
-
if model == 'None':
|
| 173 |
-
# do clean up
|
| 174 |
-
self.stablesr_model = None
|
| 175 |
-
self.last_model_path = None
|
| 176 |
-
return
|
| 177 |
-
|
| 178 |
-
if model not in self.model_list:
|
| 179 |
-
raise gr.Error(f"Model {model} is not in the list! Please refresh your browser!")
|
| 180 |
-
|
| 181 |
-
if not os.path.exists(self.model_list[model]):
|
| 182 |
-
raise gr.Error(f"Model {model} is not on your disk! Please refresh the model list!")
|
| 183 |
-
|
| 184 |
-
if color_fix not in ['None', 'Wavelet', 'AdaIN']:
|
| 185 |
-
print(f'[StableSR] Invalid color fix method: {color_fix}')
|
| 186 |
-
color_fix = 'None'
|
| 187 |
-
|
| 188 |
-
# upscale the image, set the ouput size
|
| 189 |
-
init_img: Image = p.init_images[0]
|
| 190 |
-
target_width = int(init_img.width * scale_factor)
|
| 191 |
-
target_height = int(init_img.height * scale_factor)
|
| 192 |
-
# if the target width is not dividable by 8, then round it up
|
| 193 |
-
if target_width % 8 != 0:
|
| 194 |
-
target_width = target_width + 8 - target_width % 8
|
| 195 |
-
# if the target height is not dividable by 8, then round it up
|
| 196 |
-
if target_height % 8 != 0:
|
| 197 |
-
target_height = target_height + 8 - target_height % 8
|
| 198 |
-
init_img = init_img.resize((target_width, target_height), Image.LANCZOS)
|
| 199 |
-
p.init_images[0] = init_img
|
| 200 |
-
p.width = init_img.width
|
| 201 |
-
p.height = init_img.height
|
| 202 |
-
|
| 203 |
-
print('[StableSR] Target image size: {}x{}'.format(init_img.width, init_img.height))
|
| 204 |
-
|
| 205 |
-
first_param = shared.sd_model.parameters().__next__()
|
| 206 |
-
if self.last_path != self.model_list[model]:
|
| 207 |
-
# load the model
|
| 208 |
-
self.stablesr_model = None
|
| 209 |
-
|
| 210 |
-
if self.stablesr_model is None:
|
| 211 |
-
self.stablesr_model = StableSR(self.model_list[model], dtype=first_param.dtype, device=first_param.device)
|
| 212 |
-
self.last_path = self.model_list[model]
|
| 213 |
-
|
| 214 |
-
def sample_custom(conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
|
| 215 |
-
try:
|
| 216 |
-
unet: UNetModel = shared.sd_model.model.diffusion_model
|
| 217 |
-
self.stablesr_model.hook(unet)
|
| 218 |
-
self.stablesr_model.set_latent_image(p.init_latent)
|
| 219 |
-
x = processing.create_random_tensors(p.init_latent.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=p.subseed_strength, seed_resize_from_h=p.seed_resize_from_h, seed_resize_from_w=p.seed_resize_from_w, p=p)
|
| 220 |
-
sampler = sd_samplers.create_sampler(p.sampler_name, p.sd_model)
|
| 221 |
-
if pure_noise:
|
| 222 |
-
# NOTE: use txt2img instead of img2img sampling
|
| 223 |
-
samples = sampler.sample(p, x, conditioning, unconditional_conditioning, image_conditioning=p.image_conditioning)
|
| 224 |
-
else:
|
| 225 |
-
if p.initial_noise_multiplier != 1.0:
|
| 226 |
-
p.extra_generation_params["Noise multiplier"] =p.initial_noise_multiplier
|
| 227 |
-
x *= p.initial_noise_multiplier
|
| 228 |
-
samples = sampler.sample_img2img(p, p.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=p.image_conditioning)
|
| 229 |
-
|
| 230 |
-
if p.mask is not None:
|
| 231 |
-
samples = samples * p.nmask + p.init_latent * p.mask
|
| 232 |
-
del x
|
| 233 |
-
devices.torch_gc()
|
| 234 |
-
return samples
|
| 235 |
-
finally:
|
| 236 |
-
self.stablesr_model.unhook(unet)
|
| 237 |
-
# in --medvram and --lowvram mode, we send the model back to the initial device
|
| 238 |
-
self.stablesr_model.struct_cond_model.to(device=first_param.device)
|
| 239 |
-
self.stablesr_model.spade_layers.to(device=first_param.device)
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
# replace the sample function
|
| 243 |
-
p.sample = sample_custom
|
| 244 |
-
|
| 245 |
-
if color_fix != 'None':
|
| 246 |
-
p.do_not_save_samples = True
|
| 247 |
-
|
| 248 |
-
result: Processed = processing.process_images(p)
|
| 249 |
-
|
| 250 |
-
if color_fix != 'None':
|
| 251 |
-
|
| 252 |
-
fixed_images = []
|
| 253 |
-
# fix the color
|
| 254 |
-
color_fix_func = wavelet_color_fix if color_fix == 'Wavelet' else adain_color_fix
|
| 255 |
-
for i in range(len(result.images)):
|
| 256 |
-
try:
|
| 257 |
-
fixed_images.append(color_fix_func(result.images[i], init_img))
|
| 258 |
-
except Exception as e:
|
| 259 |
-
print(f'[StableSR] Error fixing color with default method: {e}')
|
| 260 |
-
|
| 261 |
-
# save the fixed color images
|
| 262 |
-
for i in range(len(fixed_images)):
|
| 263 |
-
try:
|
| 264 |
-
images.save_image(fixed_images[i], p.outpath_samples, "", p.all_seeds[i], p.all_prompts[i], opts.samples_format, info=result.infotexts[i], p=p)
|
| 265 |
-
except Exception as e:
|
| 266 |
-
print(f'[StableSR] Error saving color fixed image: {e}')
|
| 267 |
-
|
| 268 |
-
if save_original:
|
| 269 |
-
for i in range(len(result.images)):
|
| 270 |
-
try:
|
| 271 |
-
images.save_image(result.images[i], p.outpath_samples, "", p.all_seeds[i], p.all_prompts[i], opts.samples_format, info=result.infotexts[i], p=p, suffix="-before-color-fix")
|
| 272 |
-
except Exception as e:
|
| 273 |
-
print(f'[StableSR] Error saving original image: {e}')
|
| 274 |
-
result.images = result.images + fixed_images
|
| 275 |
-
|
| 276 |
-
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sd-webui-stablesr/srmodule/__pycache__/attn.cpython-310.pyc
DELETED
|
Binary file (3.29 kB)
|
|
|
sd-webui-stablesr/srmodule/__pycache__/colorfix.cpython-310.pyc
DELETED
|
Binary file (3.41 kB)
|
|
|
sd-webui-stablesr/srmodule/__pycache__/spade.cpython-310.pyc
DELETED
|
Binary file (5.54 kB)
|
|
|
sd-webui-stablesr/srmodule/__pycache__/struct_cond.cpython-310.pyc
DELETED
|
Binary file (7.5 kB)
|
|
|
sd-webui-stablesr/srmodule/attn.py
DELETED
|
@@ -1,111 +0,0 @@
|
|
| 1 |
-
'''
|
| 2 |
-
This file is modified from the TiledVAE attn.py, so that the StableSR can save much VRAM.
|
| 3 |
-
'''
|
| 4 |
-
import math
|
| 5 |
-
import torch
|
| 6 |
-
|
| 7 |
-
from modules import shared, sd_hijack
|
| 8 |
-
from modules.sd_hijack_optimizations import get_available_vram, get_xformers_flash_attention_op, sub_quad_attention
|
| 9 |
-
|
| 10 |
-
try:
|
| 11 |
-
import xformers
|
| 12 |
-
import xformers.ops
|
| 13 |
-
except ImportError:
|
| 14 |
-
pass
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
def get_attn_func():
    """Return the attention kernel matching the WebUI's active optimization.

    Reads ``sd_hijack.model_hijack.optimization_method`` and maps it to one of
    the ``(q, k, v)`` attention functions defined in this module.  The plain
    ``attn_forward`` is returned when no method is set, when the method is
    not recognised (a warning is printed first), and for the recognised
    methods that have no dedicated kernel here ('none', 'v1', 'invokeai').
    """
    selected = sd_hijack.model_hijack.optimization_method
    if selected is None:
        return attn_forward

    selected = selected.lower()
    recognised = (
        'none', 'sdp-no-mem', 'sdp', 'xformers',
        'sub-quadratic', 'v1', 'invokeai', 'doggettx',
    )
    if selected not in recognised:
        print(f"[StableSR] Warning: Unknown attention optimization method {selected}. Please try to update the extension.")
        return attn_forward

    # Only these methods have a specialised kernel; everything else that is
    # recognised falls through to the plain implementation.
    specialised = {
        'xformers': xformers_attnblock_forward,
        'sdp-no-mem': sdp_no_mem_attnblock_forward,
        'sdp': sdp_attnblock_forward,
        'sub-quadratic': sub_quad_attnblock_forward,
        'doggettx': cross_attention_attnblock_forward,
    }
    return specialised.get(selected, attn_forward)
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
# The following functions are all copied from modules.sd_hijack_optimizations
|
| 45 |
-
# However, the residual & normalization are removed and computed separately.
|
| 46 |
-
|
| 47 |
-
def attn_forward(q, k, v):
    """Plain scaled dot-product attention built from two batched matmuls.

    ``q``, ``k`` and ``v`` are indexed as (batch, tokens, channels), following
    the original "b,hw,c" shape notes.  Scores are scaled by
    ``channels ** -0.5`` and soft-maxed over the key axis.  The result has the
    same (batch, tokens, channels) layout as ``q``.
    """
    keys_t = k.permute(0, 2, 1)  # b,c,hw
    scale = int(keys_t.shape[1]) ** (-0.5)

    # scores[b, i, j] = sum_c q[b, i, c] * k[b, j, c]
    probs = torch.nn.functional.softmax(torch.bmm(q, keys_t) * scale, dim=2)

    # attend to values: out_t[b, c, i] = sum_j v[b, j, c] * probs[b, i, j]
    values_t = v.permute(0, 2, 1)  # b,c,hw
    out_t = torch.bmm(values_t, probs.permute(0, 2, 1))

    return out_t.permute(0, 2, 1)
|
| 63 |
-
|
| 64 |
-
def xformers_attnblock_forward(q, k, v):
    """Run attention through xformers' memory-efficient kernel.

    The ``op`` passed to xformers is whatever the WebUI helper
    ``get_xformers_flash_attention_op`` selects for these tensors.
    """
    flash_op = get_xformers_flash_attention_op(q, k, v)
    return xformers.ops.memory_efficient_attention(q, k, v, op=flash_op)
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
def cross_attention_attnblock_forward(q, k, v):
    """Attention computed in query slices sized to the available VRAM.

    The score matrix is estimated at ``batch * n_q * n_k * element_size * 2.5``
    bytes.  When that exceeds ``get_available_vram()``, the query axis is cut
    into a power-of-two number of slices; if the query count is not divisible
    by that number, a single full-size slice is used instead.  The output is
    returned in the same (batch, tokens, channels) layout as ``q``.
    """
    keys_t = k.permute(0, 2, 1)  # b,c,hw
    values_t = v.permute(0, 2, 1)  # b,c,hw
    channels = keys_t.shape[1]
    # Output buffer allocated with the shape of the transposed keys
    # (as in the original code).
    out_t = torch.zeros_like(keys_t, device=q.device)

    free_bytes = get_available_vram()

    n_queries = q.shape[1]
    score_bytes = q.shape[0] * n_queries * keys_t.shape[2] * q.element_size()
    needed_bytes = score_bytes * 2.5

    if needed_bytes > free_bytes:
        steps = 2 ** (math.ceil(math.log(needed_bytes / free_bytes, 2)))
    else:
        steps = 1

    if (n_queries % steps) == 0:
        slice_size = n_queries // steps
    else:
        slice_size = n_queries

    for start in range(0, n_queries, slice_size):
        stop = start + slice_size

        # Intermediates are deleted as soon as possible to keep peak memory low.
        scores = torch.bmm(q[:, start:stop], keys_t)  # scores[b,i,j] = sum_c q[b,i,c] k[b,c,j]
        scaled = scores * (int(channels) ** (-0.5))
        del scores
        probs = torch.nn.functional.softmax(scaled, dim=2, dtype=q.dtype)
        del scaled

        # attend to values
        probs_t = probs.permute(0, 2, 1)  # b,hw,hw (first hw of k, second of q)
        del probs

        out_t[:, :, start:stop] = torch.bmm(values_t, probs_t)  # out_t[b,c,j] = sum_i v[b,c,i] probs_t[b,i,j]
        del probs_t

    return out_t.permute(0, 2, 1)
|
| 102 |
-
|
| 103 |
-
def sdp_no_mem_attnblock_forward(q, k, v):
    """Call ``sdp_attnblock_forward`` with the memory-efficient SDP backend off.

    The flash and math backends stay enabled for the duration of the call.
    """
    backend_choice = torch.backends.cuda.sdp_kernel(
        enable_flash=True,
        enable_math=True,
        enable_mem_efficient=False,
    )
    with backend_choice:
        result = sdp_attnblock_forward(q, k, v)
    return result
|
| 106 |
-
|
| 107 |
-
def sdp_attnblock_forward(q, k, v):
    """Non-causal, dropout-free attention via PyTorch's fused SDP operator."""
    fused_attention = torch.nn.functional.scaled_dot_product_attention
    return fused_attention(q, k, v, dropout_p=0.0, is_causal=False)
|
| 109 |
-
|
| 110 |
-
def sub_quad_attnblock_forward(q, k, v):
    """Run the WebUI's sub-quadratic attention with the command-line chunk settings.

    Chunk sizes and the chunk threshold are read from ``shared.cmd_opts``;
    checkpointing is always requested.
    """
    opts = shared.cmd_opts
    return sub_quad_attention(
        q,
        k,
        v,
        q_chunk_size=opts.sub_quad_q_chunk_size,
        kv_chunk_size=opts.sub_quad_kv_chunk_size,
        chunk_threshold=opts.sub_quad_chunk_threshold,
        use_checkpoint=True,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sd-webui-stablesr/srmodule/colorfix.py
DELETED
|
@@ -1,114 +0,0 @@
|
|
| 1 |
-
import torch
|
| 2 |
-
from PIL import Image
|
| 3 |
-
from torch import Tensor
|
| 4 |
-
from torch.nn import functional as F
|
| 5 |
-
|
| 6 |
-
from torchvision.transforms import ToTensor, ToPILImage
|
| 7 |
-
|
| 8 |
-
def adain_color_fix(target: Image, source: Image):
    """Colour-correct ``target`` using the channel statistics of ``source``.

    Both PIL images are converted to tensors, ``target`` is passed through
    ``adaptive_instance_normalization`` with ``source`` as the style
    reference, and the result is clamped to [0, 1] and converted back to a
    PIL image.
    """
    pil_to_tensor = ToTensor()
    tensor_to_pil = ToPILImage()

    # Add a leading batch dimension for the normalization helper.
    target_batch = pil_to_tensor(target).unsqueeze(0)
    source_batch = pil_to_tensor(source).unsqueeze(0)

    corrected = adaptive_instance_normalization(target_batch, source_batch)

    return tensor_to_pil(corrected.squeeze(0).clamp_(0.0, 1.0))
|
| 22 |
-
|
| 23 |
-
def wavelet_color_fix(target: Image, source: Image):
    """Colour-correct ``target`` by wavelet recombination with ``source``.

    Both PIL images are converted to tensors and passed to
    ``wavelet_reconstruction`` (``target`` as content, ``source`` as style);
    the result is clamped to [0, 1] and converted back to a PIL image.
    """
    pil_to_tensor = ToTensor()
    tensor_to_pil = ToPILImage()

    # Add a leading batch dimension for the wavelet helpers.
    target_batch = pil_to_tensor(target).unsqueeze(0)
    source_batch = pil_to_tensor(source).unsqueeze(0)

    corrected = wavelet_reconstruction(target_batch, source_batch)

    return tensor_to_pil(corrected.squeeze(0).clamp_(0.0, 1.0))
|
| 37 |
-
|
| 38 |
-
def calc_mean_std(feat: Tensor, eps=1e-5):
    """Calculate per-sample, per-channel mean and std for adaptive instance normalization.

    Args:
        feat (Tensor): 4D tensor; the first two axes are kept and all
            remaining elements are reduced.
        eps (float): A small value added to the variance to avoid
            divide-by-zero. Default: 1e-5.

    Returns:
        tuple: ``(mean, std)``, each of shape ``(b, c, 1, 1)``.
    """
    size = feat.size()
    assert len(size) == 4, 'The input feature should be 4D tensor.'
    b, c = size[:2]
    # reshape (not view) so non-contiguous inputs, e.g. transposed tensors,
    # are accepted; for contiguous inputs it is still a zero-copy view.
    flat = feat.reshape(b, c, -1)
    feat_var = flat.var(dim=2) + eps
    feat_std = feat_var.sqrt().view(b, c, 1, 1)
    feat_mean = flat.mean(dim=2).view(b, c, 1, 1)
    return feat_mean, feat_std
|
| 52 |
-
|
| 53 |
-
def adaptive_instance_normalization(content_feat:Tensor, style_feat:Tensor):
    """Adaptive instance normalization.

    Re-scale ``content_feat`` so that its per-channel mean and standard
    deviation (as computed by ``calc_mean_std``) match those of
    ``style_feat``.

    Args:
        content_feat (Tensor): The feature whose statistics are replaced.
        style_feat (Tensor): The feature that supplies the target statistics.
    """
    target_shape = content_feat.size()
    style_mean, style_std = calc_mean_std(style_feat)
    content_mean, content_std = calc_mean_std(content_feat)

    # Whiten the content with its own statistics ...
    centered = content_feat - content_mean.expand(target_shape)
    whitened = centered / content_std.expand(target_shape)

    # ... then colour it with the style statistics.
    return whitened * style_std.expand(target_shape) + style_mean.expand(target_shape)
|
| 66 |
-
|
| 67 |
-
def wavelet_blur(image: Tensor, radius: int):
    """Blur ``image`` with a dilated 3x3 binomial kernel, one channel at a time.

    Args:
        image (Tensor): Batched image tensor indexed as (N, C, H, W).  Any
            channel count is accepted (the original code only handled C == 3).
        radius (int): Dilation of the 3x3 kernel; also the amount of
            replicate padding added on every side, so the output keeps the
            spatial size of the input.

    Returns:
        Tensor: The blurred image, same shape as ``image``.
    """
    channels = image.shape[1]
    # convolution kernel (weights sum to 1, so flat regions are left unchanged)
    kernel_vals = [
        [0.0625, 0.125, 0.0625],
        [0.125, 0.25, 0.125],
        [0.0625, 0.125, 0.0625],
    ]
    kernel = torch.tensor(kernel_vals, dtype=image.dtype, device=image.device)
    # add channel dimensions to make it 4D, then repeat it once per input
    # channel so the grouped convolution filters each channel independently
    kernel = kernel[None, None].repeat(channels, 1, 1, 1)
    padded = F.pad(image, (radius, radius, radius, radius), mode='replicate')
    # apply convolution
    return F.conv2d(padded, kernel, groups=channels, dilation=radius)
|
| 87 |
-
|
| 88 |
-
def wavelet_decomposition(image: Tensor, levels=5):
    """Split ``image`` into an accumulated high-frequency part and a low-frequency residual.

    The image is blurred ``levels`` times with ``wavelet_blur`` using radii
    1, 2, 4, ...; the detail removed at each level is summed into the
    high-frequency output and the last blurred image is the low-frequency
    output.

    Args:
        image (Tensor): Input tensor accepted by ``wavelet_blur``.
        levels (int): Number of blur levels. With ``levels <= 0`` nothing is
            blurred: the high-frequency part is all zeros and the
            low-frequency part is ``image`` itself (previously this raised
            ``UnboundLocalError``).

    Returns:
        tuple: ``(high_freq, low_freq)``.
    """
    high_freq = torch.zeros_like(image)
    # Start from the input so the result is defined even when the loop
    # body never runs.
    low_freq = image
    for level in range(levels):
        radius = 2 ** level
        blurred = wavelet_blur(low_freq, radius)
        high_freq += (low_freq - blurred)
        low_freq = blurred

    return high_freq, low_freq
|
| 101 |
-
|
| 102 |
-
def wavelet_reconstruction(content_feat:Tensor, style_feat:Tensor):
    """
    Combine the high-frequency part of the content with the low-frequency
    part of the style, so that the content takes on the style's colors.
    """
    # Indexing the returned tuple drops the unused half immediately.
    content_detail = wavelet_decomposition(content_feat)[0]
    style_base = wavelet_decomposition(style_feat)[1]
    # content detail + style low frequency
    return content_detail + style_base
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sd-webui-stablesr/srmodule/spade.py
DELETED
|
@@ -1,206 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Copyright (C) 2019 NVIDIA Corporation. All rights reserved.
|
| 3 |
-
Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
|
| 4 |
-
"""
|
| 5 |
-
|
| 6 |
-
import re
|
| 7 |
-
import torch
|
| 8 |
-
import torch.nn as nn
|
| 9 |
-
|
| 10 |
-
from ldm.modules.diffusionmodules.util import normalization, checkpoint
|
| 11 |
-
from ldm.modules.diffusionmodules.openaimodel import ResBlock, UNetModel
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
class SPADE(nn.Module):
    """Normalization layer whose scale and bias are predicted from a conditioning map.

    The input is normalized with ``normalization(norm_nc)`` and then modulated
    as ``normalized * (1 + gamma) + beta``, where ``gamma`` and ``beta`` are
    produced by small convolutional heads applied to the conditioning map.

    Args:
        norm_nc: Number of channels of the activations being normalized.
        label_nc: Number of channels of the conditioning map.
        config_text: String of the form ``'spade<name><k>x<k>'``; only the
            first digit is used, as the convolution kernel size.
    """

    def __init__(self, norm_nc, label_nc=256, config_text='spadeinstance3x3'):
        super().__init__()
        assert config_text.startswith('spade')
        # Raw string: '\D' and '\d' are regex classes, not string escapes.
        parsed = re.search(r'spade(\D+)(\d)x\d', config_text)
        if parsed is None:
            raise ValueError(f"Cannot parse SPADE config text: {config_text!r}")
        ks = int(parsed.group(2))
        self.param_free_norm = normalization(norm_nc)

        # The dimension of the intermediate embedding space. Yes, hardcoded.
        nhidden = 128

        pw = ks // 2  # padding that keeps the spatial size for odd kernels
        self.mlp_shared = nn.Sequential(
            nn.Conv2d(label_nc, nhidden, kernel_size=ks, padding=pw),
            nn.ReLU()
        )
        self.mlp_gamma = nn.Conv2d(nhidden, norm_nc, kernel_size=ks, padding=pw)
        self.mlp_beta = nn.Conv2d(nhidden, norm_nc, kernel_size=ks, padding=pw)

    def forward(self, x_dic, segmap_dic):
        """Run ``_forward`` through ldm's ``checkpoint`` helper (flag set to True)."""
        return checkpoint(
            self._forward, (x_dic, segmap_dic), self.parameters(), True
        )

    def _forward(self, x_dic, segmap_dic):
        """Normalize ``x_dic`` and modulate it with the matching conditioning map.

        ``segmap_dic`` is indexed with ``str(x_dic.size(-1))``, i.e. by the
        size of the activation's last dimension.
        """
        segmap = segmap_dic[str(x_dic.size(-1))]

        # Part 1. generate parameter-free normalized activations
        normalized = self.param_free_norm(x_dic)

        # Part 2. produce scaling and bias conditioned on semantic map
        # (no interpolation: the map is looked up at the right size above)
        actv = self.mlp_shared(segmap)

        # When the activations carry more samples than the conditioning map,
        # each conditioning sample is repeated for consecutive activations.
        repeat_factor = normalized.shape[0] // segmap.shape[0]

        def modulation(head):
            param = head(actv)
            if repeat_factor > 1:
                param = param.repeat_interleave(repeat_factor, dim=0)
            return param

        # In-place updates on the freshly computed normalized tensor.
        out = normalized
        out *= (1 + modulation(self.mlp_gamma))
        out += modulation(self.mlp_beta)
        return out
|
| 59 |
-
|
| 60 |
-
def dual_resblock_forward(self: ResBlock, x, emb, spade: SPADE, get_struct_cond):
    """Residual-block forward pass with a SPADE modulation added before the skip sum.

    The block's own layers are applied to ``x`` and ``emb`` as usual, then
    the result is passed through ``spade`` together with the value returned
    by ``get_struct_cond()`` and added to ``skip_connection(x)``.

    Args:
        self: The residual block whose layers are used.
        x: Input activations.
        emb: Embedding fed to ``self.emb_layers``.
        spade: Module called as ``spade(h, struct_cond)``.
        get_struct_cond: Zero-argument callable returning the conditioning
            passed to ``spade``.
    """
    if self.updown:
        # Resample both branches before the final input convolution.
        leading_layers = self.in_layers[:-1]
        final_conv = self.in_layers[-1]
        h = self.h_upd(leading_layers(x))
        x = self.x_upd(x)
        h = final_conv(h)
    else:
        h = self.in_layers(x)

    emb_out = self.emb_layers(emb).type(h.dtype)
    # Append trailing singleton axes until the embedding has as many
    # dimensions as the activations.
    missing_dims = len(h.shape) - len(emb_out.shape)
    if missing_dims > 0:
        emb_out = emb_out[(Ellipsis,) + (None,) * missing_dims]

    if self.use_scale_shift_norm:
        out_norm = self.out_layers[0]
        out_rest = self.out_layers[1:]
        scale, shift = torch.chunk(emb_out, 2, dim=1)
        h = out_rest(out_norm(h) * (1 + scale) + shift)
    else:
        h = self.out_layers(h + emb_out)

    h = spade(h, get_struct_cond())
    return self.skip_connection(x) + h
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
class SPADELayers(nn.Module):
    '''
    A container class for fast SPADE layer loading.
    params inferred from the official checkpoint

    The module lists mirror the UNet's input/middle/output block indices;
    positions that are not hooked hold ``nn.Identity`` placeholders.
    '''

    def __init__(self):
        super().__init__()
        # Channel count per position; None marks an unhooked placeholder.
        input_channels = (None, 320, 320, None, 640, 640, None, 1280, 1280, None, 1280, 1280)
        middle_channels = (1280, None, 1280)
        output_channels = (1280,) * 6 + (640,) * 3 + (320,) * 3

        def build(channels):
            return nn.ModuleList(
                [nn.Identity() if ch is None else SPADE(ch) for ch in channels]
            )

        self.input_blocks = build(input_channels)
        self.middle_block = build(middle_channels)
        self.output_blocks = build(output_channels)
        self.input_ids = [idx for idx, ch in enumerate(input_channels) if ch is not None]
        self.output_ids = list(range(12))
        self.mid_ids = [idx for idx, ch in enumerate(middle_channels) if ch is not None]
        # Attribute name under which a block's original _forward is stashed.
        self.forward_cache_name = 'org_forward_stablesr'
        self.unet = None

    def _paired_resblocks(self, unet):
        """Yield ``(resblock, spade)`` for each hooked position: input, output, then middle."""
        for idx in self.input_ids:
            yield unet.input_blocks[idx][0], self.input_blocks[idx]
        for idx in self.output_ids:
            yield unet.output_blocks[idx][0], self.output_blocks[idx]
        for idx in self.mid_ids:
            yield unet.middle_block[idx], self.middle_block[idx]

    def hook(self, unet: UNetModel, get_struct_cond):
        """Replace ``_forward`` on every targeted resblock with the SPADE-aware version.

        The original ``_forward`` is saved on the block (once) under
        ``forward_cache_name`` so that ``unhook`` can restore it.
        """
        # hook all resblocks
        self.unet = unet
        cache_name = self.forward_cache_name
        for resblock, spade in self._paired_resblocks(unet):
            if not hasattr(resblock, cache_name):
                setattr(resblock, cache_name, resblock._forward)
            # Defaults bind the current block and SPADE layer to this lambda.
            resblock._forward = lambda x, timesteps, resblock=resblock, spade=spade: dual_resblock_forward(resblock, x, timesteps, spade, get_struct_cond)

    def unhook(self):
        """Restore the saved ``_forward`` on every hooked resblock and forget the UNet."""
        unet = self.unet
        if unet is None:
            return
        cache_name = self.forward_cache_name
        for resblock, _ in self._paired_resblocks(unet):
            if hasattr(resblock, cache_name):
                resblock._forward = getattr(resblock, cache_name)
                delattr(resblock, cache_name)
        self.unet = None

    def load_from_dict(self, state_dict):
        """
        Load model weights from a dictionary.

        Only keys starting with ``model.diffusion_model.`` are used; that
        prefix and the ``.0.spade`` / ``.spade`` infix are stripped before
        the weights are loaded.

        :param state_dict: a dict of parameters.
        """
        prefix = "model.diffusion_model."
        filtered_dict = {}
        for full_key, tensor in state_dict.items():
            if not full_key.startswith(prefix):
                continue
            key = full_key[len(prefix):]
            # middle_block entries have no '.0' sub-index before '.spade'
            infix = '.spade' if 'middle_block' in key else '.0.spade'
            filtered_dict[key.replace(infix, '')] = tensor
        self.load_state_dict(filtered_dict)
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
if __name__ == '__main__':
    # Manual smoke test: load a checkpoint into SPADELayers and print the module.
    path = '../models/stablesr_sd21.ckpt'
    # NOTE(security): torch.load unpickles the file; only use trusted checkpoints.
    # map_location='cpu' lets this run on machines without the device the
    # checkpoint was saved from.
    state_dict = torch.load(path, map_location='cpu')
    model = SPADELayers()
    model.load_from_dict(state_dict)
    print(model)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sd-webui-stablesr/srmodule/struct_cond.py
DELETED
|
@@ -1,353 +0,0 @@
|
|
| 1 |
-
import math
|
| 2 |
-
import torch
|
| 3 |
-
import torch.nn as nn
|
| 4 |
-
|
| 5 |
-
from ldm.modules.diffusionmodules.openaimodel import (
|
| 6 |
-
TimestepEmbedSequential,
|
| 7 |
-
ResBlock,
|
| 8 |
-
Downsample,
|
| 9 |
-
)
|
| 10 |
-
|
| 11 |
-
from ldm.modules.diffusionmodules.util import (
|
| 12 |
-
conv_nd,
|
| 13 |
-
linear,
|
| 14 |
-
timestep_embedding,
|
| 15 |
-
checkpoint,
|
| 16 |
-
normalization,
|
| 17 |
-
zero_module,
|
| 18 |
-
)
|
| 19 |
-
|
| 20 |
-
from srmodule.attn import get_attn_func
|
| 21 |
-
|
| 22 |
-
attn_func = None
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
class QKVAttentionLegacy(nn.Module):
    """
    A module which performs QKV attention. Matches legacy QKVAttention + input/output heads shaping.

    The attention itself is delegated to the module-level ``attn_func``
    (which replaces the legacy einsum implementation).
    """

    def __init__(self, n_heads):
        super().__init__()
        self.n_heads = n_heads

    def forward(self, qkv):
        """
        Apply QKV attention.
        :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs.
        :return: an [N x (H * C) x T] tensor after attention.
        """
        bs, width, length = qkv.shape
        assert width % (3 * self.n_heads) == 0
        ch = width // (3 * self.n_heads)

        # Fold the heads into the batch axis, then split channels into q/k/v.
        per_head = qkv.reshape(bs * self.n_heads, ch * 3, length)
        # attn_func is called with the channel axis last: (batch*heads, T, C).
        q, k, v = (
            part.permute(0, 2, 1).contiguous()
            for part in per_head.split(ch, dim=1)
        )

        # attn_func is a module-level global looked up at call time.
        attended = attn_func(q, k, v)
        return attended.permute(0, 2, 1).reshape(bs, -1, length)
|
| 64 |
-
|
| 65 |
-
class AttentionBlock(nn.Module):
    """
    An attention block that allows spatial positions to attend to each other.
    Originally ported from here, but adapted to the N-d case.
    https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66.

    ``use_checkpoint`` and ``use_new_attention_order`` are accepted but not
    read by this implementation.
    """

    def __init__(
        self,
        channels,
        num_heads=1,
        num_head_channels=-1,
        use_checkpoint=False,
        use_new_attention_order=False,
    ):
        super().__init__()
        self.channels = channels
        if num_head_channels != -1:
            # Derive the head count from a fixed per-head width.
            assert (
                channels % num_head_channels == 0
            ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}"
            self.num_heads = channels // num_head_channels
        else:
            self.num_heads = num_heads
        self.norm = normalization(channels)
        self.qkv = conv_nd(1, channels, channels * 3, 1)
        self.attention = QKVAttentionLegacy(self.num_heads)

        self.proj_out = zero_module(conv_nd(1, channels, channels, 1))

    def forward(self, x):
        """Run ``_forward`` through ldm's ``checkpoint`` helper (flag hard-coded to True)."""
        # TODO: check checkpoint usage, is True
        # TODO: fix the .half call!!!
        return checkpoint(self._forward, (x,), self.parameters(), True)

    def _forward(self, x):
        """Flatten the spatial axes, apply attention with a residual add, restore the shape."""
        b, c, *spatial = x.shape
        flat = x.reshape(b, c, -1)
        projected = self.proj_out(self.attention(self.qkv(self.norm(flat))))
        return (flat + projected).reshape(b, c, *spatial)
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
class EncoderUNetModelWT(nn.Module):
    """
    The half UNet model with attention and timestep embedding.

    Only the downsampling path and the middle block are built. The feature
    map captured before each change of resolution, plus the middle-block
    output, is passed through its own ResBlock in ``fea_tran`` and returned
    by :meth:`forward`.
    """

    def __init__(
        self,
        in_channels,
        model_channels,
        out_channels,
        num_res_blocks,
        attention_resolutions,
        dropout=0,
        channel_mult=(1, 2, 4, 8),
        conv_resample=True,
        dims=2,
        use_checkpoint=False,
        use_fp16=False,
        num_heads=4,
        num_head_channels=-1,
        num_heads_upsample=-1,
        use_scale_shift_norm=False,
        resblock_updown=False,
        use_new_attention_order=False,
    ):
        super().__init__()

        # Plain configuration attributes.
        self.in_channels = in_channels
        self.model_channels = model_channels
        self.out_channels = out_channels
        self.num_res_blocks = num_res_blocks
        self.attention_resolutions = attention_resolutions
        self.dropout = dropout
        self.channel_mult = channel_mult
        self.conv_resample = conv_resample
        self.use_checkpoint = use_checkpoint
        if use_fp16:
            self.dtype = torch.float16
        else:
            self.dtype = torch.float32
        self.num_heads = num_heads
        self.num_head_channels = num_head_channels
        # -1 means "same as num_heads".
        self.num_heads_upsample = (
            num_heads if num_heads_upsample == -1 else num_heads_upsample
        )

        # Keyword bundles shared by every ResBlock / AttentionBlock below.
        res_kwargs = dict(
            dims=dims,
            use_checkpoint=use_checkpoint,
            use_scale_shift_norm=use_scale_shift_norm,
        )
        attn_kwargs = dict(
            use_checkpoint=use_checkpoint,
            num_heads=num_heads,
            num_head_channels=num_head_channels,
            use_new_attention_order=use_new_attention_order,
        )

        time_embed_dim = model_channels * 4
        self.time_embed = nn.Sequential(
            linear(model_channels, time_embed_dim),
            nn.SiLU(),
            linear(time_embed_dim, time_embed_dim),
        )

        stem = conv_nd(dims, in_channels, model_channels, 3, padding=1)
        self.input_blocks = nn.ModuleList([TimestepEmbedSequential(stem)])
        self._feature_size = model_channels

        tapped_chans = []  # channel count of every feature fed to fea_tran
        ch = model_channels
        ds = 1  # cumulative downsampling factor
        final_level = len(channel_mult) - 1
        for level, mult in enumerate(channel_mult):
            level_ch = mult * model_channels
            for _ in range(num_res_blocks):
                stage = [
                    ResBlock(
                        ch,
                        time_embed_dim,
                        dropout,
                        out_channels=level_ch,
                        **res_kwargs,
                    )
                ]
                ch = level_ch
                if ds in attention_resolutions:
                    stage.append(AttentionBlock(ch, **attn_kwargs))
                self.input_blocks.append(TimestepEmbedSequential(*stage))
                self._feature_size += ch

            # Every level except the last is followed by a downsampling step.
            if level == final_level:
                continue
            if resblock_updown:
                reducer = ResBlock(
                    ch,
                    time_embed_dim,
                    dropout,
                    out_channels=ch,
                    down=True,
                    **res_kwargs,
                )
            else:
                reducer = Downsample(ch, conv_resample, dims=dims, out_channels=ch)
            self.input_blocks.append(TimestepEmbedSequential(reducer))
            tapped_chans.append(ch)
            ds *= 2
            self._feature_size += ch

        self.middle_block = TimestepEmbedSequential(
            ResBlock(ch, time_embed_dim, dropout, **res_kwargs),
            AttentionBlock(ch, **attn_kwargs),
            ResBlock(ch, time_embed_dim, dropout, **res_kwargs),
        )
        tapped_chans.append(ch)
        self._feature_size += ch
        self.input_block_chans = tapped_chans

        # One output ResBlock per tapped feature, mapping to out_channels.
        self.fea_tran = nn.ModuleList(
            [
                ResBlock(
                    chans,
                    time_embed_dim,
                    dropout,
                    out_channels=out_channels,
                    **res_kwargs,
                )
                for chans in tapped_chans
            ]
        )

    @torch.no_grad()
    def forward(self, x, timesteps):
        """
        Apply the model to an input batch.

        :param x: an [N x C x ...] Tensor of inputs; cast to self.dtype.
        :param timesteps: a 1-D batch of timesteps.
        :return: a dict mapping str(size of the last dimension) of each
            tapped feature to the output of its fea_tran block.
        """
        emb = self.time_embed(timestep_embedding(timesteps, self.model_channels))

        tapped = []
        h = x.type(self.dtype)
        for block in self.input_blocks:
            before, h = h, block(h, emb)
            # A change in the last dimension marks a resolution change; keep
            # the feature from just before it.
            if before.size(-1) != h.size(-1):
                tapped.append(before)
        h = self.middle_block(h, emb)
        tapped.append(h)

        assert len(tapped) == len(self.fea_tran)

        return {
            str(feature.size(-1)): transform(feature, emb)
            for feature, transform in zip(tapped, self.fea_tran)
        }

    def load_from_dict(self, state_dict):
        """
        Load model weights from a dictionary.

        Only keys starting with "structcond_stage_model." are used; the
        prefix is removed before calling load_state_dict.

        :param state_dict: a dict of parameters.
        """
        prefix = "structcond_stage_model."
        own_weights = {
            key[len(prefix):]: value
            for key, value in state_dict.items()
            if key.startswith(prefix)
        }
        self.load_state_dict(own_weights)
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
def build_unetwt() -> EncoderUNetModelWT:
    """
    Construct an EncoderUNetModelWT with fixed hyper-parameters.

    The settings are taken from the official setting yaml file:
    https://github.com/IceClear/StableSR/blob/main/configs/stableSRNew/v2-finetune_text_T_512.yaml

    No weights are loaded here. As a side effect, the module-level
    ``attn_func`` is rebound to the result of ``get_attn_func()``.

    :return: the newly constructed nn.Module.
    """
    global attn_func

    settings = dict(
        in_channels=4,
        model_channels=256,
        out_channels=256,
        num_res_blocks=2,
        attention_resolutions=[4, 2, 1],
        dropout=0.0,
        channel_mult=[1, 1, 2, 2],
        conv_resample=True,
        dims=2,
        use_checkpoint=False,
        use_fp16=False,
        num_heads=4,
        num_head_channels=-1,
        num_heads_upsample=-1,
        use_scale_shift_norm=False,
        resblock_updown=False,
        use_new_attention_order=False,
    )
    model = EncoderUNetModelWT(**settings)

    # Select the attention implementation only after the model was built.
    attn_func = get_attn_func()
    return model
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
if __name__ == "__main__":
    # Test the lr encoder model: load the checkpoint, list its keys, run one
    # forward pass on a random latent and print the keys of the result.
    ckpt_path = '../models/stablesr_sd21.ckpt'
    state_dict = torch.load(ckpt_path)
    for name in state_dict:
        print(name)

    encoder = build_unetwt()
    encoder.load_from_dict(state_dict)
    encoder = encoder.cuda()

    # Inputs are created in half precision and moved to the GPU.
    latent = torch.randn(1, 4, 64, 64).half().cuda()
    steps = torch.tensor([0]).half().cuda()
    with torch.no_grad():
        outputs = encoder(latent, steps)
    print(outputs.keys())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sd-webui-stablesr/tools/extract_srmodule.py
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
"""
This script extracts the spade and structcond module from the official stablesr_000117.ckpt
"""

import torch

stablesr_path = 'models/stablesr_000117.ckpt'

with open(stablesr_path, 'rb') as ckpt_file:
    stablesr_ckpt = torch.load(ckpt_file, map_location='cpu')

# Keep only the entries whose key mentions one of the two module names.
srmodule = {
    name: weight
    for name, weight in stablesr_ckpt['state_dict'].items()
    if 'spade' in name or 'structcond' in name
}

# Save the reduced dictionary as its own checkpoint.
torch.save(srmodule, 'models/stablesr_sd21.ckpt')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sd-webui-stablesr/tools/extract_vaecfw.py
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
import torch

vae_path = 'models/vqgan_cfw_00011.ckpt'

with open(vae_path, 'rb') as ckpt_file:
    vae_ckpt = torch.load(ckpt_file, map_location='cpu')

weights = vae_ckpt['state_dict']

# Collect (and report) every key that belongs to the decoder fusion layers.
prune_keys = [name for name in weights if 'decoder.fusion_layer' in name]
for name in prune_keys:
    print(name)

# Move those entries out of the checkpoint into a dictionary of their own.
vae_cfw = {name: weights.pop(name) for name in prune_keys}

# The remaining checkpoint and the extracted entries are saved separately.
torch.save(vae_ckpt, 'models/vqgan_cfw_00011_vae_only.ckpt')
torch.save(vae_cfw, 'models/vqgan_cfw_00011_cfw_only.ckpt')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|