diff --git a/3-bmab/.gitignore b/3-bmab/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..5ef948413f1b4fce8f0fc2930704133a167ee0f2 --- /dev/null +++ b/3-bmab/.gitignore @@ -0,0 +1,2 @@ +**/__pycache__/ +cache/ diff --git a/3-bmab/LICENSE.txt b/3-bmab/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..211d32e752cb61bd056436e8f7a806f12a626bb7 --- /dev/null +++ b/3-bmab/LICENSE.txt @@ -0,0 +1,663 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (c) 2023 AUTOMATIC1111 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. 
+ + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. 
For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. 
You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. 
+ + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. 
+ + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. + + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. 
If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . 
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source.  For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<https://www.gnu.org/licenses/>.
diff --git a/3-bmab/README.md b/3-bmab/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..0d0358dfe6216b08b26efdc9345f33a671ac7722
--- /dev/null
+++ b/3-bmab/README.md
@@ -0,0 +1,214 @@
+
+# BMAB
+
+BMAB is an extension for Stable Diffusion WebUI that post-processes generated images according to its settings.
+It can detect and redraw people, faces, and hands, and it can resize, resample, and add noise.
+It can also composite two images and upscale the result.
+
+Manual (KR)
+
+You can see more images at https://www.facebook.com/profile.php?id=61553793992101
+
+## Example
+
+Example image settings:
+
+* Person, Face Detailing, cnNoise(0.4, 0, 0.4), cnPose(1, 0, 1), cnIpAdapter(0.6, 0, 0.3), Custom Filter
+* IC-Light(Left), Face Detailing, Resample, cnNoise(0.4, 0, 0.4), cnPose(1, 0, 1), cnIpAdapter(0.6, 0, 0.3)
+* IC-Light(Left), Face Detailing
+* Face Detailing, Resample, cnNoise(0.4, 0, 0.4), cnPose(1, 0, 1), cnIpAdapter(0.6, 0, 0.3)
+* Face Detailing, cnNoise(0.4, 0, 0.4), cnPose(1, 0, 1), cnIpAdapter(0.6, 0, 0.3)
+* Face Detailing, cnNoise(0.4, 0, 0.4), cnPose(1, 0, 1)
+* Face Detailing, ControlNet Noise (0.4, 0, 0.4)
+* Resize intermediate (inpaint+lama, Bottom, 0.75, 0.6), Face Detailing, ControlNet Noise (0.7, 0, 0.6), Noise Alpha (0.1)
+* Resize intermediate (Center, 0.5, 0.6), Face Detailing, ControlNet Noise (0.4, 0, 0.4)
+* Resize intermediate (Bottom, 0.5, 0.6), Face Detailing, ControlNet Noise (0.4, 0, 0.4)
diff --git a/3-bmab/ReleaseNote.md b/3-bmab/ReleaseNote.md
new file mode 100644
index 0000000000000000000000000000000000000000..71e9afdf0b6fdf0a54582dde9bc41939fb9292f1
--- /dev/null
+++ b/3-bmab/ReleaseNote.md
@@ -0,0 +1,143 @@
+### v24.05.12
+
+* New Feature
+  * ICLight
+    * Added Style:
+      * intensive : the original IC-Light process
+      * less intensive : reduces over-application
+      * normal : applies lighting while preserving the background as much as possible
+      * soft : preserves the background as much as possible
+    * Face : detects the face region and emphasizes it with lighting
+    * Person : detects people and emphasizes them with lighting
+  * Cache
+    * Creates the cache directory if it does not exist
+  * Pretraining
+    * Fixed to use the actual image size instead of the entered width and height
+
+### v24.05.12
+
+* New Feature
+  * ICLight : https://github.com/lllyasviel/IC-Light
+    * Integrated ICLight so it can be controlled from BMAB.
+    * Memory usage is high.
+    * With "Enable ICLight before upscale" turned off, 1200*1800 is difficult even on a 4090.
+  * Added a preprocess filter that runs before the Process step.
+* Code Fix
+  * Changed the Pose default values.
+  * Changed the cache to be managed in one place.
+
+### v24.05.07
+
+* New Feature
+  * Installer : groundingdino for CUDA support.
+  * Resample : fix FakeControlNet
+  * Refactoring : multiple controlnet support
+
+
+### v24.05.01
+
+* New Feature
+  * ControlNet IpAdapter: creating a subdirectory now lets images be applied from inside that subdirectory.
+    * Selecting Random picks from all images.
+  * Pose and IpAdapter images are now displayed when saved settings are loaded.
+
+### v24.05.01
+
+* New Feature
+  * Provides the ControlNet IpAdapter feature.
+    * Provided via sd-webui-controlnet, improved so that images can be applied randomly.
+    * You must enter the current ControlNet IpAdapter model in Settings > BMAB.
+    * The model is set to default out of the box.
+
+* Code Fix
+  * "Additional Checkpoint Path" in Settings did not work on sd-webui 1.8; leaving it blank no longer raises an error.
+  * Person and Face prompts use the hires.fix prompt when hires.fix is enabled.
+
+### v24.04.30
+
+* New Feature
+  * Added checkpoint, vae, sampler, scheduler, and steps to the Person Detailer.
+  * Entering an "Additional Checkpoint Path" directory in Settings merges the contents of both directories and applies them everywhere.
+
+* Code Fix
+  * Person and Face prompts use the hires.fix prompt when hires.fix is enabled.
+
+### v24.04.26
+
+* Code Fix
+  * Added checkpoint and vae selection to Pretraining.
+  * Removed the individual refresh buttons for checkpoint, vae, filter, pose, etc. and merged them into one.
+  * Added a vintage filter.
+
+
+### v24.04.20
+
+* New Features
+  * Pose
+    * Added a "Face only" option.
+    * Poses can now be selected.
+
+### v24.04.17
+
+* New Features
+  * Pose
+    * Runs openpose using the images in the sd-webui-bmab/pose directory.
+    * Runs in a random order derived from the seed.
+  * Face
+    * Added checkpoint selection.
+  * Pretraining
+    * Added filter selection.
+  * Refiner
+    * Added VAE selection.
+  * Noise
+    * Added Both, Low res fix, and High res fix.
+
+### v24.04.16
+
+* Code Fix
+  * 1.9.0 support
+  * 1.8.0 backward compatibility
+  * General fixes around checkpoint handling
+  * Fixed a basicsr installation error
+
+### v24.04.05
+
+* Code Fix
+  * Fixed a bug where BMAB did not work when installed on 1.8.0.
+  * Separated the preprocess filter.
+  * Fixed a bug where the filter was not applied correctly during hires.fix upscaling.
+  * Changed ControlNet Noise to use a gray image.
+  * Some other code refactoring.
+
+### v23.11.30.0
+
+* Code Fix
+  * Support BMAB DR
+  * Img2img rollback
+
+
+### v23.11.28.0
+
+* New Feature
+  * Kohya Hires.fix
+    * Added the Kohya Hires.fix feature to Preprocess.
+    * With SD 1.5, it works best at 1024x1024, 1536x1536, 1024x1536, or 1536x1024.
+    * The original author made this feature for SDXL; there is no particular need to use it with SD 1.5.
+
+
+### v23.11.27.0
+
+* New Feature
+  * Stop generating gracefully
+    * Stops once the current BMAB process completes, even if batches remain.
+    * Instead of pressing Interrupt mid-generation, it stops after the current image finishes.
+    * A small Stop button sits to the right of "Enable BMAB".
+  * FinalFilter
+    * Applies a filter so the final image can be modified.
+    * Implement a filter and place it in the filter directory to make it available.
+* BugFix
+  * Fixed inpaint area not being applied when using Img2Img with openpose.
+  * Some minor code refactoring.
+
+
diff --git a/3-bmab/docs/en/bmab.md b/3-bmab/docs/en/bmab.md
new file mode 100644
index 0000000000000000000000000000000000000000..c456a23acf9288aba487b7a76ac3b5c4d0f2f36c
--- /dev/null
+++ b/3-bmab/docs/en/bmab.md
@@ -0,0 +1,319 @@
+
+# BMAB
+
+## Basic functions
+
+* Contrast : adjusts contrast (1 means no change)
+* Brightness : adjusts brightness (1 means no change)
+* Sharpeness : adjusts sharpening (1 means no change)
+* Color Temperature : adjusts color temperature; 6500K corresponds to 0 (0 means no change)
+* Noise alpha : adds noise before processing to raise detail. (recommended: 0.1)
+* Noise alpha at final stage : adds noise at the final stage to change the mood of the image.
+
+
+## Imaging
+
+### Blend Image in Img2Img
+
+Blends the image entered in the upload box with the image given to Img2Img.
+The two images are composited using the Blend Alpha value.
+The "Process before Img2Img" option applies.
+
+### Dino detect
+
+When doing Img2Img inpainting, a mask is generated automatically from the Dino detect prompt even if you do not provide one.
+If you upload an image, the uploaded image is used as the background and the region matched by the prompt is composited onto it.
+
+#### When used in Img2Img
+
+(Three example images were shown here.)
+
+The first image is the Img2Img input.
+The second image goes into the Image box of BMAB's Imaging section.
+
+During processing, the third image is composited from them, and the result follows the prompt.
+
+Enabled : CHECK!!
+
+Contrast : 1.2
+Brightness : 0.9
+Sharpeness : 1.5
+
+Enable dino detect : CHECK!!
+DINO detect Prompt : 1girl
+
+
+#### When used in Img2Img Inpaint
+
+A mask is created automatically from the contents of the DINO detect Prompt.
+
+Since this example changes the background, select "Inpaint Not Masked" in the inpaint settings.
+Selecting "Inpaint Masked" instead changes the person.
+
+
+## Person
+
+When this feature is enabled, people are detected and redrawn after the main process completes.
+It is effective in the following cases:
+
+* When a person is very small relative to the background, detail across the whole figure (clothing, face, etc.) improves.
+* When producing large images such as 4K, if the person is small after upscaling, this feature makes the person sharper.
+* It works well together with Face Detailing.
+
+
+#### Enable person detailing for landscape (EXPERIMENTAL)
+
+Enables redrawing people in detail within landscape images.
+
+#### Block over-scaled image
+
+This feature finds a person, enlarges the region, and redraws it. If the area of the enlarged image would exceed the original image, the process stops.
+The purpose is to keep sd-webui from hanging and to protect the GPU.
+
+#### Auto scale if "Block over-scaled image" enabled
+
+If set, when a region would be blocked by "Block over-scaled image" above, the scale is adjusted to fit the area of the original image and processing continues.
+
+#### Upscale Ratio
+
+When a person is found, the region is enlarged by this ratio and redrawn in detail.
+
+#### Denoising Strength
+
+If the person is large, 0.4 may not be enough. In that case, raise the value.
+
+#### Dilation mask
+
+Expands the mask of the detected person.
+
+#### CFG Scale
+
+The CFG scale used when redrawing the person.
+
+#### Large person area limit
+
+If the area a person occupies in the image exceeds this value, that person is skipped,
+because a person that is already large enough does not need to be redrawn.
+
+#### Limit
+
+If there are many people in the image, they are counted from largest area down, and people beyond this limit are not redrawn.
+
+
+## Face
+
+### Face Detailing
+
+When this feature is enabled, faces are corrected after the main process completes, similar to After Detailer (AD) or Detection Detailer (DD).
+If you configure AD or DD to run after this feature, the results may be poor.
+
+Prompts can be specified separately for up to five characters.
+
+#### Enable face detailing
+
+Turns face detailing on and off.
+
+#### Enable face detailing before hires.fix (EXPERIMENTAL)
+
+Runs face detailing once more just before hires.fix in the txt2img process.
+Because the face is corrected before upscaling, you can get higher-quality images,
+but it costs more processing and changes the image more strongly.
+
+#### Face detailing sort by
+
+Determines the order in which faces are detailed when there are several people in the image.
+
+Left, right, or size can be used; if unset, faces are processed in descending detection score order.
+
+#### Limit
+
+Determines how many faces are processed, in the order chosen above.
+A limit of 1 means at most one face is processed.
+
+#### Override Parameters
+
+* Denoising Strength
+* CFG Scale
+* Width
+* Height
+* Steps
+* Mask Blur
+
+Uses the values specified in the UI for the parameters above instead of the defaults.
+
+#### Inpaint Area
+
+Determines whether to redraw the whole image or only the face. Redrawing the whole image is not recommended.
+
+#### Only masked padding, pixels
+
+Use the default value.
+
+#### Dilation
+
+Enlarges the mask of the detected face.
+
+#### Box threshold
+
+Sets the detector's threshold. Detections below the default of 0.35 are excluded as probably not being faces.
+When using YOLO, this replaces the confidence value.
+
+**Tips for good results**
+
+* Remove face-related lora, textual inversion, and similar entries from the prompt. Things like sunglasses are fine.
+* Put a different lora or textual inversion for each face in the settings file.
+* Having many lora/TI entries in the prompt seems to reduce the freedom of image generation.
+* A lora shared by every character in the image can stay in the prompt.
+
+
+## Hand
+
+### Hand Detailing (EXPERIMENTAL)
+
+Corrects badly drawn hands.
+It automatically finds the hand regions in the generated image and redraws them.
+However, there is no guarantee a redrawn hand will come out well; in practice this mostly adds detail to the hands.
+
+#### Enable hand detailing
+
+Enables hand correction.
+
+#### Block over-scaled image
+
+This feature works by finding a hand, enlarging the region, and redrawing it.
+If the area to be redrawn would exceed the original image, the work is skipped.
+In that case, reduce the Upscale Ratio or turn this option off; note that turning it off may redraw a very large image and put heavy load on the GPU.
+
+#### Method
+
+* subframe : finds the region including the hands and the face/head, and redraws the upper body.
+* each hand : finds each hand, redraws it together with 3x of its surroundings, and applies only the hand.
+* each hand inpaint : finds each hand and redraws only the hand based on 3x of its surroundings.
+  The result can change very drastically and rarely comes out well; once the shape is in place, redrawing with subframe is recommended.
+* at once : redraws all detected hands at the same time.
+
+
+#### Prompt
+
+Leaving this empty is recommended for subframe.
+For each hand and each hand inpaint, enter a hand-related prompt.
+
+#### Negative Prompt
+
+Leaving this empty is recommended for subframe.
+For each hand and each hand inpaint, enter a hand-related negative prompt.
+
+#### Denoising Strength
+
+The denoising strength used when redrawing.
+* subframe : 0.4 recommended
+* others : 0.55 or higher recommended
+
+#### CFG Scale
+
+The CFG scale used when redrawing.
+
+#### Upscale Ratio
+
+Specifies how much the detected upper body / hand region is enlarged before redrawing.
+Drawing larger does not always raise the success rate.
+* subframe : 2.0
+* others : 2.0~4.0
+
+#### Box Threshold
+
+If hands are not being detected, lowering this value raises the chance of finding them,
+but it also raises the chance of false detections.
+
+#### Box Dilation
+
+Determines how far the detected box (including the hand) is expanded. (only for subframe)
+
+#### Inpaint Area
+
+Determines whether to redraw the whole detected box or only the hand.
+Redrawing only the hand can change the hand shape in unwanted ways, but the change is larger.
+
+#### Only masked padding
+
+Determines how much padding to use inside the detected hand region. There is rarely a reason to change it.
+
+#### Additional Parameter
+
+Not provided yet; options for advanced users are planned.
+
+
+## ControlNet
+
+Raises detail by adding noise to the image through ControlNet.
+A gaussian noise image is fed into ControlNet's Lineart model as input,
+which adds varied, complex detail to the result.
+
+#### Noise Strength
+
+Specifies the noise strength. (0.4 recommended)
+
+#### Begin
+
+Starting point within the sampling steps.
+
+#### End
+
+End point within the sampling steps.
+
+In most cases 0.4, 0, 0.4 is recommended; if the image becomes overdrawn, try around 0.2, 0, 0.4.
+If the image is overdrawn, using the refiner can stabilize it to some extent.
+
+All of the images below use the same seed.
+
+(Example images: base image, noise strength 0.4, noise strength 0.7.)
diff --git a/3-bmab/docs/en/manual.md b/3-bmab/docs/en/manual.md
new file mode 100644
index 0000000000000000000000000000000000000000..bad6b46f361a0e7d4e4145824028832d76b0cf32
--- /dev/null
+++ b/3-bmab/docs/en/manual.md
@@ -0,0 +1,58 @@
+
+## Quick Test
+
+Check Enable and select the preset "example" in the Config tab.
+
+contrast: 1.2
+brightness: 0.9
+sharpeness: 1.5
+
+Edge enhancement applied
+Face Detailing applied
+Resize by person applied
+
+
+## Basic options
+
+Enabled (VERSION): turns the extension on and off.
+
+### Resize and fill override
+
+When "Resize and fill" is selected in Img2Img, the image normally
+stretches left/right or top/bottom, or is simply resized if the aspect ratio matches.
+
+When this is enabled, the image is always anchored at the bottom and
+stretched proportionally to the left, right, and top.
+
+It is effective when there is no margin above the person.
+Stretching too far makes good results hard to get;
+a scale of roughly 1.1 to 1.2 is recommended.
+
+(Before/after example images were shown here.)
+
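+As a loose illustration of the bottom-anchored behavior, a minimal sketch assuming
+Pillow; this is not BMAB's actual code, which fills the new area during generation:
+
+```python
+from PIL import Image
+
+def resize_and_fill_bottom(img, scale=1.2):
+    # Enlarge the canvas, keep the original glued to the bottom edge,
+    # centered horizontally, so it grows left, right, and upward.
+    w, h = img.size
+    W, H = round(w * scale), round(h * scale)
+    canvas = img.resize((W, H))               # stretched copy as a rough fill
+    canvas.paste(img, ((W - w) // 2, H - h))  # original anchored at the bottom
+    return canvas
+```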
+
+# Preprocess
+
+Performs preprocessing steps before the main image is modified.
+Depending on the settings, it can also intervene in the hires.fix process.
+
+(Preprocess screenshot)
+
+# BMAB
+
+Runs the Person, Hand, and Face detailers, and performs functions such as image compositing and noise addition.
+
+(BMAB screenshot)
+
+# Postprocess
+
+After image processing completes, the background can be extended based on the size of the person, or the image can be upscaled.
+
+(Postprocess screenshot)
diff --git a/3-bmab/docs/en/postprocess.md b/3-bmab/docs/en/postprocess.md
new file mode 100644
index 0000000000000000000000000000000000000000..2fc66330d0a198fb6b0640fcfa95091748bbf898
--- /dev/null
+++ b/3-bmab/docs/en/postprocess.md
@@ -0,0 +1,66 @@
+
+
+# PostProcessing
+
+## Resize
+
+### Resize by person
+
+If the ratio of the tallest person's height to the image height exceeds the configured value, this feature brings the ratio back down to that value.
+If the setting is 0.90 and the person's height : image height ratio is 0.95,
+the background is extended so the person's ratio becomes 0.90.
+The background is stretched to the left, right, and top.
+
+This feature offers two methods:
+
+#### Inpaint
+
+Extends the surroundings after the image is fully generated, in the same way as Face Detailing.
+Because only the surroundings are extended without damaging the already generated image, the result is easy to verify.
+This is the fastest and most effective method, and the recommended one.
+
+#### Inpaint + lama
+
+The same approach as Inpaint, but BMAB calls ControlNet and uses inpaint+lama.
+After the image is generated and before detailing starts, the background is extended via img2img, which has the effect of making the person smaller in the overall image.
+
+(Example images were shown here.)
+
+These two methods only make the generated image smaller; they do not damage it.
+This is the difference from Resize intermediate.
+
+
+## Upscaler
+
+After the image is finally complete, it is enlarged using an upscaler.
+
+
+#### Enable upscale at final stage
+
+Performs the upscale after image generation completes.
+Generating at 960x540 with hires.fix x2 produces a 1920x1080 image;
+upscaling that by x2 yields a 4K image.
+
+#### Detailing after upscale
+
+If set, the Person, Face, and Hand detailing mentioned above runs after the upscale.
+
+#### Upscale Ratio
+
+Determines how much the image is upscaled.
+
diff --git a/3-bmab/docs/en/preprocess.md b/3-bmab/docs/en/preprocess.md
new file mode 100644
index 0000000000000000000000000000000000000000..598d6670bd472d1a871ad0c69b7f975b7d17c4d2
--- /dev/null
+++ b/3-bmab/docs/en/preprocess.md
@@ -0,0 +1,425 @@
+
+# Preprocess
+
+## Context
+
+Specifies the Checkpoint and VAE that BMAB will use.
+Certain features can set their own Checkpoint and VAE.
+Once the Checkpoint is changed, subsequent processes keep using it.
+
+#### txt2img noise multiplier for hires.fix
+
+Adds noise during the hires.fix stage.
+
+#### txt2img extra noise multiplier for hires.fix (EXPERIMENTAL)
+
+Adds extra noise during the hires.fix stage.
+
+#### Hires.fix filter before upscaler
+
+Applies a filter before the upscaler during the hires.fix stage.
+
+#### Hires.fix filter after upscaler
+
+Applies a filter after the upscaler during the hires.fix stage.
+
+
+## Resample (EXPERIMENTAL)
+
+A self-resampling feature. The image produced by txt2img -> hires.fix is run through txt2img -> hires.fix again
+while applying ControlNet Tile Resample. It can be used in cases such as:
+
+* When the results of two models differ greatly
+* When the body proportions differ between two models
+* When the two models are different versions (SDXL, SD15)
+
+(Example images: txt2img->hires.fix vs. Resample + BMAB Basic.)
+
+BMAB resample image by [padapari](https://www.instagram.com/_padapari_/)
+
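+The Resample options described below can also be set through the API. A minimal sketch,
+assuming the `resample_opt` keys from the default settings in docs/kr/api.md and the
+recommended values on this page (steps 20, denoising strength 0.4):
+
+```python
+# Hypothetical example: enabling Resample through BMAB's API arguments.
+# Key names come from the default settings JSON in docs/kr/api.md.
+bmab_args = {
+    'enabled': True,
+    'resample_enabled': True,
+    'module_config': {
+        'resample_opt': {
+            'method': 'txt2img-2pass',   # re-run txt2img with hires.fix
+            'steps': 20,                 # recommended sampling steps
+            'denoising_strength': 0.4,   # recommended on this page
+            'strength': 0.5,             # closer to 1 stays nearer the original
+            'begin': 0.1,
+            'end': 0.9,
+        }
+    }
+}
+```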
+
+#### Enable self resample (EXPERIMENTAL)
+
+Turns this feature on and off.
+
+#### Save image before processing
+
+When the image first produced by txt2img -> hires.fix enters BMAB for post-processing,
+it is saved before processing. The image gets the postfix "-before-resample".
+
+#### Checkpoint
+
+Specifies the SD checkpoint. If unset, the previously configured checkpoint is used.
+It is not restored to the original after the process completes.
+
+#### SD VAE
+
+Specifies the SD VAE. If unset, the previously configured VAE is used.
+It is not restored to the original after the process completes.
+
+#### Resample method
+
+Selects the resample method.
+
+* txt2img-1pass : runs txt2img without hires.fix.
+* txt2img-2pass : runs txt2img with hires.fix. hires.fix must be enabled for the base generation.
+* img2img-1pass : runs img2img.
+
+#### Resample filter
+
+After resampling completes, an external filter can be called to apply additional image transformations.
+
+#### Resample prompt
+
+The prompt used during resampling. If empty, it is the same as the main prompt.
+Writing "#!org!#" substitutes the main prompt at that position, and more prompt text can follow it.
+ex) #!org!#, soft light, some more keyword
+
+#### Resample negative prompt
+
+The negative prompt used during resampling. If empty, it is the same as the main negative prompt.
+
+#### Sampling method
+
+Specifies the sampling method for the process. If unset, the same sampler as the previous process is used.
+
+#### Upscaler
+
+The upscaler used when hires.fix is enabled.
+
+#### Resample sampling steps
+
+Specifies the sampling steps for the resample process.
+(recommended: 20)
+
+#### Resample CFG scale
+
+Specifies the CFG scale for the resample process.
+Dynamic thresholding is not supported.
+
+#### Resample denoising strength
+
+Specifies the denoising strength for the resample process.
+(recommended: 0.4)
+
+#### Resample strength
+
+Values close to 0 move away from the input image; values close to 1 stay similar to the original.
+
+#### Resample begin
+
+Starting point within the sampling steps.
+
+#### Resample end
+
+End point within the sampling steps.
+
+
+## Pretraining (EXPERIMENTAL)
+
+A pretrained-model detailer. Detection runs with a model trained using ultralytics,
+and based on the detections, the prompt and negative prompt are applied to redraw parts of the image in more detail.
+
+#### Enable pretraining detailer (EXPERIMENTAL)
+
+Turns this feature on and off.
+
+#### Enable pretraining before hires.fix
+
+Runs the pretraining detailer before hires.fix.
+
+#### Pretraining model
+
+Specifies a detection model (*.pt) trained with ultralytics.
+The file must be in stable-diffusion-webui/models/BMAB to appear in the list.
+
+#### Pretraining prompt
+
+The prompt used by the pretraining detailer. If empty, it is the same as the main prompt.
+Writing "#!org!#" substitutes the main prompt at that position, and more prompt text can follow it.
+ex) #!org!#, soft light, some more keyword
+
+#### Pretraining negative prompt
+
+The negative prompt used by the pretraining detailer. If empty, it is the same as the main negative prompt.
+
+#### Sampling method
+
+Specifies the sampling method for the process. If unset, the same sampler as the previous process is used.
+
+#### Pretraining sampling steps
+
+Specifies the sampling steps for the pretraining process.
+(recommended: 20)
+
+#### Pretraining CFG scale
+
+Specifies the CFG scale for the pretraining process.
+Dynamic thresholding is not supported.
+
+#### Pretraining denoising strength
+
+Specifies the denoising strength for the pretraining process.
+(recommended: 0.4)
+
+#### Pretraining dilation
+
+Enlarges the detected rectangle by the given amount.
+
+#### Pretraining box threshold
+
+Sets the detector's threshold; detections below the default of 0.35 are excluded.
+This is the confidence value of the ultralytics predict call.
+
+
+## Edge enhancement
+
+Strengthens image edges to increase sharpness and detail.
+
+**It does not work when the upscaler is a Latent type. (R-ESRGAN or 4x-UltraSharp recommended)**
+
+Recommended settings:
+
+* Edge low threshold : 50
+* Edge high threshold : 200
+* Edge strength : 0.5
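+
+As a rough sketch of what this kind of edge-guided enhancement does, assuming OpenCV
+and the recommended thresholds; BMAB's actual implementation may differ:
+
+```python
+import cv2
+import numpy as np
+
+def enhance_edges(img_bgr, low=50, high=200, strength=0.5):
+    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
+    edges = cv2.Canny(gray, low, high)                    # edge map from the two thresholds
+    edges = cv2.dilate(edges, np.ones((2, 2), np.uint8))  # thicken edges slightly
+    darkened = img_bgr.copy()
+    darkened[edges > 0] = darkened[edges > 0] // 2        # emphasize edge pixels
+    # Blend the edge-emphasized copy back in by "Edge strength".
+    return cv2.addWeighted(img_bgr, 1.0 - strength, darkened, strength, 0)
+```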
+
+(Before/after example images were shown here.)
+
+
+Enabled : CHECK!!
+
+Contrast : 1.2
+Brightness : 0.9
+Sharpeness : 1.5
+
+Enable edge enhancement : CHECK!!
+Edge low threshold : 50
+Edge high threshold : 200
+Edge strength : 0.5
+
+
+## Resize
+
+Runs in the middle of the txt2img -> hires.fix process.
+If used with img2img, it runs before the process starts.
+
+If the ratio of the tallest person's height to the image height exceeds the configured value, this feature brings the ratio back down to that value.
+If the setting is 0.90 and the person's height : image height ratio is 0.95,
+the background is extended so the person's ratio becomes 0.90.
+The background is stretched according to the chosen Alignment.
+
+During txt2img, the image is modified just before hires.fix runs,
+so that the modified image is blended smoothly by the hires.fix pass.
+**Use a denoising strength of about 0.6~0.7 to avoid distortion around the edges.**
+**It does not work when the upscaler is a Latent type. (R-ESRGAN or 4x-UltraSharp recommended)**
+
+#### Method
+
+Specifies how the resize is done.
+
+* Stretching : simply stretches the outer part of the image to extend the background.
+* inpaint : performs img2img inpainting only on the stretched region, using a mask.
+* inpaint+lama : redraws the extended region using ControlNet's inpaint+lama model.
+* inpaint_only : redraws the extended region using ControlNet's inpaint_only.
+
+
+#### Alignment
+
+Determines how the original image is aligned when the canvas is extended.
+
+#### Resize filter
+
+After the resize completes, an external filter can be called to apply additional image transformations.
+
+
+#### Resize by person intermediate
+
+The person-size ratio. If the person exceeds this value, the background is extended until the person matches this ratio.
+
+(Example images: Original, Resize 0.7, Resize 0.5.)
+
+(Example images: Original, Alignment center, Alignment bottom, Alignment bottom-left.)
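+
+The arithmetic behind these examples can be worked through directly. A hypothetical
+helper, not BMAB's actual code:
+
+```python
+def resize_by_person(width, height, person_ratio, target_ratio=0.90):
+    # If the tallest person exceeds the target ratio, enlarge the canvas
+    # so the (unchanged) person shrinks to the target ratio.
+    if person_ratio <= target_ratio:
+        return width, height
+    scale = person_ratio / target_ratio        # e.g. 0.95 / 0.90 ~= 1.056
+    return round(width * scale), round(height * scale)
+
+# A 512x768 image whose person spans 95% of the height:
+print(resize_by_person(512, 768, 0.95))        # -> (540, 811)
+```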
+
+(Resize sample images were shown here.)
+
+
+## Refiner
+
+Draws the image produced by txt2img one more time.
+This also works when txt2img + hires.fix has already been applied.
+
+The refiner runs after the image is generated and before detailing starts,
+and behaves like a combination of sd-webui's hires.fix and refiner.
+
+(Example images: txt2img (512x768), txt2img + hires.fix (800x1200), txt2img + hires.fix + refiner (1200x1800).)
+
+(The examples above are all resized to the same size.)
+
+Processing can be done in three stages as in the example above,
+or the refiner can resize the image directly, without the hires.fix stage.
+
+(Example images were shown here.)
+
+#### Enable refiner
+
+Enables or disables the refiner.
+
+#### CheckPoint
+
+Specifies the checkpoint used when the refiner redraws the image.
+
+#### Use this checkpoint for detailing
+
+Applies the checkpoint specified above to detailing as well.
+
+#### Prompt
+
+Specifies the prompt the refiner uses when redrawing the image.
+If empty, it is the same as the main prompt; if filled in, the main prompt is ignored.
+If the string #!org!# is present, it is replaced by the main prompt.
+
+#### Negative prompt
+
+Specifies the negative prompt the refiner uses when redrawing the image.
+
+#### Sampling method
+
+Specifies the sampler the refiner uses.
+(Euler A recommended)
+
+#### Upscaler
+
+Specifies the upscaler used when the refiner resizes the image.
+
+#### Refiner sampling steps
+
+Specifies the sampling steps the refiner uses.
+(recommended: 20)
+
+#### Refiner CFG scale
+
+Specifies the CFG scale the refiner uses.
+Dynamic thresholding is not supported.
+
+#### Refiner denoising strength
+
+Specifies the denoising strength the refiner uses.
+(recommended: 0.4)
+
+#### Refiner scale
+
+The refiner resizes the given image by this scale value.
+It is ignored if refiner width and refiner height are set.
+
+#### Refiner width
+
+Forces the image width to this value.
+
+#### Refiner height
+
+Forces the image height to this value.
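+
+The size rule above is simple enough to state in code. A hypothetical helper, not
+BMAB's actual implementation:
+
+```python
+def refiner_target_size(width, height, scale=1.0, refiner_width=0, refiner_height=0):
+    # Refiner width/height, when set, take precedence over Refiner scale.
+    if refiner_width and refiner_height:
+        return refiner_width, refiner_height
+    return round(width * scale), round(height * scale)
+
+print(refiner_target_size(800, 1200, scale=1.5))        # -> (1200, 1800)
+print(refiner_target_size(800, 1200, 1.5, 1024, 1536))  # -> (1024, 1536)
+```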
+
+
diff --git a/3-bmab/docs/kr/api.md b/3-bmab/docs/kr/api.md
new file mode 100644
index 0000000000000000000000000000000000000000..d9f9eed800647d302232fb59333ad6b1de09a09f
--- /dev/null
+++ b/3-bmab/docs/kr/api.md
@@ -0,0 +1,234 @@
+
+
+
+# API
+
+When generating images through the Stable Diffusion webui API, BMAB can be included in the API call as shown below.
+
+
+```python
+import requests
+import json
+import base64
+
+
+prompt = '''
+1girl
+'''
+negative_prompt = '(worst quality, low quality:1.4),'
+
+txt2img = {
+    'prompt': prompt,
+    'negative_prompt': negative_prompt,
+    'steps': 20,
+    'width': 512,
+    'height': 768,
+    'cfg_scale': 7,
+    'seed': -1,
+    'sampler_index': 'DPM++ SDE Karras',
+    'script_name': None,
+    'alwayson_scripts': {
+        'BMAB': {
+            'args': [
+                {
+                    'enabled': True,
+                    'face_detailing_enabled': True,
+                }
+            ]
+        }
+    }
+}
+
+response = requests.post('http://localhost:7860/sdapi/v1/txt2img', data=json.dumps(txt2img))
+print(response)
+j = response.json()
+b64_image = j['images'][0]
+
+
+with open('test.png', 'wb') as image_file:
+    image_file.write(base64.b64decode(b64_image))
+
+```
+
+BMAB's arguments are identical to its saved configuration file,
+so every setting can be supplied on this basis. Any setting that is omitted falls back to its default value.
+
+Below are the default settings in JSON form.
+
+```json
+{
+  "enabled": false,
+  "preprocess_checkpoint": "Use same checkpoint",
+  "preprocess_vae": "Use same vae",
+  "txt2img_noise_multiplier": 1,
+  "txt2img_extra_noise_multiplier": 0,
+  "txt2img_filter_hresfix_before_upscale": "None",
+  "txt2img_filter_hresfix_after_upscale": "None",
+  "resample_enabled": false,
+  "module_config": {
+    "resample_opt": {
+      "save_image": false,
+      "hiresfix_enabled": false,
+      "checkpoint": "Use same checkpoint",
+      "vae": "Use same vae",
+      "method": "txt2img-1pass",
+      "filter": "None",
+      "prompt": "",
+      "negative_prompt": "",
+      "sampler": "Use same sampler",
+      "upscaler": "BMAB fast",
+      "steps": 20,
+      "cfg_scale": 7,
+      "denoising_strength": 0.75,
+      "strength": 0.5,
+      "begin": 0.1,
+      "end": 0.9
+    },
+    "pretraining_opt": {
+      "hiresfix_enabled": false,
+      "pretraining_model": "Select Model",
+      "prompt": "",
+      "negative_prompt": "",
+      "sampler": "Use same sampler",
+      "steps": 20,
+      "cfg_scale": 7,
+      "denoising_strength": 0.75,
+      "dilation": 4,
+      "box_threshold": 0.35
+    },
+    "resize_intermediate_opt": {
+      "resize_by_person": true,
+      "method": "stretching",
+      "alignment": "bottom",
+      "filter": "None",
+      "scale": 0.85,
+      "denoising_strength": 0.75
+    },
+    "refiner_opt": {
+      "checkpoint": "Use same checkpoint",
+      "keep_checkpoint": true,
+      "prompt": "",
+      "negative_prompt": "",
+      "sampler": "Use same sampler",
+      "upscaler": "BMAB fast",
+      "steps": 20,
+      "cfg_scale": 7,
+      "denoising_strength": 0.75,
+      "scale": 1,
+      "width": 0,
+      "height": 0
+    },
+    "person_detailing_opt": {
+      "best_quality": false,
+      "force_1:1": false,
+      "block_overscaled_image": true,
+      "auto_upscale": true,
+      "scale": 4,
+      "dilation": 3,
+      "area_ratio": 0.1,
+      "limit": 1,
+      "background_color": 1,
+      "background_blur": 0
+    },
+    "person_detailing": {
+      "denoising_strength": 0.4,
+      "cfg_scale": 7
+    },
+    "face_detailing_opt": {
+      "best_quality": false,
+      "sort_by": "Score",
+      "limit": 1,
+      "prompt0": "",
+      "negative_prompt0": "",
+      "prompt1": "",
+      "negative_prompt1": "",
+      "prompt2": "",
+      "negative_prompt2": "",
+      "prompt3": "",
+      "negative_prompt3": "",
+      "prompt4": "",
+      "negative_prompt4": "",
+      "override_parameter": false,
+      "sampler": "Use same sampler",
+      "detection_model": "BMAB Face(Normal)",
+      "dilation": 4,
+      "box_threshold": 0.35,
+      "skip_large_face": false,
+      "large_face_pixels": 0.26
+    },
+    "face_detailing": {
"width": 512, + "height": 512, + "cfg_scale": 7, + "steps": 20, + "mask_blur": 4, + "inpaint_full_res": "Only masked", + "inpaint_full_res_padding": 32, + "denoising_strength": 0.4 + }, + "hand_detailing_opt": { + "block_overscaled_image": true, + "best_quality": false, + "detailing_method": "subframe", + "auto_upscale": true, + "scale": 4, + "box_threshold": 0.3, + "dilation": 0.1, + "additional_parameter": "" + }, + "hand_detailing": { + "prompt": "", + "negative_prompt": "", + "denoising_strength": 0.4, + "cfg_scale": 7, + "inpaint_full_res": "Only masked", + "inpaint_full_res_padding": 32 + }, + "controlnet": { + "enabled": false, + "with_refiner": false, + "noise": false, + "noise_strength": 0.4, + "noise_begin": 0.1, + "noise_end": 0.9 + }, + "resize_by_person_opt": { + "mode": "Inpaint", + "scale": 0.85, + "denoising_strength": 0.6, + "dilation": 30 + } + }, + "pretraining_enabled": false, + "edge_flavor_enabled": false, + "edge_low_threadhold": 50, + "edge_high_threadhold": 200, + "edge_strength": 0.5, + "resize_intermediate_enabled": false, + "refiner_enabled": false, + "contrast": 1, + "brightness": 1, + "sharpeness": 1, + "color_saturation": 1, + "color_temperature": 0, + "noise_alpha": 0, + "noise_alpha_final": 0, + "input_image": null, + "blend_enabled": false, + "blend_alpha": 1, + "detect_enabled": false, + "masking_prompt": "", + "person_detailing_enabled": false, + "face_detailing_enabled": false, + "face_detailing_before_hiresfix_enabled": false, + "hand_detailing_enabled": false, + "resize_by_person_enabled": false, + "upscale_enabled": false, + "detailing_after_upscale": true, + "upscaler_name": "None", + "upscale_ratio": 1.5, + "config_file": "test", + "preset": "None" +} +``` + diff --git a/3-bmab/docs/kr/bmab.md b/3-bmab/docs/kr/bmab.md new file mode 100644 index 0000000000000000000000000000000000000000..c456a23acf9288aba487b7a76ac3b5c4d0f2f36c --- /dev/null +++ b/3-bmab/docs/kr/bmab.md @@ -0,0 +1,319 @@ + +# BMAB + +## 기본 기능 + +* Contrast : 대비값 조절 (1이면 변경 없음) +* Brightness : 밝기값 조절 (1이면 변경 없음) +* Sharpeness : 날카롭게 처리하는 값 조절 (1이면 변경 없음) +* Color Temperature : 색온도 조절, 6500K이 0 (0이면 변경 없음) +* Noise alpha : 프로세스 전에 노이즈를 추가하여 디테일을 올릴 수 있습니다. (권장값:0.1) +* Noise alpha at final stage : 최종 단계에서 노이즈를 추가하여 분위기를 다르게 전달할 수 있습니다. + + +## Imaging + +### Blend Image in Img2Img + +이미지 업로드 상자에 입력한 이미지와 Img2Img에 입력된 이미지를 Blending합니다. +Blend Alpha 값으로 두 개의 이미지를 합성합니다. +"Process before Img2Img" 옵션이 적용됩니다. + +### Dino detect + +Img2Img Inpainting 하는 경우에 마스크를 입력하지 않아도 Dino detect prompt에 있는 내용을 이용하여 자동으로 마스크를 생성합니다. +이미지를 업로드 하게되면 업로드된 이미지를 배경으로 하여 prompt로 입력된 부분을 업로드 이미지에 합성합니다. + +#### Img2Img 에서 사용하는 경우 + +
+
+[Images: Img2Img input image | image uploaded to BMAB Imaging | composited result]
+
+
+The first image goes into Img2Img.
+The second image goes into the Image upload box under BMAB's Imaging section.
+
+During processing the third image is composited, and the result follows the prompt.
+
+Enabled : CHECK!!
+
+Contrast : 1.2
+Brightness : 0.9
+Sharpeness : 1.5
+
+Enable dino detect : CHECK!!
+DINO detect Prompt : 1girl
+
+
+#### Using it from Img2Img Inpaint
+
+A mask is created automatically from the content of the DINO detect Prompt.
+
+
+[Images: automatically generated mask and inpaint result]
+
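+For API use, the Imaging options correspond to keys in the default settings JSON shown in the API document; which UI label maps to which key is my reading of those defaults, so treat the mapping as an assumption:
+
+```python
+# Hypothetical BMAB args for the Dino detect workflow above.
+bmab_args = {
+    'enabled': True,
+    'contrast': 1.2,
+    'brightness': 0.9,
+    'sharpeness': 1.5,
+    'detect_enabled': True,      # "Enable dino detect" (assumed)
+    'masking_prompt': '1girl',   # "DINO detect Prompt" (assumed)
+    # For blending instead, 'blend_enabled', 'blend_alpha' and
+    # 'input_image' are the corresponding keys in the defaults.
+}
+```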
+
+In this example the background was changed, so select "Inpaint Not Masked" in the inpaint settings.
+Choosing "Inpaint Masked" instead changes the person.
+
+
+## Person
+
+When enabled, people are detected after the process completes and redrawn.
+It is effective in cases such as:
+
+* When a person is very small relative to the background, the detail of the whole figure (clothing, face, and so on) improves.
+* When producing large images such as 4K, people that remain small after upscaling become much crisper.
+* It combines well with Face Detailing.
+
+
+#### Enable person detailing for landscape (EXPERIMENTAL)
+
+Enables redrawing people in detail within landscape images.
+
+#### Block over-scaled image
+
+When enabled, a detected person is enlarged and redrawn; if the area of the enlarged image would exceed that of the original, the process stops.
+The purpose is to keep sd-webui from stalling and to protect the GPU.
+
+#### Auto scale if "Block over-scaled image" enabled
+
+If set, whenever "Block over-scaled image" would block the operation, the scale is adjusted to fit the original image's area and the work proceeds.
+
+#### Upscale Ratio
+
+When a person is found, the region is enlarged by this ratio and redrawn in detail.
+
+#### Denoising Strength
+
+For a large figure 0.4 can be insufficient; raise the value in that case.
+
+#### Dilation mask
+
+Expands the mask of the detected person.
+
+#### CFG Scale
+
+The CFG scale used when redrawing the person.
+
+#### Large person area limit
+
+If the person occupies more of the image than this value, no work is done,
+because a sufficiently large figure does not need redrawing.
+
+#### Limit
+
+If there are many people in the image, they are counted from the largest by area, and any beyond this limit are not redrawn.
+
+
+## Face
+
+### Face Detailing
+
+When enabled, faces are retouched after the process completes, much like After Detailer (AD) or Detection Detailer (DD).
+Configuring AD or DD to run on top of this feature may give poor results.
+
+Prompts can be specified separately for up to 5 characters.
+
+#### Enable face detailing
+
+Turns face detailing on or off.
+
+#### Enable face detailing before hires.fix (EXPERIMENTAL)
+
+Runs face detailing once more right before hires.fix in the txt2img pipeline.
+Because faces are retouched before upscaling, the final image quality is better,
+but the extra pass costs more compute and changes the image more strongly.
+
+#### Face detailing sort by
+
+Determines the order in which faces are detailed when the image contains several people.
+
+Left, right, or size ordering is available; otherwise faces are processed from the highest detection Score downward.
+
+#### Limit
+
+When there are several people in the image, determines how many faces are processed, in the order chosen above.
+A Limit of 1 means at most one face is processed.
+
+#### Override Parameters
+
+* Denoising Strength
+* CFG Scale
+* Width
+* Height
+* Steps
+* Mask Blur
+
+Uses the values set in the UI instead of the defaults for the parameters above.
+
+#### Inpaint Area
+
+Determines whether the whole image or only the face is redrawn. Redrawing the whole image is not really recommended.
+
+#### Only masked padding, pixels
+
+Use the default value.
+
+#### Dilation
+
+Enlarges the mask of the detected face.
+
+#### Box threshold
+
+Sets the detector's acceptance threshold. Detections scoring below the default of 0.35 are discarded as unlikely faces.
+When YOLO is used, this replaces the confidence value.
+
+**Tips for good results**
+
+* Remove face-related LoRA, textual inversion, and similar items from the main prompt. Items such as sunglasses are fine.
+* Put a different LoRA or textual inversion for each face in the configuration file.
+* Many LoRA/TI entries in the prompt seem to reduce the freedom of image generation.
+* A LoRA shared by every character in the image can stay in the main prompt.
+
+
+## Hand
+
+### Hand Detailing (EXPERIMENTAL)
+
+Fixes badly drawn hands.
+It automatically finds the hands in a generated image and redraws those regions.
+There is no guarantee a redrawn hand comes out well; in practice this mostly adds detail to the hands.
+
+#### Enable hand detailing
+
+Enables hand retouching.
+
+#### Block over-scaled image
+
+This feature works by finding a hand, enlarging it, and redrawing it.
+If the area to be redrawn would exceed the original image, the work is skipped.
+In that case either reduce the Upscale Ratio or turn this option off; note that turning it off can mean redrawing a very large image, which puts heavy load on the GPU.
+
+#### Method
+* subframe : finds the hands together with the face/head and redraws the upper body.
+* each hand : finds each hand and redraws it together with surroundings about 3x its size; only the hand is applied.
+* each hand inpaint : finds each hand and redraws only the hand, based on surroundings about 3x its size.
+  The result can change very drastically and rarely comes out well; once the shape is in place, redrawing with subframe is recommended.
+* at once : redraws all detected hands in a single pass.
+
+
+#### Prompt
+
+For subframe it is best left empty.
+For each hand and each hand inpaint, enter a hand-related prompt.
+
+#### Negative Prompt
+
+For subframe it is best left empty.
+For each hand and each hand inpaint, enter a hand-related negative prompt.
+
+#### Denoising Strength
+
+The Denoising Strength used when redrawing.
+* subframe : 0.4 recommended
+* others : 0.55 or higher recommended
+
+#### CFG Scale
+
+The CFG Scale used when redrawing.
+
+#### Upscale Ratio
+Specifies how much the detected upper body / hand region is enlarged before redrawing.
+Drawing larger does not automatically raise the success rate.
+* subframe : 2.0
+* others : 2.0-4.0
+
+#### Box Threshold
+
+If hands are not being detected, lowering this value raises the chance of finding them,
+but also raises the chance of false detections.
+
+#### Box Dilation
+
+Determines how much the detected box (including the hand) is expanded. (only for subframe)
+
+#### Inpaint Area
+
+Determines whether the whole detected box is redrawn or only the hand.
+Redrawing only the hand can change its shape in unwanted ways, but the change is larger.
+
+#### Only masked padding
+
+Determines how much padding is kept around the detected hand. There is usually no reason to change it.
+
+#### Additional Parameter
+
+Not available yet; options for advanced users are planned here in the future.
+
+
+## ControlNet
+
+Raises detail by adding noise to the image through ControlNet.
+A Gaussian-noise image is fed into ControlNet's Lineart model,
+which adds varied, intricate detail to the result.
+
+#### Noise Strength
+
+Specifies the noise strength. (0.4 recommended)
+
+#### Begin
+
+The starting point within the sampling schedule.
+
+#### End
+
+The end point within the sampling schedule.
+
+In most cases 0.4, 0, 0.4 is recommended; if the image becomes overdrawn, try around 0.2, 0, 0.4.
+When the image is overdrawn, using the refiner can stabilize it to some degree.
+
+All of the images below use the same seed.
+
+[Images: base image | 0.4 | 0.7]
+
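+Via the API, the noise settings above live in the "controlnet" block of "module_config" in the default settings JSON; a hedged sketch using the recommended values:
+
+```python
+# Hypothetical BMAB args enabling the ControlNet noise detailer.
+bmab_args = {
+    'enabled': True,
+    'module_config': {
+        'controlnet': {
+            'enabled': True,
+            'noise': True,
+            'noise_strength': 0.4,  # recommended strength
+            'noise_begin': 0,       # start of the sampling range
+            'noise_end': 0.4,       # end of the sampling range
+        },
+    },
+}
+```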
+
+
diff --git a/3-bmab/docs/kr/manual.md b/3-bmab/docs/kr/manual.md
new file mode 100644
index 0000000000000000000000000000000000000000..744aec132e00dc9cb7acd0293ba2e921feaf294e
--- /dev/null
+++ b/3-bmab/docs/kr/manual.md
@@ -0,0 +1,65 @@
+
+## Quick Test
+
+Check Enable, then select the Preset "example" on the Config Tab.
+
+contrast: 1.2
+brightness: 0.9
+sharpeness: 1.5
+
+Edge enhancement applied
+Face Detailing applied
+Resize by person applied
+
+(A minimal API payload for this Quick Test is sketched further down this page.)
+
+
+## Basic options
+
+Enabled (VERSION): turns the extension on or off.
+
+### Resize and fill override
+
+When "Resize and fill" is selected in Img2Img,
+the image is normally stretched left/right or top/bottom, or simply resized when the aspect ratio matches.
+
+With Enabled checked, the image is instead always anchored at the bottom
+and extended proportionally to the left, right, and top.
+
+This is effective when there is no headroom above the person.
+Stretching too far makes good results hard to obtain;
+a scale of roughly 1.1 to 1.2 is recommended.
+
+
+[Images: Resize and fill override examples]
+
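+For reference, a minimal API payload that reproduces the Quick Test at the top of this page by loading the bundled "example" preset (resources/preset/example.json). The 'preset' key appears in the default settings shown in the API guide; selecting a preset by file name this way is an assumption:
+
+```python
+txt2img = {
+    'prompt': '1girl',
+    'steps': 20,
+    'alwayson_scripts': {
+        'BMAB': {
+            'args': [
+                # Loads resources/preset/example.json (assumed behaviour).
+                {'enabled': True, 'preset': 'example'}
+            ]
+        }
+    }
+}
+```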
+
+# Preprocess
+
+Performs preprocessing before the main image is modified.
+Depending on the configuration, it can also hook into the hires.fix pass.
+
+Preprocess
+
+# BMAB
+
+Runs the Person, Hand, and Face detailers, and handles functions such as image compositing and noise injection.
+
+bmab
+
+# Postprocess
+
+After image processing has finished, the background can be extended based on the size of the person, or the image can be upscaled.
+
+Postprocess
+
+
+# API
+
+The BMAB extension can be used when calling the Stable Diffusion webui API.
+
+API Guide
diff --git a/3-bmab/docs/kr/postprocess.md b/3-bmab/docs/kr/postprocess.md
new file mode 100644
index 0000000000000000000000000000000000000000..2fc66330d0a198fb6b0640fcfa95091748bbf898
--- /dev/null
+++ b/3-bmab/docs/kr/postprocess.md
@@ -0,0 +1,66 @@
+
+
+# PostProcessing
+
+## Resize
+
+### Resize by person
+
+If the ratio of the tallest person's height to the image height exceeds the configured value, this feature brings the ratio back down to that value.
+For example, if the setting is 0.90 and the person-height to image-height ratio is 0.95,
+the background is extended until the person's ratio becomes 0.90.
+The background is extended to the left, right, and top.
+
+
+Two methods are provided:
+
+#### Inpaint
+
+Extends the surroundings after the image has been fully generated, in the same way as Face Detailing.
+Because only the surroundings are extended and the already-generated image is left untouched, the effect is easy to verify.
+It is the fastest and most effective method, and the recommended one.
+
+#### Inpaint + lama
+
+The same approach as Inpaint, but BMAB calls ControlNet and works through Inpaint+lama.
+After the image is generated and before detailing starts, the background is extended with img2img, which has the overall effect of making the person smaller.
+
+[Images: Resize by person comparison]
+
+These two methods only shrink the generated image relative to the canvas; they do not alter its content.
+This is what distinguishes them from Resize intermediate.
+
+
+## Upscaler
+
+After the image is fully finished, it is enlarged with an upscaler.
+
+
+#### Enable upscale at final stage
+
+Performs the upscale after image generation completes.
+Generating at 960x540 with hires.fix x2 yields a 1920x1080 image;
+upscaling that by x2 produces a 4K image.
+
+#### Detailing after upscale
+
+When set, the Person, Face, and Hand detailing mentioned above runs after the upscale.
+
+#### Upscale Ratio
+
+Determines how much the image is upscaled.
+
diff --git a/3-bmab/docs/kr/preprocess.md b/3-bmab/docs/kr/preprocess.md
new file mode 100644
index 0000000000000000000000000000000000000000..598d6670bd472d1a871ad0c69b7f975b7d17c4d2
--- /dev/null
+++ b/3-bmab/docs/kr/preprocess.md
@@ -0,0 +1,425 @@
+
+# Preprocess
+
+## Context
+
+Specifies the Checkpoint and VAE that BMAB will use.
+Certain features can set their own Checkpoint and VAE.
+Once the Checkpoint has been changed, all subsequent processes keep using it.
+
+
+#### txt2img noise multiplier for hires.fix
+
+Adds noise during the hires.fix stage.
+
+#### txt2img extra noise multiplier for hires.fix (EXPERIMENTAL)
+
+Adds extra noise during the hires.fix stage.
+
+#### Hires.fix filter before upscaler
+
+Applies a filter before the upscaler within the hires.fix stage.
+
+#### Hires.fix filter after upscaler
+
+Applies a filter after the upscaler within the hires.fix stage.
+
+
+## Resample (EXPERIMENTAL)
+
+A self-resampling feature. It takes the image produced by txt2img -> hires.fix and runs the txt2img -> hires.fix pipeline again
+while applying ControlNet Tile Resample. It can be used when:
+
+* the two models produce very different results
+* the two models draw people with different proportions
+* the two models are different versions (SDXL, SD15)
+
+[Images: txt2img->hires.fix | Resample + BMAB Basic]
+
+BMAB resample image by [padapari](https://www.instagram.com/_padapari_/)
+
+
+
+
+
+
+
+
+#### Enable self resample (EXPERIMENTAL)
+
+Turns this feature on or off.
+
+#### Save image before processing
+
+When the image first produced by txt2img -> hires.fix is handed to BMAB for post-processing,
+it is saved before any processing starts. The postfix "-before-resample" is appended to the file name.
+
+#### Checkpoint
+
+Specifies the SD Checkpoint. If none is specified, the previously configured Checkpoint is used.
+The original checkpoint is not restored after the process completes.
+
+#### SD VAE
+
+Specifies the SD VAE. If none is specified, the previously configured VAE is used.
+The original VAE is not restored after the process completes.
+
+#### Resample method
+
+Selects how the resample pass is run.
+
+txt2img-1pass : runs txt2img without hires.fix.
+txt2img-2pass : runs txt2img with hires.fix. hires.fix must already be enabled for the image being generated.
+img2img-1pass : runs img2img.
+
+#### Resample filter
+
+After the resample completes, an external filter script can be called to apply additional image transformations.
+
+#### Resample prompt
+
+The prompt used during resampling. If empty, the main prompt is used as-is.
+Writing "#!org!#" inserts the main prompt at that position, and more prompt text can follow it.
+ex) #!org!#, soft light, some more keyword
+
+#### Resample negative prompt
+
+The negative prompt used during resampling. If empty, the main negative prompt is used.
+
+#### Sampling method
+
+Specifies the sampling method for this process. If none is specified, the sampler of the previous process is used.
+
+#### Upscaler
+
+The upscaler used when hires.fix is enabled.
+
+#### Resample sampling steps
+
+Specifies the sampling steps for the resample process.
+(20 recommended)
+
+#### Resample CFG scale
+
+Specifies the CFG scale for the resample process.
+Dynamic thresholding is not supported.
+
+#### Resample denoising strength
+
+Specifies the denoising strength for the resample process.
+(0.4 recommended)
+
+#### Resample strength
+
+Values close to 0 drift away from the input image; values close to 1 stay similar to the original.
+
+#### Resample begin
+
+The point in the sampling schedule where the resample starts being applied.
+
+#### Resample end
+
+The point in the sampling schedule where the resample stops being applied.
+
+
+## Pretraining (EXPERIMENTAL)
+
+A pretrained-model detailer. It runs detection with a model trained in ultralytics and,
+based on the detections, applies a prompt and negative prompt to redraw those regions in finer detail.
+
+#### Enable pretraining detailer (EXPERIMENTAL)
+
+Turns this feature on or off.
+
+#### Enable pretraining before hires.fix
+
+Runs the pretraining detailer before hires.fix.
+
+#### Pretraining model
+
+Specifies a detection model (*.pt) trained with ultralytics.
+The file must be placed in stable-diffusion-webui/models/BMAB to appear in the list.
+
+#### Pretraining prompt
+
+The prompt used by the pretraining detailer. If empty, the main prompt is used as-is.
+Writing "#!org!#" inserts the main prompt at that position, and more prompt text can follow it.
+ex) #!org!#, soft light, some more keyword
+
+#### Pretraining negative prompt
+
+The negative prompt used by the pretraining detailer. If empty, the main negative prompt is used.
+
+#### Sampling method
+
+Specifies the sampling method for this process. If none is specified, the sampler of the previous process is used.
+
+#### Pretraining sampling steps
+
+Specifies the sampling steps for the pretraining process.
+(20 recommended)
+
+#### Pretraining CFG scale
+
+Specifies the CFG scale for the pretraining process.
+Dynamic thresholding is not supported.
+
+#### Pretraining denoising strength
+
+Specifies the denoising strength for the pretraining process.
+(0.4 recommended)
+
+#### Pretraining dilation
+
+Enlarges each detected rectangle by the given amount.
+
+#### Pretraining box threshold
+
+Sets the detector's acceptance threshold. Detections scoring below the default of 0.35 are discarded as unlikely matches.
+This is the confidence value of the ultralytics predict call.
+
+
+## Edge enhancement
+
+Strengthens edges in the image to increase sharpness and perceived detail.
+
+**Does not work when the upscaler is a Latent type. (R-ESRGAN or 4x-UltraSharp recommended)**
+
+Recommended settings
+
+* Edge low threshold : 50
+* Edge high threshold : 200
+* Edge strength : 0.5
+
+[Images: edge enhancement example]
+
+
+Enabled : CHECK!!
+
+Contrast : 1.2
+Brightness : 0.9
+Sharpeness : 1.5
+
+Enable edge enhancement : CHECK!!
+Edge low threshold : 50
+Edge high threshold : 200
+Edge strength : 0.5
+
+
+
+
+## Resize
+
+Runs in the middle of the txt2img -> hires.fix pipeline.
+If used from img2img, it runs before the process starts.
+
+If the ratio of the tallest person's height to the image height exceeds the configured value,
+this feature brings the ratio back down to that value.
+For example, if the setting is 0.90 and the person-height to image-height ratio is 0.95,
+the background is extended until the person's ratio becomes 0.90.
+The background is extended according to the method chosen under Alignment.
+
+During txt2img, the image is modified right before hires.fix runs,
+so that the modified image blends in smoothly during the hires.fix pass.
+**Use a denoising strength of around 0.6-0.7, or the extended edges will show distortion.**
+**Does not work when the upscaler is a Latent type. (R-ESRGAN or 4x-UltraSharp recommended)**
+
+#### Method
+
+Specifies how the resize is performed.
+
+* Stretching : simply stretches the outer edges of the image to extend the background.
+* inpaint : runs img2img inpainting with a mask over only the stretched region.
+* inpaint+lama : redraws the extended region with ControlNet's inpaint+lama model.
+* inpaint_only : redraws the extended region with ControlNet's inpaint_only.
+
+
+#### Alignment
+
+Determines the direction in which the original image is aligned when the canvas is extended.
+
+#### Resize filter
+
+After the resize completes, an external filter script can be called to apply additional image transformations.
+
+#### Resize by person intermediate
+
+The target ratio for the person's size. When the measured ratio exceeds this value, the background is extended until it matches.
+[Images: Original | Resize 0.7 | Resize 0.5]
+
+[Images: Original | Alignment center]
+[Images: Alignment bottom | Alignment bottom-left]
+
+Resize sample
+
+[Images: resize samples]
+
+
+## Refiner
+
+Runs one more drawing pass over the image produced by txt2img.
+This also works when txt2img + hires.fix has already been applied.
+
+The refiner runs after the image is generated and before detailing starts,
+and behaves much like sd-webui's hires.fix and refiner combined.
+[Images: txt2img (512x768) | txt2img + hires.fix (800x1200) | txt2img + hires.fix + refiner (1200x1800)]
+
+(The examples above were all resized to the same size.)
+
+Processing can run in the three stages shown above,
+but the hires.fix stage can also be skipped, with the refiner doing the resize instead.
+
+
+
+
+#### Enable refiner
+
+Enables or disables the refiner.
+
+#### CheckPoint
+
+Specifies the checkpoint used when the refiner redraws the image.
+
+#### Use this checkpoint for detailing
+
+Uses the checkpoint specified above for detailing as well.
+
+#### Prompt
+
+Specifies the prompt the refiner uses when redrawing the image.
+If empty, the main prompt is used; if filled, the main prompt is ignored.
+If the string #!org!# is present, it is replaced with the main prompt (a small sketch of this substitution follows at the end of this section).
+
+#### Negative prompt
+
+Specifies the negative prompt the refiner uses when redrawing the image.
+
+#### Sampling method
+
+Specifies the sampler the refiner uses.
+(Euler A recommended)
+
+#### Upscaler
+
+Specifies the upscaler used when the refiner resizes the image.
+
+#### Refiner sampling steps
+
+Specifies the sampling steps the refiner uses.
+(20 recommended)
+
+#### Refiner CFG scale
+
+Specifies the CFG scale the refiner uses.
+Dynamic thresholding is not supported.
+
+#### Refiner denoising strength
+
+Specifies the denoising strength the refiner uses.
+(0.4 recommended)
+
+#### Refiner scale
+
+The refiner resizes the given image by this scale factor.
+Ignored if refiner width and refiner height are set.
+
+#### Refiner width
+
+Forces the image width to this value.
+
+#### Refiner height
+
+Forces the image height to this value.
+
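+The "#!org!#" substitution used by the refiner prompt (and by the resample and pretraining prompts above) behaves as sketched here; this is an illustrative re-implementation, not BMAB's actual code:
+
+```python
+def apply_prompt(prompt, main_prompt):
+    """Sketch of BMAB's prompt fields: empty -> main prompt, '#!org!#' -> substitution."""
+    if not prompt:
+        return main_prompt
+    return prompt.replace('#!org!#', main_prompt)
+
+print(apply_prompt('', '1girl'))                     # 1girl
+print(apply_prompt('#!org!#, soft light', '1girl'))  # 1girl, soft light
+```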
+
+
diff --git a/3-bmab/docs/masking.md b/3-bmab/docs/masking.md
new file mode 100644
index 0000000000000000000000000000000000000000..fc6dfd392a1894f9808baf3e43b26682410ec06b
--- /dev/null
+++ b/3-bmab/docs/masking.md
@@ -0,0 +1,31 @@
+
+# Auto Masking
+
+## Detect
+
+* Add a portrait image to Img2Img.
+* Select "Just resize".
+* Enlarging the image during the resize gives better results.
+  In this example the image was resized from 512x768 to 800x1200.
+* Set Denoising Strength to about 0.4-0.6.
+  It varies by model; if the background changes too much, lower the value.
+
+
+[Images: Img2Img settings]
+
+
+* Enable BMAB.
+* Be sure to check "Process before img2img".
+* Check "Detect enabled" and put person, 1girl, human, or anything similar in the prompt.
+
+
+[Images: BMAB Detect settings]
+
+
+* To get a good final result, the composited image must be described well in the prompt.
+* The prompt matters a great deal: with no prompt, or only a rough one, the result drifts away from the intended image.
+
+A hedged sketch of the equivalent API call is shown below.
+
+
+[Images: final result]
+
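+The keys 'detect_enabled' and 'masking_prompt' come from the default settings JSON in the API guide, and 'execute_before_img2img' appears in the bundled presets; mapping them to the checkboxes above is an assumption:
+
+```python
+# Hypothetical img2img API payload for the auto-masking flow above.
+img2img = {
+    'init_images': ['<base64-encoded portrait>'],
+    'prompt': 'detailed description of the composited image',
+    'denoising_strength': 0.5,
+    'resize_mode': 0,   # "Just resize"
+    'width': 800,
+    'height': 1200,
+    'alwayson_scripts': {
+        'BMAB': {
+            'args': [
+                {
+                    'enabled': True,
+                    'execute_before_img2img': True,  # "Process before img2img" (assumed)
+                    'detect_enabled': True,          # "Detect enabled" (assumed)
+                    'masking_prompt': '1girl',
+                }
+            ]
+        }
+    }
+}
+```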
+ diff --git a/3-bmab/filter/Put filter file here.txt b/3-bmab/filter/Put filter file here.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/3-bmab/filter/basic.py b/3-bmab/filter/basic.py new file mode 100644 index 0000000000000000000000000000000000000000..e06528f9c58cc15f1f5fbd3a76a848adadb798a4 --- /dev/null +++ b/3-bmab/filter/basic.py @@ -0,0 +1,34 @@ +import os +from PIL import Image +from PIL import ImageEnhance + +import sd_bmab +from sd_bmab import util +from sd_bmab.base import filter +from sd_bmab.base import cache + + +class Filter(filter.BaseFilter): + + def preprocess(self, context, image, *args, **kwargs): + pass + + def basic_process(self, image: Image): + enhancer = ImageEnhance.Brightness(image) + image = enhancer.enhance(0.8) + enhancer = ImageEnhance.Contrast(image) + image = enhancer.enhance(1.2) + return image + + def basic_process_with_noise(self, processed: Image): + noise = cache.get_noise_from_cache(0, processed.width, processed.height).convert('LA') + noise = noise.convert('RGBA') + blended = Image.blend(processed.convert('RGBA'), noise, alpha=0.1) + return self.basic_process(blended.convert('RGB')) + + def process(self, context, image: Image, processed: Image, *args, **kwargs): + print('-----FILTER BASIC-----') + return self.basic_process(processed) + + def postprocess(self, context, *args, **kwargs): + pass diff --git a/3-bmab/filter/vintage.py b/3-bmab/filter/vintage.py new file mode 100644 index 0000000000000000000000000000000000000000..73e42a44db5f302deb65d5e2587de1a744483df3 --- /dev/null +++ b/3-bmab/filter/vintage.py @@ -0,0 +1,46 @@ +from PIL import Image +from PIL import ImageEnhance + +from sd_bmab.base import filter +from sd_bmab.base import cache +from sd_bmab.processors.basic import final + + +CONTRAST = 0.8 +BRIGHTNESS = 0.9 +SHARPNESS = 0.5 +COLOR = 0.85 +COLOR_TEMPERATURE = 5240 +NOISE = 0.05 + + +class Filter(filter.BaseFilter): + + def preprocess(self, context, image, *args, **kwargs): + pass + + def basic_process(self, image: Image): + enhancer = ImageEnhance.Contrast(image) + image = enhancer.enhance(CONTRAST) + enhancer = ImageEnhance.Brightness(image) + image = enhancer.enhance(BRIGHTNESS) + enhancer = ImageEnhance.Sharpness(image) + image = enhancer.enhance(SHARPNESS) + enhancer = ImageEnhance.Color(image) + image = enhancer.enhance(COLOR) + temp = final.calc_color_temperature(COLOR_TEMPERATURE) + az = [] + for d in image.getdata(): + az.append((int(d[0] * temp[0]), int(d[1] * temp[1]), int(d[2] * temp[2]))) + image = Image.new('RGB', image.size) + image.putdata(az) + noise = cache.get_noise_from_cache(0, image.size[0], image.size[1]) + image = Image.blend(image, noise, alpha=NOISE) + return image + + def process(self, context, image: Image, processed: Image, *args, **kwargs): + print('-----FILTER VINTAGE-----') + return self.basic_process(processed) + + def postprocess(self, context, *args, **kwargs): + pass diff --git a/3-bmab/install.py b/3-bmab/install.py new file mode 100644 index 0000000000000000000000000000000000000000..ddd307d57817bec50cd942078fe1a700b9acecae --- /dev/null +++ b/3-bmab/install.py @@ -0,0 +1,42 @@ +import json + +import launch + + +def install_segmentanything(): + launch.run_pip('install segment_anything') + + +def install_segmentanything_hq(): + launch.run_pip('install segment_anything_hq') + + +def install_ultralytics(): + launch.run_pip('install ultralytics') + + +def install_diffusers(): + launch.run_pip('install diffusers==0.27.2') + + 
+try: + from basicsr.utils.download_util import load_file_from_url +except: + launch.run_pip('install basicsr==1.4.2') + +required = { + ('segment_anything', install_segmentanything), + ('segment_anything_hq', install_segmentanything_hq), + ('ultralytics', install_ultralytics), + ('diffusers', install_diffusers) +} + +for pack_name, func in required: + if not launch.is_installed(pack_name): + func() + +''' +with open('ui-config.json', 'rt', encoding='UTF8') as f: + j = json.load(f) + print(json.dumps(j, indent=2)) +''' diff --git a/3-bmab/models/Put model file here.txt b/3-bmab/models/Put model file here.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/3-bmab/requirements.txt b/3-bmab/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c9d018e4b32a022d270bae7644dc72994338ca54 --- /dev/null +++ b/3-bmab/requirements.txt @@ -0,0 +1,9 @@ +segment_anything +segment_anything_hq +ultralytics +pillow + +basicsr==1.4.2 +kornia +omegaconf +numpy diff --git a/3-bmab/resources/preset/3girls.json b/3-bmab/resources/preset/3girls.json new file mode 100644 index 0000000000000000000000000000000000000000..2a34de9568123a619e64a89618095e82bb06ec9f --- /dev/null +++ b/3-bmab/resources/preset/3girls.json @@ -0,0 +1,46 @@ +{ + "enabled": true, + "contrast": 1.2, + "brightness": 0.9, + "sharpeness": 1.5, + "execute_before_img2img": true, + "edge_flavor_enabled": true, + "edge_low_threadhold": 50, + "edge_high_threadhold": 200, + "edge_strength": 0.5, + "resize_by_person_enabled": true, + "resize_by_person": 0.85, + "face_detailing_enabled": true, + "module_config": { + "multiple_face": [ + { + "denoising_strength": 0.40, + "prompt": "smile, #!org!#", + "inpaint_full_res": true, + "inpaint_full_res_padding": 32, + "cfg_scale": 7 + }, + { + "denoising_strength": 0.40, + "prompt": "sad, #!org!#", + "inpaint_full_res": true, + "inpaint_full_res_padding": 32, + "cfg_scale": 7 + }, + { + "denoising_strength": 0.40, + "prompt": "sad, #!org!#", + "width": 512, + "height": 512, + "inpaint_full_res": true, + "inpaint_full_res_padding": 32, + "cfg_scale": 7 + } + ], + "multiple_face_opt": { + "mask dilation": 4, + "limit": -1, + "order": "left" + } + } +} diff --git a/3-bmab/resources/preset/Put config file here.txt b/3-bmab/resources/preset/Put config file here.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/3-bmab/resources/preset/example.json b/3-bmab/resources/preset/example.json new file mode 100644 index 0000000000000000000000000000000000000000..a7866abeeb321a4cb98ed94c4c591ce5786f558a --- /dev/null +++ b/3-bmab/resources/preset/example.json @@ -0,0 +1,25 @@ +{ + "enabled": true, + "contrast": 1.2, + "brightness": 0.9, + "sharpeness": 1.5, + "edge_flavor_enabled": true, + "edge_low_threadhold": 50, + "edge_high_threadhold": 200, + "edge_strength": 0.5, + "resize_by_person_enabled": true, + "resize_by_person": 0.85, + "face_detailing_enabled": true, + "module_config": { + "face_detailing": { + "denoising_strength": 0.40, + "prompt": "smile, #!org!#", + "inpaint_full_res": true, + "inpaint_full_res_padding": 32, + "cfg_scale": 7 + }, + "face_detailing_opt": { + "mask dilation": 4 + } + } +} diff --git a/3-bmab/resources/preset/example2.json b/3-bmab/resources/preset/example2.json new file mode 100644 index 0000000000000000000000000000000000000000..b9369d02aa10b235e9ffcd60d74b8f19c0e25575 --- /dev/null +++ 
b/3-bmab/resources/preset/example2.json @@ -0,0 +1,27 @@ +{ + "enabled": true, + "contrast": 1.2, + "brightness": 0.9, + "sharpeness": 1.5, + "edge_flavor_enabled": true, + "edge_low_threadhold": 50, + "edge_high_threadhold": 200, + "edge_strength": 0.5, + "resize_by_person_enabled": true, + "resize_by_person": 0.85, + "face_detailing_enabled": true, + "module_config": { + "face_detailing": { + "denoising_strength": 0.40, + "prompt": ", (ulzzang-6500:0.4), #!org!#", + "width": 512, + "height": 512, + "inpaint_full_res": true, + "inpaint_full_res_padding": 32, + "cfg_scale": 7 + }, + "face_detailing_opt": { + "mask dilation": 4 + } + } +} diff --git a/3-bmab/resources/preset/hand.json b/3-bmab/resources/preset/hand.json new file mode 100644 index 0000000000000000000000000000000000000000..062fdd139e105101ca90b110e08d907036d2b67e --- /dev/null +++ b/3-bmab/resources/preset/hand.json @@ -0,0 +1,18 @@ +{ + "enabled": true, + "hand_detailing_enabled": true, + "module_config": { + "hand_detailing": { + "denoising_strength": 0.4, + "steps": 20, + "cfg_scale": 7, + "inpaint_full_res": 0, + "inpaint_full_res_padding": 32 + }, + "hand_detailing_opt": { + "scale": 2, + "mode": "inpaint", + "detailing method": "subframe" + } + } +} diff --git a/3-bmab/resources/preset/hand2.json b/3-bmab/resources/preset/hand2.json new file mode 100644 index 0000000000000000000000000000000000000000..47faa563408ff2d4eca56e22a40669c64f741434 --- /dev/null +++ b/3-bmab/resources/preset/hand2.json @@ -0,0 +1,20 @@ +{ + "enabled": true, + "hand_detailing_enabled": true, + "module_config": { + "hand_detailing": { + "denoising_strength": 0.5, + "prompt": "(good anatomy:1.2), (five fingers:1.3), pretty hands, detail hands, detail fingers, detail nails", + "negative_prompt": "(bad anatomy:1.2), (wrong anatomy:1.2), mutation, amputation, extra fingers, missing fingers, disconnected fingers", + "steps": 20, + "cfg_scale": 7, + "inpaint_full_res": 0, + "inpaint_full_res_padding": 32 + }, + "hand_detailing_opt": { + "scale": 4, + "mode": "inpaint", + "detailing method": "each hand" + } + } +} diff --git a/3-bmab/scripts/__pycache__/sd_webui_bmab.cpython-310.pyc b/3-bmab/scripts/__pycache__/sd_webui_bmab.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..53889c66eaeed4661aea1d4062fb4896a6bea50a Binary files /dev/null and b/3-bmab/scripts/__pycache__/sd_webui_bmab.cpython-310.pyc differ diff --git a/3-bmab/scripts/sd_webui_bmab.py b/3-bmab/scripts/sd_webui_bmab.py new file mode 100644 index 0000000000000000000000000000000000000000..a7c8dd84372021594e9a9262bad294a35b935721 --- /dev/null +++ b/3-bmab/scripts/sd_webui_bmab.py @@ -0,0 +1 @@ +from sd_bmab.bmab import BmabExtScript diff --git a/3-bmab/sd_bmab/__init__.py b/3-bmab/sd_bmab/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/3-bmab/sd_bmab/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dea9ccd11d1166c93e6699f445ae63880f9a5871 Binary files /dev/null and b/3-bmab/sd_bmab/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/__pycache__/bmab.cpython-310.pyc b/3-bmab/sd_bmab/__pycache__/bmab.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5b3b00521ddc0e9ad4318e82a19b1060ce6f4d4c Binary files /dev/null and b/3-bmab/sd_bmab/__pycache__/bmab.cpython-310.pyc differ diff --git 
a/3-bmab/sd_bmab/__pycache__/compat.cpython-310.pyc b/3-bmab/sd_bmab/__pycache__/compat.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..46966f878fac3dfa212fabea62e42e93bf71d4ac Binary files /dev/null and b/3-bmab/sd_bmab/__pycache__/compat.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/__pycache__/constants.cpython-310.pyc b/3-bmab/sd_bmab/__pycache__/constants.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1f6dead94e0f6f7c257b6a9b1e13de10050a23c3 Binary files /dev/null and b/3-bmab/sd_bmab/__pycache__/constants.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/__pycache__/controlnet.cpython-310.pyc b/3-bmab/sd_bmab/__pycache__/controlnet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bc2980d178a5cca1d92e0f8be8c1839259a62fa9 Binary files /dev/null and b/3-bmab/sd_bmab/__pycache__/controlnet.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/__pycache__/parameters.cpython-310.pyc b/3-bmab/sd_bmab/__pycache__/parameters.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c08cc7c88284409ccf858c3ad038ad8b4f7a743b Binary files /dev/null and b/3-bmab/sd_bmab/__pycache__/parameters.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/__pycache__/ui.cpython-310.pyc b/3-bmab/sd_bmab/__pycache__/ui.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c81bed351af6f44cec401ae8b2d6482088d9dbd Binary files /dev/null and b/3-bmab/sd_bmab/__pycache__/ui.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/base/__init__.py b/3-bmab/sd_bmab/base/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9555c2661006bed1a4333fcff7aa510e605d686b --- /dev/null +++ b/3-bmab/sd_bmab/base/__init__.py @@ -0,0 +1,7 @@ +from sd_bmab.base.common import VAEMethodOverride +from sd_bmab.base.context import Context +from sd_bmab.base.detectorbase import DetectorBase +from sd_bmab.base.processorbase import ProcessorBase +from sd_bmab.base.process import process_img2img, process_txt2img, build_img2img, apply_extensions, process_img2img_with_controlnet +from sd_bmab.base.maskbase import MaskBase + diff --git a/3-bmab/sd_bmab/base/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/base/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e7833753d39bad11720c340ad4bdd0c43cc8ed78 Binary files /dev/null and b/3-bmab/sd_bmab/base/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/base/__pycache__/cache.cpython-310.pyc b/3-bmab/sd_bmab/base/__pycache__/cache.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c7956a04443d0a37fa045c44a1d5ab091a89f495 Binary files /dev/null and b/3-bmab/sd_bmab/base/__pycache__/cache.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/base/__pycache__/common.cpython-310.pyc b/3-bmab/sd_bmab/base/__pycache__/common.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f35a9e631a5982f29c780eed8ee66f97c0618b9d Binary files /dev/null and b/3-bmab/sd_bmab/base/__pycache__/common.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/base/__pycache__/context.cpython-310.pyc b/3-bmab/sd_bmab/base/__pycache__/context.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..55214a1595bdc356e023dc93b2a37790d6b64165 Binary files /dev/null and b/3-bmab/sd_bmab/base/__pycache__/context.cpython-310.pyc differ diff --git 
a/3-bmab/sd_bmab/base/__pycache__/detectorbase.cpython-310.pyc b/3-bmab/sd_bmab/base/__pycache__/detectorbase.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b995167377f68098df6a56924901e0c42f20e404 Binary files /dev/null and b/3-bmab/sd_bmab/base/__pycache__/detectorbase.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/base/__pycache__/filter.cpython-310.pyc b/3-bmab/sd_bmab/base/__pycache__/filter.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a8cde2c8bf4bf8d130c6001c26565b3089d52c5 Binary files /dev/null and b/3-bmab/sd_bmab/base/__pycache__/filter.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/base/__pycache__/maskbase.cpython-310.pyc b/3-bmab/sd_bmab/base/__pycache__/maskbase.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..353ac9018417c9029d8379e25725a42fe69974f3 Binary files /dev/null and b/3-bmab/sd_bmab/base/__pycache__/maskbase.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/base/__pycache__/process.cpython-310.pyc b/3-bmab/sd_bmab/base/__pycache__/process.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b86a0b4f6ce61570a2a9c841146145d62f923dfc Binary files /dev/null and b/3-bmab/sd_bmab/base/__pycache__/process.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/base/__pycache__/processorbase.cpython-310.pyc b/3-bmab/sd_bmab/base/__pycache__/processorbase.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..98ab08653107d0478b502a22f476e217cb3fdf08 Binary files /dev/null and b/3-bmab/sd_bmab/base/__pycache__/processorbase.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/base/cache.py b/3-bmab/sd_bmab/base/cache.py new file mode 100644 index 0000000000000000000000000000000000000000..83e9ccb2068345020addf0908e923b09068eff19 --- /dev/null +++ b/3-bmab/sd_bmab/base/cache.py @@ -0,0 +1,40 @@ +import os +from PIL import Image + +import sd_bmab +from sd_bmab import util + + +def check_cache_dir(path): + if not os.path.exists(path): + os.mkdir(path) + + +def get_noise_from_cache(seed, width, height): + path = os.path.dirname(sd_bmab.__file__) + path = os.path.normpath(os.path.join(path, '../resources/cache')) + check_cache_dir(path) + cache_file = f'{path}/noise_{width}_{height}.png' + if os.path.isfile(cache_file): + return Image.open(cache_file) + img = util.generate_noise(seed, width, height) + img.save(cache_file) + return img + + +def get_image_from_cache(filename): + path = os.path.dirname(sd_bmab.__file__) + path = os.path.normpath(os.path.join(path, '../resources/cache')) + check_cache_dir(path) + full_path = os.path.join(path, filename) + if os.path.exists(full_path): + return Image.open(full_path) + return None + + +def put_image_to_cache(filename, image): + path = os.path.dirname(sd_bmab.__file__) + path = os.path.normpath(os.path.join(path, '../resources/cache')) + check_cache_dir(path) + full_path = os.path.join(path, filename) + image.save(full_path) diff --git a/3-bmab/sd_bmab/base/common.py b/3-bmab/sd_bmab/base/common.py new file mode 100644 index 0000000000000000000000000000000000000000..a2a261c1fb463bfee59f41eab494a6b9220da786 --- /dev/null +++ b/3-bmab/sd_bmab/base/common.py @@ -0,0 +1,47 @@ +from modules import shared + + +class VAEMethodOverride: + + def __init__(self, hiresfix=False) -> None: + super().__init__() + self.org_encode_method = None + self.org_decode_method = None + self.img2img_fix_steps = None + self.hiresfix = hiresfix + + def __enter__(self): + if 
('sd_vae_encode_method' in shared.opts.data) and shared.opts.bmab_detail_full: + self.encode_method = shared.opts.sd_vae_encode_method + self.decode_method = shared.opts.sd_vae_decode_method + shared.opts.sd_vae_encode_method = 'Full' + shared.opts.sd_vae_decode_method = 'Full' + if self.hiresfix and not shared.opts.img2img_fix_steps: + self.img2img_fix_steps = shared.opts.img2img_fix_steps + shared.opts.img2img_fix_steps = True + + def __exit__(self, *args, **kwargs): + if ('sd_vae_encode_method' in shared.opts.data) and shared.opts.bmab_detail_full: + shared.opts.sd_vae_encode_method = self.encode_method + shared.opts.sd_vae_decode_method = self.decode_method + if self.img2img_fix_steps is not None: + shared.opts.img2img_fix_steps = self.img2img_fix_steps + + +class StopGeneration: + + def __init__(self) -> None: + super().__init__() + if not hasattr(shared.state, 'stopping_generation'): + return + self.stopping_generation = shared.state.stopping_generation + + def __enter__(self): + if not hasattr(shared.state, 'stopping_generation'): + return + shared.state.stopping_generation = False + + def __exit__(self, *args, **kwargs): + if not hasattr(shared.state, 'stopping_generation'): + return + shared.state.stopping_generation = self.stopping_generation diff --git a/3-bmab/sd_bmab/base/context.py b/3-bmab/sd_bmab/base/context.py new file mode 100644 index 0000000000000000000000000000000000000000..7a1beae413c12c44567051320f32480475b6264a --- /dev/null +++ b/3-bmab/sd_bmab/base/context.py @@ -0,0 +1,114 @@ +from modules import shared +from modules.processing import StableDiffusionProcessingImg2Img + +from sd_bmab.sd_override import StableDiffusionProcessingTxt2ImgOv, StableDiffusionProcessingImg2ImgOv +from sd_bmab import constants + + +class Context(object): + def __init__(self, s, p, a, idx, **kwargs) -> None: + super().__init__() + + self.script = s + self.sdprocessing = p + self.args = a + self.index = idx + self.refiner = None + self.sd_model_name = None + self.sd_vae_name = None + self.container = {} + + @staticmethod + def newContext(s, p, a, idx, **kwargs): + return Context(s, p, a, idx, **kwargs) + + def get_current_prompt(self): + return self.sdprocessing.prompt + + def get_prompt_by_index(self): + if self.sdprocessing.all_prompts is None or len(self.sdprocessing.all_prompts) <= self.index: + return self.sdprocessing.prompt + return self.sdprocessing.all_prompts[self.index] + + def get_negative_prompt_by_index(self): + if self.sdprocessing.all_negative_prompts is None or len(self.sdprocessing.all_negative_prompts) <= self.index: + return self.sdprocessing.negative_prompt + return self.sdprocessing.all_negative_prompts[self.index] + + def get_hires_prompt_by_index(self): + if self.sdprocessing.all_hr_prompts is None or len(self.sdprocessing.all_hr_prompts) <= self.index: + return self.sdprocessing.hr_prompt + return self.sdprocessing.all_hr_prompts[self.index] + + def get_hires_negative_prompt_by_index(self): + if self.sdprocessing.all_hr_negative_prompts is None or len(self.sdprocessing.all_hr_negative_prompts) <= self.index: + return self.sdprocessing.hr_negative_prompt + return self.sdprocessing.all_hr_negative_prompts[self.index] + + def get_seeds(self): + if self.sdprocessing.all_seeds is None or self.sdprocessing.all_subseeds is None: + return self.sdprocessing.seed, self.sdprocessing.subseed + if len(self.sdprocessing.all_seeds) <= self.index or len(self.sdprocessing.all_subseeds) <= self.index: + return self.sdprocessing.seed, self.sdprocessing.subseed + return 
self.sdprocessing.all_seeds[self.index], self.sdprocessing.all_subseeds[self.index] + + def get_max_area(self): + if shared.opts.bmab_optimize_vram == 'low vram': + return 512 * 768 + elif shared.opts.bmab_optimize_vram == 'med vram': + return self.sdprocessing.width * self.sdprocessing.height + if isinstance(self.sdprocessing, StableDiffusionProcessingTxt2ImgOv) and self.sdprocessing.enable_hr: + return self.sdprocessing.hr_upscale_to_x * self.sdprocessing.hr_upscale_to_y + return self.sdprocessing.width * self.sdprocessing.height + + def add_generation_param(self, key: object, value: object) -> object: + self.sdprocessing.extra_generation_params[key] = value + + def add_extra_image(self, image): + self.script.extra_image.append(image) + + def with_refiner(self): + return self.args.get('refiner_enabled', False) + + def is_refiner_context(self): + return self.refiner is not None + + def is_hires_fix(self): + if isinstance(self.sdprocessing, StableDiffusionProcessingTxt2ImgOv) and self.sdprocessing.enable_hr: + return True + return False + + def add_job(self, count=1): + shared.state.job_count += count + shared.state.sampling_step = 0 + shared.state.current_image_sampling_step = 0 + + def is_img2img(self): + return isinstance(self.sdprocessing, StableDiffusionProcessingImg2ImgOv) or isinstance(self.sdprocessing, StableDiffusionProcessingImg2Img) + + def is_txtimg(self): + return isinstance(self.sdprocessing, StableDiffusionProcessingTxt2ImgOv) + + def save_and_apply_checkpoint(self, checkpoint, vae): + self.sd_model_name = checkpoint + self.sd_vae_name = vae + + def restore_checkpoint(self): + self.sd_model_name = None + self.sd_vae_name = None + + def apply_checkpoint(self, options): + if self.sd_model_name is not None and self.sd_model_name != constants.checkpoint_default: + override_settings = options.get('override_settings', {}) + override_settings['sd_model_checkpoint'] = self.sd_model_name + options['override_settings'] = override_settings + if self.sd_vae_name is not None and self.sd_vae_name != constants.vae_default: + override_settings = options.get('override_settings', {}) + override_settings['sd_vae'] = self.sd_vae_name + options['override_settings'] = override_settings + + def save(self, key, value): + self.container[key] = value + + def load(self, key): + return self.container.get(key) diff --git a/3-bmab/sd_bmab/base/detectorbase.py b/3-bmab/sd_bmab/base/detectorbase.py new file mode 100644 index 0000000000000000000000000000000000000000..3e5dfd05271709d33557dd14ef8d7dc5b201e11d --- /dev/null +++ b/3-bmab/sd_bmab/base/detectorbase.py @@ -0,0 +1,16 @@ +from PIL import Image +from sd_bmab.base.context import Context + + +class DetectorBase(object): + def __init__(self, **kwargs) -> None: + super().__init__() + + def target(self): + pass + + def description(self): + pass + + def predict(self, context: Context, image: Image): + pass diff --git a/3-bmab/sd_bmab/base/filter.py b/3-bmab/sd_bmab/base/filter.py new file mode 100644 index 0000000000000000000000000000000000000000..2665b09b4ee95e6033f2a831629379ec445a1f74 --- /dev/null +++ b/3-bmab/sd_bmab/base/filter.py @@ -0,0 +1,92 @@ +import os +import sys +import glob +import importlib.util + +from PIL import Image + +import sd_bmab +from sd_bmab import constants +from sd_bmab.util import debug_print +from sd_bmab import controlnet + + +filters = [constants.filter_default] + + +class BaseFilter(object): + + def __init__(self) -> None: + super().__init__() + + def configurations(self): + return {} + + def is_controlnet_required(self): 
+ return False + + def preprocess(self, context, image, *args, **kwargs): + pass + + def process(self, context, base: Image, processed: Image, *args, **kwargs): + return processed + + def postprocess(self, context, *args, **kwargs): + pass + + def finalprocess(self, context, *args, **kwargs): + pass + + +class NoneFilter(BaseFilter): + + def process_filter(self, context, base: Image, processed: Image, *args, **kwargs): + return processed + + +def reload_filters(): + global filters + filters = [constants.filter_default] + + path = os.path.dirname(sd_bmab.__file__) + path = os.path.normpath(os.path.join(path, '../filter')) + files = sorted(glob.glob(f'{path}/*.py')) + for file in files: + fname = os.path.splitext(os.path.basename(file))[0] + filters.append(fname) + + +def get_filter(name): + if name == 'None': + return NoneFilter() + debug_print('Filter', name) + path = os.path.dirname(sd_bmab.__file__) + path = os.path.normpath(os.path.join(path, '../filter')) + filter_path = f'{path}/{name}.py' + mod = load_module(filter_path, 'filter') + return eval(f'mod.Filter()') + + +def load_module(file_name, module_name): + spec = importlib.util.spec_from_file_location(module_name, file_name) + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + + +def preprocess_filter(bmab_filter, context, image, *args, **kwargs): + bmab_filter.preprocess(context, image, *args, **kwargs) + + +def process_filter(bmab_filter, context, base: Image, processed: Image, *args, **kwargs): + return bmab_filter.process(context, base, processed, *args, **kwargs) + + +def postprocess_filter(bmab_filter, context, *args, **kwargs): + bmab_filter.postprocess(context, *args, **kwargs) + + +def finalprocess_filter(bmab_filter, context, *args, **kwargs): + bmab_filter.finalprocess(context, *args, **kwargs) + diff --git a/3-bmab/sd_bmab/base/maskbase.py b/3-bmab/sd_bmab/base/maskbase.py new file mode 100644 index 0000000000000000000000000000000000000000..2af2c4b08e00e2cc7a4c6c7cf37914b9478f8c3a --- /dev/null +++ b/3-bmab/sd_bmab/base/maskbase.py @@ -0,0 +1,22 @@ + +class MaskBase(object): + def __init__(self) -> None: + super().__init__() + + @property + def name(self): + pass + + @classmethod + def init(cls, *args, **kwargs): + pass + + def predict(self, image, box): + pass + + def predict_multiple(self, image, points, labels, boxes=None): + pass + + @classmethod + def release(cls): + pass diff --git a/3-bmab/sd_bmab/base/process.py b/3-bmab/sd_bmab/base/process.py new file mode 100644 index 0000000000000000000000000000000000000000..dbfa90cfc9e3f7d1a278c8a84178829a55a29c10 --- /dev/null +++ b/3-bmab/sd_bmab/base/process.py @@ -0,0 +1,226 @@ +from PIL import Image +from PIL import ImageDraw + +from copy import copy, deepcopy +from pathlib import Path + +from modules import shared +from modules import devices +from modules.processing import process_images + +from sd_bmab import util +from sd_bmab.base.common import StopGeneration +from sd_bmab.base.context import Context +from sd_bmab.sd_override import StableDiffusionProcessingTxt2ImgOv, StableDiffusionProcessingImg2ImgOv + + +def apply_extensions(p, cn_enabled=False): + script_runner = copy(p.scripts) + script_args = deepcopy(p.script_args) + active_script = ['dynamic_thresholding', 'wildcards'] + + if cn_enabled: + active_script.append('controlnet') + for idx, obj in enumerate(script_args): + if 'controlnet' in obj.__class__.__name__.lower(): + if hasattr(obj, 'enabled'): + obj.enabled = False + if 
hasattr(obj, 'input_mode'): + obj.input_mode = getattr(obj.input_mode, 'SIMPLE', 'simple') + elif isinstance(obj, dict) and 'module' in obj: + obj['enabled'] = False + + filtered_alwayson = [] + for script_object in script_runner.alwayson_scripts: + filepath = script_object.filename + filename = Path(filepath).stem + if filename in active_script: + filtered_alwayson.append(script_object) + + script_runner.alwayson_scripts = filtered_alwayson + return script_runner, script_args + + +def build_img2img(context: Context, img, options): + p = context.sdprocessing + img = img.convert('RGB') + + if 'inpaint_full_res' in options: + res = options['inpaint_full_res'] + if res == 'Whole picture': + options['inpaint_full_res'] = 0 + if res == 'Only masked': + options['inpaint_full_res'] = 1 + + i2i_param = dict( + init_images=[img], + resize_mode=0, + denoising_strength=0.4, + mask=None, + mask_blur=4, + inpainting_fill=1, + inpaint_full_res=True, + inpaint_full_res_padding=32, + inpainting_mask_invert=0, + initial_noise_multiplier=1.0, + outpath_samples=p.outpath_samples, + outpath_grids=p.outpath_grids, + prompt=p.prompt, + negative_prompt=p.negative_prompt, + styles=p.styles, + seed=p.seed, + subseed=p.subseed, + subseed_strength=p.subseed_strength, + seed_resize_from_h=p.seed_resize_from_h, + seed_resize_from_w=p.seed_resize_from_w, + sampler_name=p.sampler_name, + batch_size=1, + n_iter=1, + steps=p.steps, + cfg_scale=p.cfg_scale, + width=img.width, + height=img.height, + restore_faces=False, + tiling=p.tiling, + extra_generation_params=p.extra_generation_params, + do_not_save_samples=True, + do_not_save_grid=True, + override_settings={ + 'sd_model_checkpoint': shared.sd_model.sd_checkpoint_info.name_for_extra + }, + ) + + if hasattr(p, 'scheduler'): + i2i_param['scheduler'] = p.scheduler + else: + if 'scheduler' in options: + del options['scheduler'] + + context.apply_checkpoint(i2i_param) + if options is not None: + i2i_param.update(options) + + return i2i_param + + +def process_img2img(context: Context, img, options=None): + if shared.state.skipped or shared.state.interrupted: + return img + + i2i_param = build_img2img(context, img, options) + + img2img = StableDiffusionProcessingImg2ImgOv(**i2i_param) + img2img.cached_c = [None, None] + img2img.cached_uc = [None, None] + img2img.scripts, img2img.script_args = apply_extensions(context.sdprocessing) + + with StopGeneration(): + processed = process_images(img2img) + img = processed.images[0] + + img2img.close() + + devices.torch_gc() + return img + + +def process_img2img_with_controlnet(context: Context, image, options, controlnet): + i2i_param = build_img2img(context, image, options) + + img2img = StableDiffusionProcessingImg2ImgOv(**i2i_param) + img2img.cached_c = [None, None] + img2img.cached_uc = [None, None] + img2img.scripts, img2img.script_args = apply_extensions(context.sdprocessing, cn_enabled=True) + + cn_args_begin, cn_args_end = util.get_cn_args(img2img) + cn_args = range(cn_args_begin, cn_args_end) + sc_args = list(img2img.script_args) + for ix in range(0, len(controlnet), 1): + idx = cn_args[ix] + sc_args[idx] = controlnet[ix] + img2img.script_args = sc_args + + processed = process_images(img2img) + image = processed.images[0] + img2img.close() + devices.torch_gc() + + return image + + +def process_txt2img(context, options=None, controlnet=None, processor=None): + p = context.sdprocessing + t2i_param = dict( + denoising_strength=0.4, + outpath_samples=p.outpath_samples, + outpath_grids=p.outpath_grids, + prompt=p.prompt, + 
negative_prompt=p.negative_prompt, + styles=p.styles, + seed=p.seed, + subseed=p.subseed, + subseed_strength=p.subseed_strength, + seed_resize_from_h=p.seed_resize_from_h, + seed_resize_from_w=p.seed_resize_from_w, + sampler_name=p.sampler_name, + batch_size=1, + n_iter=1, + steps=p.steps, + cfg_scale=p.cfg_scale, + width=p.width, + height=p.height, + restore_faces=False, + tiling=p.tiling, + extra_generation_params=p.extra_generation_params, + do_not_save_samples=True, + do_not_save_grid=True, + override_settings={ + 'sd_model_checkpoint': shared.sd_model.sd_checkpoint_info.name_for_extra + }, + ) + + if hasattr(p, 'scheduler'): + t2i_param['scheduler'] = p.scheduler + else: + if 'scheduler' in options: + del options['scheduler'] + + context.apply_checkpoint(t2i_param) + if options is not None: + t2i_param.update(options) + + if processor: + txt2img = processor(**t2i_param) + else: + txt2img = StableDiffusionProcessingTxt2ImgOv(**t2i_param) + txt2img.context = context + txt2img.cached_c = [None, None] + txt2img.cached_uc = [None, None] + + if controlnet is None: + txt2img.scripts, txt2img.script_args = apply_extensions(p, False) + else: + txt2img.scripts, txt2img.script_args = apply_extensions(p, True) + cn_args_begin, cn_args_end = util.get_cn_args(txt2img) + cn_args = range(cn_args_begin, cn_args_end) + sc_args = list(txt2img.script_args) + for ix in range(0, len(controlnet), 1): + idx = cn_args[ix] + sc_args[idx] = controlnet[ix] + txt2img.script_args = sc_args + + with StopGeneration(): + processed = process_images(txt2img) + img = processed.images[0] + devices.torch_gc() + return img + + +def masked_image(img, xyxy): + x1, y1, x2, y2 = xyxy + check = img.convert('RGBA') + dd = Image.new('RGBA', img.size, (0, 0, 0, 0)) + dr = ImageDraw.Draw(dd, 'RGBA') + dr.rectangle((x1, y1, x2, y2), fill=(255, 0, 0, 255)) + check = Image.blend(check, dd, alpha=0.5) + check.convert('RGB').save('check.png') diff --git a/3-bmab/sd_bmab/base/processorbase.py b/3-bmab/sd_bmab/base/processorbase.py new file mode 100644 index 0000000000000000000000000000000000000000..b8c8dd3288a7dcea5451495bf338b018258eca30 --- /dev/null +++ b/3-bmab/sd_bmab/base/processorbase.py @@ -0,0 +1,22 @@ +from PIL import Image +from sd_bmab.base.context import Context + + +class ProcessorBase(object): + def __init__(self) -> None: + super().__init__() + + def use_controlnet(self, context: Context): + return False + + def preprocess(self, context: Context, image: Image): + pass + + def process(self, context: Context, image: Image): + pass + + def postprocess(self, context: Context, image: Image): + pass + + def finalprocess(self, context: Context, image: Image): + pass diff --git a/3-bmab/sd_bmab/base/sam.py b/3-bmab/sd_bmab/base/sam.py new file mode 100644 index 0000000000000000000000000000000000000000..d36dc83a21934a9dca16534210803e68e2a7854d --- /dev/null +++ b/3-bmab/sd_bmab/base/sam.py @@ -0,0 +1,82 @@ +import cv2 +import os +import numpy as np + +import torch + +from PIL import Image +from modules.paths import models_path +from modules.safe import unsafe_torch_load, load +from modules.devices import device, torch_gc + +from segment_anything import SamPredictor +from segment_anything import sam_model_registry + +bmab_model_path = os.path.join(models_path, "bmab") + +sam_model = None + + +def sam_init(): + MODEL_TYPE = 'vit_b' + + global sam_model + if not sam_model: + torch.load = unsafe_torch_load + sam_model = sam_model_registry[MODEL_TYPE](checkpoint='%s/sam_vit_b_01ec64.pth' % bmab_model_path) + sam_model.to(device=device) 
+ sam_model.eval() + torch.load = load + + return sam_model + + +def sam_predict(pilimg, boxes): + sam = sam_init() + + mask_predictor = SamPredictor(sam) + + numpy_image = np.array(pilimg) + opencv_image = cv2.cvtColor(numpy_image, cv2.COLOR_RGB2BGR) + mask_predictor.set_image(opencv_image) + + result = Image.new('L', pilimg.size, 0) + for box in boxes: + x1, y1, x2, y2 = box + + box = np.array([int(x1), int(y1), int(x2), int(y2)]) + masks, scores, logits = mask_predictor.predict( + box=box, + multimask_output=False + ) + + mask = Image.fromarray(masks[0]) + result.paste(mask, mask=mask) + + return result + + +def sam_predict_box(pilimg, box): + sam = sam_init() + + mask_predictor = SamPredictor(sam) + + numpy_image = np.array(pilimg) + opencv_image = cv2.cvtColor(numpy_image, cv2.COLOR_RGB2BGR) + mask_predictor.set_image(opencv_image) + + x1, y1, x2, y2 = box + box = np.array([int(x1), int(y1), int(x2), int(y2)]) + + masks, scores, logits = mask_predictor.predict( + box=box, + multimask_output=False + ) + + return Image.fromarray(masks[0]) + + +def release(): + global sam_model + sam_model = None + torch_gc() diff --git a/3-bmab/sd_bmab/bmab.py b/3-bmab/sd_bmab/bmab.py new file mode 100644 index 0000000000000000000000000000000000000000..86a5c518fbe07917b29af04a99e35aea33b05e60 --- /dev/null +++ b/3-bmab/sd_bmab/bmab.py @@ -0,0 +1,110 @@ +from modules import scripts +from modules import shared +from modules import script_callbacks +from modules import images + +from sd_bmab import parameters +from sd_bmab.base import context, filter + +from sd_bmab.pipeline import post +from sd_bmab.pipeline import internal +from sd_bmab import masking +from sd_bmab import ui +from sd_bmab import util +from sd_bmab import controlnet +from sd_bmab.sd_override import override_sd_webui, StableDiffusionProcessingTxt2ImgOv +from sd_bmab.sd_override import sd_models +from sd_bmab.compat import check_directory +from sd_bmab.processors.basic import preprocessfilter + + +check_directory() +override_sd_webui() +filter.reload_filters() + +if not shared.opts.data.get('bmab_for_developer', False): + util.check_models() + +if shared.opts.data.get('bmab_additional_checkpoint_path', '') != '': + sd_models.override() + + +class BmabExtScript(scripts.Script): + + def __init__(self) -> None: + super().__init__() + self.extra_image = [] + self.config = {} + self.index = 0 + self.stop_generation = False + + def title(self): + return 'BMAB' + + def show(self, is_img2img): + return scripts.AlwaysVisible + + def ui(self, is_img2img): + return ui.create_ui(self, is_img2img) + + def before_process(self, p, *args): + self.stop_generation = False + self.extra_image = [] + ui.final_images = [] + ui.last_process = p + ui.bmab_script = self + self.index = 0 + self.config, a = parameters.parse_args(args) + if not a['enabled']: + return + + controlnet.update_controlnet_args(p) + if not hasattr(p, 'context') or p.context is None: + ctx = context.Context.newContext(self, p, a, 0, hiresfix=True) + p.context = ctx + preprocessfilter.run_preprocess_filter(ctx) + post.process_controlnet(p.context) + internal.process_img2img(p.context) + if isinstance(p, StableDiffusionProcessingTxt2ImgOv): + p.initial_noise_multiplier = a.get('txt2img_noise_multiplier', 1) + p.extra_noise = a.get('txt2img_extra_noise_multiplier', 0) + + def postprocess_image(self, p, pp, *args): + self.config, a = parameters.parse_args(args) + if not a['enabled']: + ui.final_images.append(pp.image) + return + + if shared.state.interrupted or shared.state.skipped: + return + + 
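+		# Hand each generated image to the BMAB post pipeline; ControlNet hooks are
+		# temporarily disabled around the call so BMAB's internal img2img passes
+		# do not re-trigger them.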
if hasattr(p, 'context') and p.context is not None: + p.context.index = self.index + with controlnet.PreventControlNet(p.context, cn_enabled=post.is_controlnet_required(p.context)): + pp.image = post.process(p.context, pp.image) + ui.final_images.append(pp.image) + self.index += 1 + if self.stop_generation: + shared.state.interrupted = True + + def postprocess(self, p, processed, *args): + if shared.opts.bmab_show_extends: + processed.images.extend(self.extra_image) + + post.release() + masking.release() + + def describe(self): + return 'This stuff is worth it, you can buy me a beer in return.' + + def resize_image(self, ctx: context.Context, resize_mode, idx, image, width, height, upscaler_name): + if not ctx.args['enabled']: + return images.resize_image(resize_mode, image, width, height, upscaler_name=upscaler_name) + with controlnet.PreventControlNet(ctx, cn_enabled=internal.is_controlnet_required(ctx)): + image = internal.process_intermediate_before_upscale(ctx, image) + image = images.resize_image(resize_mode, image, width, height, upscaler_name=upscaler_name) + image = internal.process_intermediate_after_upscale(ctx, image) + return image + + +script_callbacks.on_ui_settings(ui.on_ui_settings) diff --git a/3-bmab/sd_bmab/compat.py b/3-bmab/sd_bmab/compat.py new file mode 100644 index 0000000000000000000000000000000000000000..06c807a4fc9a53904cb0cf747b48632925e4822a --- /dev/null +++ b/3-bmab/sd_bmab/compat.py @@ -0,0 +1,13 @@ +import os +import shutil +import sd_bmab + + +def check_directory(): + target = ['cache', 'ipadapter', 'pose', 'saved'] + bmab_path = os.path.dirname(sd_bmab.__file__) + dest_path = os.path.normpath(os.path.join(bmab_path, f'../resources')) + for t in target: + path = os.path.normpath(os.path.join(bmab_path, f'../{t}')) + if os.path.exists(path) and os.path.isdir(path): + shutil.move(path, dest_path) diff --git a/3-bmab/sd_bmab/constants.py b/3-bmab/sd_bmab/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..7d0a1458f3506ecd1b3f023e4e0a4825077f902c --- /dev/null +++ b/3-bmab/sd_bmab/constants.py @@ -0,0 +1,39 @@ +sampler_default = 'Use same sampler' +scheduler_default = 'Use same scheduler' +resize_mode_default = 'Intermediate' +checkpoint_default = 'Use same checkpoint' +vae_default = 'Use same vae' +fast_upscaler = 'BMAB fast' +filter_default = 'None' +face_detector_default = 'BMAB Face(Normal)' + +checkpoint_description = ''' + +Specify Checkpoint and VAE to be used in BMAB.
+It applies to every BMAB function; if you change it to a Checkpoint and VAE selected for an individual function,
+that selection will be applied to all subsequent processing.
+''' + + +resize_description = ''' + + +
+txt2img --resize--> hires.fix --> BMAB Preprocess --> BMAB
+txt2img --resize--> BMAB Preprocess --> BMAB
+
+Methods
+stretching : Fast process. Denoising strength should be above 0.6. (CPU only).
+inpaint : Slow process but high quality. Repaints the stretched image. (Uses GPU).
+inpaint+lama : Very slow process but very high quality. Repaints the stretched image using ControlNet inpaint_only+lama. (Very heavy GPU load).
+inpaint_only : Very slow process but very high quality. Repaints the stretched image using ControlNet inpaint_only. (Very heavy GPU load).
+
+Please DO NOT use the Latent upscaler in hires.fix. +
+''' + +kohya_hiresfix_description = ''' + +This is EXPERIMENTAL function. + +''' diff --git a/3-bmab/sd_bmab/controlnet.py b/3-bmab/sd_bmab/controlnet.py new file mode 100644 index 0000000000000000000000000000000000000000..983727d0094362d518efccbec2011adb9a1d87ba --- /dev/null +++ b/3-bmab/sd_bmab/controlnet.py @@ -0,0 +1,147 @@ +from copy import copy + +from modules import shared +from modules import processing +from modules import img2img +from modules.processing import Processed + +from sd_bmab.util import debug_print, get_cn_args + + +controlnet_args = (0, 0) + + +class FakeControlNet: + def __init__(self, ctx, cn_enabled=False) -> None: + super().__init__() + self.context = ctx + self.process = ctx.sdprocessing + self.all_prompts = None + self.all_negative_prompts = None + self.extra_image = None + self.enabled = self.is_controlnet_enabled() if cn_enabled else False + self.control_index = [] + debug_print('FakeControlNet', self.enabled, cn_enabled) + + def __enter__(self): + if self.enabled: + dummy = Processed(self.process, [], self.process.seed, "") + self.all_prompts = copy(self.process.all_prompts) + self.all_negative_prompts = copy(self.process.all_negative_prompts) + self.extra_image = copy(self.context.script.extra_image) + self.process.scripts.postprocess(copy(self.process), dummy) + for idx, obj in enumerate(self.process.script_args): + if 'controlnet' in obj.__class__.__name__.lower(): + if hasattr(obj, 'enabled') and obj.enabled: + obj.enabled = False + self.control_index.append(idx) + elif isinstance(obj, dict) and 'model' in obj and obj['enabled']: + obj['enabled'] = False + self.control_index.append(idx) + + def __exit__(self, *args, **kwargs): + if self.enabled: + copy_p = copy(self.process) + self.process.all_prompts = self.all_prompts + self.process.all_negative_prompts = self.all_negative_prompts + self.extra_image.extend(self.context.script.extra_image) + for idx in self.control_index: + obj = self.process.script_args[idx] + if 'controlnet' in obj.__class__.__name__.lower(): + if hasattr(obj, 'enabled'): + obj.enabled = True + elif isinstance(obj, dict) and 'model' in obj: + obj['enabled'] = True + self.process.scripts.before_process(copy_p) + self.process.scripts.process(copy_p) + self.context.script.extra_image.extend(self.extra_image) + + def is_controlnet_enabled(self): + global controlnet_args + for idx in range(controlnet_args[0], controlnet_args[1]): + obj = self.process.script_args[idx] + if isinstance(obj, dict): + return True + if 'controlnet' in obj.__class__.__name__.lower(): + if hasattr(obj, 'enabled'): + return True + return False + + +class PreventControlNet(FakeControlNet): + process_images_inner = processing.process_images_inner + process_batch = img2img.process_batch + + def __init__(self, ctx, cn_enabled=False) -> None: + super().__init__(ctx, cn_enabled) + self._process_images_inner = processing.process_images_inner + self._process_batch = img2img.process_batch + self.allow_script_control = None + self.p = ctx.sdprocessing + self.all_prompts = copy(ctx.sdprocessing.all_prompts) + self.all_negative_prompts = copy(ctx.sdprocessing.all_negative_prompts) + + def is_controlnet_used(self): + if not self.p.script_args: + return False + + for idx, obj in enumerate(self.p.script_args): + if 'controlnet' in obj.__class__.__name__.lower(): + if hasattr(obj, 'enabled') and obj.enabled: + debug_print('Use controlnet True') + return True + elif isinstance(obj, dict) and 'module' in obj and obj['enabled']: + debug_print('Use controlnet True') + return True + 
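+		# no enabled ControlNet unit was found among the script args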
+ debug_print('Use controlnet False') + return False + + def __enter__(self): + model = self.p.sd_model.model.diffusion_model + if hasattr(model, '_original_forward'): + model._old_forward = self.p.sd_model.model.diffusion_model.forward + model.forward = getattr(model, '_original_forward') + + processing.process_images_inner = PreventControlNet.process_images_inner + img2img.process_batch = PreventControlNet.process_batch + if 'control_net_allow_script_control' in shared.opts.data: + self.allow_script_control = shared.opts.data['control_net_allow_script_control'] + shared.opts.data['control_net_allow_script_control'] = True + self.multiple_tqdm = shared.opts.data.get('multiple_tqdm', True) + shared.opts.data['multiple_tqdm'] = False + super().__enter__() + + def __exit__(self, *args, **kwargs): + processing.process_images_inner = self._process_images_inner + img2img.process_batch = self._process_batch + if 'control_net_allow_script_control' in shared.opts.data: + shared.opts.data['control_net_allow_script_control'] = self.allow_script_control + shared.opts.data['multiple_tqdm'] = self.multiple_tqdm + model = self.p.sd_model.model.diffusion_model + if hasattr(model, '_original_forward') and hasattr(model, '_old_forward'): + self.p.sd_model.model.diffusion_model.forward = model._old_forward + super().__exit__(*args, **kwargs) + + +def update_controlnet_args(p): + cn_arg_index = [] + for idx, obj in enumerate(p.script_args): + if 'controlnet' in obj.__class__.__name__.lower(): + cn_arg_index.append(idx) + global controlnet_args + controlnet_args = (cn_arg_index[0], cn_arg_index[-1]) + + +def get_controlnet_index(p): + cn_args = get_cn_args(p) + controlnet_count = 0 + for num in range(*cn_args): + obj = p.script_args[num] + if hasattr(obj, 'enabled') and obj.enabled: + controlnet_count += 1 + elif isinstance(obj, dict) and 'model' in obj and obj['enabled']: + controlnet_count += 1 + else: + break + return cn_args[0] + controlnet_count diff --git a/3-bmab/sd_bmab/detectors/__init__.py b/3-bmab/sd_bmab/detectors/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..840b54a401cd3bc95c4087c87fdc4c3181fe0827 --- /dev/null +++ b/3-bmab/sd_bmab/detectors/__init__.py @@ -0,0 +1,15 @@ +from sd_bmab.detectors.face import UltralyticsFaceDetector8s +from sd_bmab.detectors.face import UltralyticsFaceDetector8nv2 +from sd_bmab.detectors.face import UltralyticsFaceDetector8n +from sd_bmab.detectors.face import UltralyticsFaceDetector8m +from sd_bmab.detectors.face import BmabFaceNormal +from sd_bmab.detectors.face import BmabFaceSmall +from sd_bmab.detectors.person import UltralyticsPersonDetector8m +from sd_bmab.detectors.person import UltralyticsPersonDetector8n +from sd_bmab.detectors.person import UltralyticsPersonDetector8s +from sd_bmab.detectors.hand import UltralyticsHandDetector8s +from sd_bmab.detectors.hand import UltralyticsHandDetector8n +from sd_bmab.detectors.hand import BmabHandDetector +from sd_bmab.detectors.detector import list_face_detectors, list_hand_detectors, list_person_detectors, get_detector + + diff --git a/3-bmab/sd_bmab/detectors/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/detectors/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..acc5e137e1a506f90d0b03e0f115ad0b9f80c3b1 Binary files /dev/null and b/3-bmab/sd_bmab/detectors/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/detectors/__pycache__/detector.cpython-310.pyc 
b/3-bmab/sd_bmab/detectors/__pycache__/detector.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..be69b18c6bd18bde91dedc82d87f3fe9e3398ed0 Binary files /dev/null and b/3-bmab/sd_bmab/detectors/__pycache__/detector.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/detectors/__pycache__/face.cpython-310.pyc b/3-bmab/sd_bmab/detectors/__pycache__/face.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e6d155e0ec87bed11862248d85194dd3c3c4d8f6 Binary files /dev/null and b/3-bmab/sd_bmab/detectors/__pycache__/face.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/detectors/__pycache__/hand.cpython-310.pyc b/3-bmab/sd_bmab/detectors/__pycache__/hand.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d356cfabf3a5a5fd0a241f8dbf8fba952b4eccea Binary files /dev/null and b/3-bmab/sd_bmab/detectors/__pycache__/hand.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/detectors/__pycache__/person.cpython-310.pyc b/3-bmab/sd_bmab/detectors/__pycache__/person.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e32e5b2b63d9ccf5a833d70e30cd32224fe1a36 Binary files /dev/null and b/3-bmab/sd_bmab/detectors/__pycache__/person.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/detectors/anything.py b/3-bmab/sd_bmab/detectors/anything.py new file mode 100644 index 0000000000000000000000000000000000000000..6eeeb0748970e461bdb5f30d46f8b5c78cda1185 --- /dev/null +++ b/3-bmab/sd_bmab/detectors/anything.py @@ -0,0 +1,15 @@ +from sd_bmab.base.detectorbase import DetectorBase + + +class UltralyticsAnythingDetector(DetectorBase): + def __init__(self) -> None: + super().__init__() + + def target(self): + pass + + def description(self): + pass + + def detect(self, context, image): + pass diff --git a/3-bmab/sd_bmab/detectors/detector.py b/3-bmab/sd_bmab/detectors/detector.py new file mode 100644 index 0000000000000000000000000000000000000000..7abf6c9ae156755858219562d7311e2b36032a94 --- /dev/null +++ b/3-bmab/sd_bmab/detectors/detector.py @@ -0,0 +1,67 @@ +from sd_bmab.base.context import Context + +from sd_bmab.detectors.person import UltralyticsPersonDetector8m +from sd_bmab.detectors.person import UltralyticsPersonDetector8n, UltralyticsPersonDetector8s +from sd_bmab.detectors.face import UltralyticsFaceDetector8n, UltralyticsFaceDetector8s +from sd_bmab.detectors.face import UltralyticsFaceDetector8nv2, UltralyticsFaceDetector8m +from sd_bmab.detectors.face import BmabFaceSmall, BmabFaceNormal +from sd_bmab.detectors.hand import UltralyticsHandDetector8n, UltralyticsHandDetector8s +from sd_bmab.util import debug_print + + +def get_detector(context: Context, model: str, **kwargs): + + debug_print('model', model) + if model == 'face_yolov8n.pt': + return UltralyticsFaceDetector8n(**kwargs) + + all_detectors = [ + BmabFaceNormal(**kwargs), + BmabFaceSmall(**kwargs), + UltralyticsPersonDetector8m(**kwargs), + UltralyticsPersonDetector8n(**kwargs), + UltralyticsPersonDetector8s(**kwargs), + UltralyticsFaceDetector8n(**kwargs), + UltralyticsFaceDetector8nv2(**kwargs), + UltralyticsFaceDetector8m(**kwargs), + UltralyticsFaceDetector8s(**kwargs), + UltralyticsHandDetector8n(**kwargs), + UltralyticsHandDetector8s(**kwargs), + ] + + targets = [x for x in all_detectors if model == x.target()] + if len(targets) == 1: + return targets[0] + raise Exception(f'Not found or multiple detector {model}') + + +def list_person_detectors(): + kwargs = {} + person_detectors = [ + 
UltralyticsPersonDetector8m(**kwargs), + UltralyticsPersonDetector8n(**kwargs), + UltralyticsPersonDetector8s(**kwargs), + ] + return [x.target() for x in person_detectors] + + +def list_face_detectors(): + kwargs = {} + face_detectors = [ + BmabFaceNormal(**kwargs), + BmabFaceSmall(**kwargs), + UltralyticsFaceDetector8n(**kwargs), + UltralyticsFaceDetector8nv2(**kwargs), + UltralyticsFaceDetector8m(**kwargs), + UltralyticsFaceDetector8s(**kwargs), + ] + return [x.target() for x in face_detectors] + + +def list_hand_detectors(): + kwargs = {} + hand_detectors = [ + UltralyticsHandDetector8n(**kwargs), + UltralyticsHandDetector8s(**kwargs), + ] + return [x.target() for x in hand_detectors] diff --git a/3-bmab/sd_bmab/detectors/face.py b/3-bmab/sd_bmab/detectors/face.py new file mode 100644 index 0000000000000000000000000000000000000000..d26967d1138cb4495056f11bbe716000eeb63212 --- /dev/null +++ b/3-bmab/sd_bmab/detectors/face.py @@ -0,0 +1,119 @@ +import torch +from PIL import Image +from ultralytics import YOLO + +import modules +from modules import images +from modules import shared + +from sd_bmab import util +from sd_bmab.base.context import Context +from sd_bmab.base.detectorbase import DetectorBase + + +class FaceDetector(DetectorBase): + + def description(self): + return f'Face detecting using {self.target()}' + + +class UltralyticsFaceDetector(FaceDetector): + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.confidence = kwargs.get('box_threshold', 0.35) + self.model = None + + def target(self): + return f'Ultralytics({self.model})' + + def predict(self, context: Context, image: Image): + yolo = util.lazy_loader(self.model) + boxes = [] + confs = [] + load = torch.load + torch.load = modules.safe.unsafe_torch_load + try: + model = YOLO(yolo) + pred = model(image, conf=self.confidence, device='') + boxes = pred[0].boxes.xyxy.cpu().numpy() + boxes = boxes.tolist() + confs = pred[0].boxes.conf.tolist() + except: + pass + torch.load = load + return boxes, confs + + +class UltralyticsFaceDetector8n(UltralyticsFaceDetector): + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.model = 'face_yolov8n.pt' + + +class UltralyticsFaceDetector8m(UltralyticsFaceDetector): + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.model = 'face_yolov8m.pt' + + +class UltralyticsFaceDetector8nv2(UltralyticsFaceDetector): + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.model = 'face_yolov8n_v2.pt' + + +class UltralyticsFaceDetector8s(UltralyticsFaceDetector): + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.model = 'face_yolov8s.pt' + + +class BmabFaceSmall(UltralyticsFaceDetector): + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.model = 'bmab_face_sm_yolov8n.pt' + + def target(self): + return 'BMAB Face(Small)' + + def predict(self, context: Context, image: Image): + if shared.opts.bmab_debug_logging: + boxes, logits = super().predict(context, image) + if len(boxes) == 0: + images.save_image( + image, context.sdprocessing.outpath_samples, '', + context.sdprocessing.all_seeds[context.index], context.sdprocessing.all_prompts[context.index], + shared.opts.samples_format, p=context.sdprocessing, suffix="-debugging") + det = UltralyticsFaceDetector8n() + return det.predict(context, image) + return boxes, logits + else: + return super().predict(context, image) + + +class BmabFaceNormal(UltralyticsFaceDetector): + def __init__(self, **kwargs) -> None: + 
super().__init__(**kwargs) + self.model = 'bmab_face_nm_yolov8n.pt' + + def target(self): + return 'BMAB Face(Normal)' + + def predict(self, context: Context, image: Image): + if shared.opts.bmab_debug_logging: + boxes, logits = super().predict(context, image) + if len(boxes) == 0: + images.save_image( + image, context.sdprocessing.outpath_samples, '', + context.sdprocessing.all_seeds[context.index], context.sdprocessing.all_prompts[context.index], + shared.opts.samples_format, p=context.sdprocessing, suffix="-debugging") + det = UltralyticsFaceDetector8n() + return det.predict(context, image) + return boxes, logits + else: + return super().predict(context, image) + + + + + diff --git a/3-bmab/sd_bmab/detectors/hand.py b/3-bmab/sd_bmab/detectors/hand.py new file mode 100644 index 0000000000000000000000000000000000000000..c8b5b6810e338b7b89f657fe2d1eaae94e7f4262 --- /dev/null +++ b/3-bmab/sd_bmab/detectors/hand.py @@ -0,0 +1,63 @@ +import torch +from PIL import Image +from ultralytics import YOLO + +import modules + +from sd_bmab import util +from sd_bmab.base.context import Context +from sd_bmab.base.detectorbase import DetectorBase + + +class HandDetector(DetectorBase): + + def description(self): + return f'Hand detecting using {self.target()}' + + +class UltralyticsHandDetector(HandDetector): + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.confidence = kwargs.get('box_threshold', 0.35) + self.model = None + + def target(self): + return f'Ultralytics({self.model})' + + def predict(self, context: Context, image: Image): + yolo = util.lazy_loader(self.model) + boxes = [] + confs = [] + load = torch.load + torch.load = modules.safe.unsafe_torch_load + try: + model = YOLO(yolo) + pred = model(image, conf=self.confidence, device='') + boxes = pred[0].boxes.xyxy.cpu().numpy() + boxes = boxes.tolist() + confs = pred[0].boxes.conf.tolist() + except: + pass + torch.load = load + return boxes, confs + + +class UltralyticsHandDetector8n(UltralyticsHandDetector): + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.model = 'hand_yolov8n.pt' + + +class UltralyticsHandDetector8s(UltralyticsHandDetector): + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.model = 'hand_yolov8s.pt' + + +class BmabHandDetector(UltralyticsHandDetector): + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.model = 'bmab_hand_yolov8n.pt' + + def target(self): + return 'BMAB Hand(Normal)' diff --git a/3-bmab/sd_bmab/detectors/person.py b/3-bmab/sd_bmab/detectors/person.py new file mode 100644 index 0000000000000000000000000000000000000000..cd3fb53abbfc552d3491bb7b6c69d08105c1bd9b --- /dev/null +++ b/3-bmab/sd_bmab/detectors/person.py @@ -0,0 +1,60 @@ +import torch +from PIL import Image +from ultralytics import YOLO + +import modules + +from sd_bmab import util +from sd_bmab.base.context import Context +from sd_bmab.base.detectorbase import DetectorBase + + +class PersonDetector(DetectorBase): + + def description(self): + return f'Person detecting using {self.target()}' + + +class UltralyticsPersonDetector(PersonDetector): + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.confidence = kwargs.get('box_threshold', 0.35) + self.model = None + + def target(self): + return f'Ultralytics({self.model})' + + def predict(self, context: Context, image: Image): + yolo = util.lazy_loader(self.model) + boxes = [] + confs = [] + load = torch.load + torch.load = modules.safe.unsafe_torch_load + try: + model = 
YOLO(yolo) + pred = model(image, conf=self.confidence, device='') + boxes = pred[0].boxes.xyxy.cpu().numpy() + boxes = boxes.tolist() + confs = pred[0].boxes.conf.tolist() + except: + pass + torch.load = load + return boxes, confs + + +class UltralyticsPersonDetector8n(UltralyticsPersonDetector): + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.model = 'person_yolov8n-seg.pt' + + +class UltralyticsPersonDetector8m(UltralyticsPersonDetector): + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.model = 'person_yolov8m-seg.pt' + + +class UltralyticsPersonDetector8s(UltralyticsPersonDetector): + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.model = 'person_yolov8s-seg.pt' diff --git a/3-bmab/sd_bmab/external/__init__.py b/3-bmab/sd_bmab/external/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..18dd7c61db144447b0093f9c1ce1bd2d03946a3c --- /dev/null +++ b/3-bmab/sd_bmab/external/__init__.py @@ -0,0 +1,30 @@ +import os +import sys +import importlib.util + + +def load_external_module(module, name): + path = os.path.dirname(__file__) + path = os.path.normpath(os.path.join(path, f'{module}/{name}.py')) + return load_module(path, 'module') + + +def load_module(file_name, module_name): + spec = importlib.util.spec_from_file_location(module_name, file_name) + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + + +class ModuleAutoLoader(object): + + def __init__(self, module, name) -> None: + super().__init__() + self.mod = load_external_module(module, name) + + def __enter__(self): + return self.mod + + def __exit__(self, *args, **kwargs): + self.mod.release() diff --git a/3-bmab/sd_bmab/external/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/external/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8dda5da69079952e543e6f1af267e88bf3a7ae8e Binary files /dev/null and b/3-bmab/sd_bmab/external/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/groundingdino/grdino.py b/3-bmab/sd_bmab/external/groundingdino/grdino.py new file mode 100644 index 0000000000000000000000000000000000000000..f96a513a4b4a39065d7d1f27353fb68c8d2c2669 --- /dev/null +++ b/3-bmab/sd_bmab/external/groundingdino/grdino.py @@ -0,0 +1,58 @@ +import numpy as np + +import torch + +from groundingdino.util.inference import load_model, predict +from modules.devices import device, torch_gc + +from torchvision.ops import box_convert +import groundingdino.datasets.transforms as T + +from sd_bmab import util + + +dino_model = None + + +def dino_init(): + global dino_model + if not dino_model: + util.debug_print('initialize grounding dino.') + swint_ogc = util.lazy_loader('GroundingDINO_SwinT_OGC.py') + swint_ogc_pth = util.lazy_loader('groundingdino_swint_ogc.pth') + dino_model = load_model(swint_ogc, swint_ogc_pth) + return dino_model + + +def dino_predict(pilimg, prompt, box_threahold=0.35, text_threshold=0.25): + transform = T.Compose( + [ + T.RandomResize([800], max_size=1333), + T.ToTensor(), + T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + ] + ) + img = pilimg.convert('RGB') + image_source = np.asarray(img) + image, _ = transform(img, None) + + model = dino_init() + boxes, logits, phrases = predict( + model=model, + image=image, + caption=prompt, + box_threshold=box_threahold, + text_threshold=text_threshold + ) + + h, w, _ = image_source.shape 
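+	# GroundingDINO returns boxes normalized to [0, 1] in cxcywh order;
+	# scale them to pixel coordinates, then convert to xyxy for downstream use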
+ boxes = boxes * torch.Tensor([w, h, w, h]) + annotated_frame = box_convert(boxes=boxes, in_fmt='cxcywh', out_fmt='xyxy').numpy() + + return annotated_frame, logits, phrases + + +def release(): + global dino_model + del dino_model + torch_gc() diff --git a/3-bmab/sd_bmab/external/iclight/__init__.py b/3-bmab/sd_bmab/external/iclight/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/3-bmab/sd_bmab/external/iclight/bmabiclight.py b/3-bmab/sd_bmab/external/iclight/bmabiclight.py new file mode 100644 index 0000000000000000000000000000000000000000..44388079904f664c36891d7bf3c889b2728ee1bd --- /dev/null +++ b/3-bmab/sd_bmab/external/iclight/bmabiclight.py @@ -0,0 +1,157 @@ +import torch +import numpy as np + +from ultralytics import YOLO + +from PIL import Image +from PIL import ImageDraw +from PIL import ImageFilter + +import modules +from modules import devices + +from sd_bmab import util +from sd_bmab.base import sam +from sd_bmab.external import load_external_module + + +def process_iclight(context, image, bg_image, prompt, blending, bg_source, arg1, arg2): + np_image = np.array(image.convert('RGB')).astype("uint8") + + if bg_image is None: + mod = load_external_module('iclight', 'iclightnm') + input_fg, matting = mod.run_rmbg(np_image) + seed, subseed = context.get_seeds() + result = mod.process_relight(input_fg, prompt, image.width, image.height, 1, seed, 25, + 'best quality', 'lowres, bad anatomy, bad hands, cropped, worst quality', + arg1[0], arg1[1], arg1[2], arg1[3], bg_source) + mod.release() + fg = Image.fromarray(input_fg.astype('uint8'), 'RGB') + context.add_extra_image(image) + context.add_extra_image(fg) + context.add_extra_image(result) + else: + mod = load_external_module('iclight', 'iclightbg') + input_fg, matting = mod.run_rmbg(np_image) + seed, subseed = context.get_seeds() + result = mod.process_relight(input_fg, None, prompt, image.width, image.height, 1, seed, 20, + 'best quality', 'lowres, bad anatomy, bad hands, cropped, worst quality', + arg2[0], arg2[1], arg2[2], bg_source) + mod.release() + context.add_extra_image(image) + context.add_extra_image(bg_image) + context.add_extra_image(result) + return result + + +def process_bmab_relight(context, image, bg_image, prompt, blending, bg_source, arg1): + mod = load_external_module('iclight', 'iclightbg') + seed, subseed = context.get_seeds() + img1 = image.convert('RGBA') + if bg_image is None: + print('BG Source', bg_source) + if bg_source == 'Face' or bg_source == 'Person': + img2 = generate_detection_gradient(image, bg_source) + context.add_extra_image(img2) + else: + img2 = generate_gradient((32, 32, 32), (224, 224, 224), image.width, image.height, bg_source) + img2 = img2.convert('RGBA') + else: + img2 = bg_image.resize(img1.size, Image.LANCZOS).convert('RGBA') + + blended = Image.blend(img1, img2, alpha=blending) + np_image = np.array(image.convert('RGB')).astype("uint8") + input_bg = np.array(blended.convert('RGB')).astype("uint8") + input_fg, matting = mod.run_rmbg(np_image) + result = mod.process_relight(input_fg, input_bg, prompt, image.width, image.height, 1, seed, 20, + 'best quality', 'lowres, bad anatomy, bad hands, cropped, worst quality', + arg1[0], arg1[1], arg1[2], 'Use Background Image') + mod.release() + return result + + +def generate_gradient( + colour1, colour2, width: int, height: int, d) -> Image: + """Generate a vertical gradient.""" + base = Image.new('RGB', (width, height), colour1) + top = Image.new('RGB', 
(width, height), colour2) + mask = Image.new('L', (width, height)) + mask_data = [] + if d == 'Left': + for y in range(height): + mask_data.extend([255 - int(255 * (x / width)) for x in range(width)]) + if d == 'Right': + for y in range(height): + mask_data.extend([int(255 * (x / width)) for x in range(width)]) + if d == 'Bottom': + for y in range(height): + mask_data.extend([int(255 * (y / height))] * width) + if d == 'Top': + for y in range(height): + mask_data.extend([255 - int(255 * (y / height))] * width) + mask.putdata(mask_data) + base.paste(top, (0, 0), mask) + return base + + +def predict(image: Image, model, confidence): + yolo = util.load_pretraining_model(model) + boxes = [] + confs = [] + load = torch.load + torch.load = modules.safe.unsafe_torch_load + try: + model = YOLO(yolo) + pred = model(image, conf=confidence, device='') + boxes = pred[0].boxes.xyxy.cpu().numpy() + boxes = boxes.tolist() + confs = pred[0].boxes.conf.tolist() + except: + pass + torch.load = load + devices.torch_gc() + + return boxes, confs + + +def generate_detection_gradient(image, model): + mask = Image.new('L', (512, 768), 32) + dr = ImageDraw.Draw(mask, 'L') + + if model == 'Face': + boxes, confs = predict(image, 'face_yolov8n.pt', 0.35) + for box, conf in zip(boxes, confs): + x1, y1, x2, y2 = tuple(int(x) for x in box) + dx = int((x2-x1)) + dy = int((y2-y1)) + dr.ellipse((x1 - dx, y1 - dy, x2 + dx, y2 + dy), fill=225) + blur = ImageFilter.GaussianBlur(10) + elif model == 'Person': + boxes, confs = predict(image, 'person_yolov8n-seg.pt', 0.35) + for box, conf in zip(boxes, confs): + x1, y1, x2, y2 = tuple(int(x) for x in box) + m = sam.sam_predict_box(image, (x1, y1, x2, y2)) + mask.paste(m, mask=m) + blur = ImageFilter.GaussianBlur(30) + else: + return mask + return mask.filter(blur) + + +def bmab_relight(context, process_type, image, bg_image, prompt, blending, bg_source): + if process_type == 'intensive': + if bg_source == 'Face' or bg_source == 'Person': + bg_source = 'None' + return process_iclight(context, image, bg_image, prompt, blending, bg_source, (2, 1.0, 0.5, 0.9), (7, 1.0, 0.5)) + elif process_type == 'less intensive': + if bg_source == 'Face' or bg_source == 'Person': + bg_source = 'None' + return process_iclight(context, image, bg_image, prompt, blending, bg_source, (2, 1.0, 0.45, 0.85), (7, 1.0, 0.45)) + elif process_type == 'normal': + return process_bmab_relight(context, image, bg_image, prompt, blending, bg_source, (7, 1.0, 0.45)) + elif process_type == 'soft': + return process_bmab_relight(context, image, bg_image, prompt, blending, bg_source, (7, 1.0, 0.4)) + + +def release(): + pass diff --git a/3-bmab/sd_bmab/external/iclight/briarmbg.py b/3-bmab/sd_bmab/external/iclight/briarmbg.py new file mode 100644 index 0000000000000000000000000000000000000000..27b1b01a3c14feb546d019d63179bd3fed7036cc --- /dev/null +++ b/3-bmab/sd_bmab/external/iclight/briarmbg.py @@ -0,0 +1,467 @@ +''' +THIS CODE FROM https://github.com/lllyasviel/IC-Light +License : Apache 2.0 +''' + +# RMBG1.4 (diffusers implementation) +# Found on huggingface space of several projects +# Not sure which project is the source of this file + +import torch +import torch.nn as nn +import torch.nn.functional as F +from huggingface_hub import PyTorchModelHubMixin + + +class REBNCONV(nn.Module): + def __init__(self, in_ch=3, out_ch=3, dirate=1, stride=1): + super(REBNCONV, self).__init__() + + self.conv_s1 = nn.Conv2d( + in_ch, out_ch, 3, padding=1 * dirate, dilation=1 * dirate, stride=stride + ) + self.bn_s1 = 
nn.BatchNorm2d(out_ch) + self.relu_s1 = nn.ReLU(inplace=True) + + def forward(self, x): + hx = x + xout = self.relu_s1(self.bn_s1(self.conv_s1(hx))) + + return xout + + +def _upsample_like(src, tar): + src = F.interpolate(src, size=tar.shape[2:], mode="bilinear") + return src + + +### RSU-7 ### +class RSU7(nn.Module): + def __init__(self, in_ch=3, mid_ch=12, out_ch=3, img_size=512): + super(RSU7, self).__init__() + + self.in_ch = in_ch + self.mid_ch = mid_ch + self.out_ch = out_ch + + self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1) ## 1 -> 1/2 + + self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1) + self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1) + self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1) + self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1) + self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1) + self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1) + + self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2) + + self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1) + self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1) + self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1) + self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1) + self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1) + self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1) + + def forward(self, x): + b, c, h, w = x.shape + + hx = x + hxin = self.rebnconvin(hx) + + hx1 = self.rebnconv1(hxin) + hx = self.pool1(hx1) + + hx2 = self.rebnconv2(hx) + hx = self.pool2(hx2) + + hx3 = self.rebnconv3(hx) + hx = self.pool3(hx3) + + hx4 = self.rebnconv4(hx) + hx = self.pool4(hx4) + + hx5 = self.rebnconv5(hx) + hx = self.pool5(hx5) + + hx6 = self.rebnconv6(hx) + + hx7 = self.rebnconv7(hx6) + + hx6d = self.rebnconv6d(torch.cat((hx7, hx6), 1)) + hx6dup = _upsample_like(hx6d, hx5) + + hx5d = self.rebnconv5d(torch.cat((hx6dup, hx5), 1)) + hx5dup = _upsample_like(hx5d, hx4) + + hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1)) + hx4dup = _upsample_like(hx4d, hx3) + + hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1)) + hx3dup = _upsample_like(hx3d, hx2) + + hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1)) + hx2dup = _upsample_like(hx2d, hx1) + + hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1)) + + return hx1d + hxin + + +### RSU-6 ### +class RSU6(nn.Module): + def __init__(self, in_ch=3, mid_ch=12, out_ch=3): + super(RSU6, self).__init__() + + self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1) + + self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1) + self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1) + self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1) + self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1) + self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1) + + self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2) + + self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1) + self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1) + self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1) + self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1) + self.rebnconv1d = 
REBNCONV(mid_ch * 2, out_ch, dirate=1) + + def forward(self, x): + hx = x + + hxin = self.rebnconvin(hx) + + hx1 = self.rebnconv1(hxin) + hx = self.pool1(hx1) + + hx2 = self.rebnconv2(hx) + hx = self.pool2(hx2) + + hx3 = self.rebnconv3(hx) + hx = self.pool3(hx3) + + hx4 = self.rebnconv4(hx) + hx = self.pool4(hx4) + + hx5 = self.rebnconv5(hx) + + hx6 = self.rebnconv6(hx5) + + hx5d = self.rebnconv5d(torch.cat((hx6, hx5), 1)) + hx5dup = _upsample_like(hx5d, hx4) + + hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1)) + hx4dup = _upsample_like(hx4d, hx3) + + hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1)) + hx3dup = _upsample_like(hx3d, hx2) + + hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1)) + hx2dup = _upsample_like(hx2d, hx1) + + hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1)) + + return hx1d + hxin + + +### RSU-5 ### +class RSU5(nn.Module): + def __init__(self, in_ch=3, mid_ch=12, out_ch=3): + super(RSU5, self).__init__() + + self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1) + + self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1) + self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1) + self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1) + self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1) + + self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2) + + self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1) + self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1) + self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1) + self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1) + + def forward(self, x): + hx = x + + hxin = self.rebnconvin(hx) + + hx1 = self.rebnconv1(hxin) + hx = self.pool1(hx1) + + hx2 = self.rebnconv2(hx) + hx = self.pool2(hx2) + + hx3 = self.rebnconv3(hx) + hx = self.pool3(hx3) + + hx4 = self.rebnconv4(hx) + + hx5 = self.rebnconv5(hx4) + + hx4d = self.rebnconv4d(torch.cat((hx5, hx4), 1)) + hx4dup = _upsample_like(hx4d, hx3) + + hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1)) + hx3dup = _upsample_like(hx3d, hx2) + + hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1)) + hx2dup = _upsample_like(hx2d, hx1) + + hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1)) + + return hx1d + hxin + + +### RSU-4 ### +class RSU4(nn.Module): + def __init__(self, in_ch=3, mid_ch=12, out_ch=3): + super(RSU4, self).__init__() + + self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1) + + self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1) + self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1) + self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1) + + self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2) + + self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1) + self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1) + self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1) + + def forward(self, x): + hx = x + + hxin = self.rebnconvin(hx) + + hx1 = self.rebnconv1(hxin) + hx = self.pool1(hx1) + + hx2 = self.rebnconv2(hx) + hx = self.pool2(hx2) + + hx3 = self.rebnconv3(hx) + + hx4 = self.rebnconv4(hx3) + + hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1)) + hx3dup = _upsample_like(hx3d, hx2) + + hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1)) + hx2dup = _upsample_like(hx2d, hx1) + + hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1)) + + return hx1d + hxin + + +### RSU-4F ### +class 
RSU4F(nn.Module): + def __init__(self, in_ch=3, mid_ch=12, out_ch=3): + super(RSU4F, self).__init__() + + self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1) + + self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1) + self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2) + self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4) + + self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8) + + self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4) + self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2) + self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1) + + def forward(self, x): + hx = x + + hxin = self.rebnconvin(hx) + + hx1 = self.rebnconv1(hxin) + hx2 = self.rebnconv2(hx1) + hx3 = self.rebnconv3(hx2) + + hx4 = self.rebnconv4(hx3) + + hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1)) + hx2d = self.rebnconv2d(torch.cat((hx3d, hx2), 1)) + hx1d = self.rebnconv1d(torch.cat((hx2d, hx1), 1)) + + return hx1d + hxin + + +class myrebnconv(nn.Module): + def __init__( + self, + in_ch=3, + out_ch=1, + kernel_size=3, + stride=1, + padding=1, + dilation=1, + groups=1, + ): + super(myrebnconv, self).__init__() + + self.conv = nn.Conv2d( + in_ch, + out_ch, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + ) + self.bn = nn.BatchNorm2d(out_ch) + self.rl = nn.ReLU(inplace=True) + + def forward(self, x): + return self.rl(self.bn(self.conv(x))) + + +class BriaRMBG(nn.Module, PyTorchModelHubMixin): + def __init__(self, config: dict = {"in_ch": 3, "out_ch": 1}): + super(BriaRMBG, self).__init__() + in_ch = config["in_ch"] + out_ch = config["out_ch"] + self.conv_in = nn.Conv2d(in_ch, 64, 3, stride=2, padding=1) + self.pool_in = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.stage1 = RSU7(64, 32, 64) + self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.stage2 = RSU6(64, 32, 128) + self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.stage3 = RSU5(128, 64, 256) + self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.stage4 = RSU4(256, 128, 512) + self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.stage5 = RSU4F(512, 256, 512) + self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.stage6 = RSU4F(512, 256, 512) + + # decoder + self.stage5d = RSU4F(1024, 256, 512) + self.stage4d = RSU4(1024, 128, 256) + self.stage3d = RSU5(512, 64, 128) + self.stage2d = RSU6(256, 32, 64) + self.stage1d = RSU7(128, 16, 64) + + self.side1 = nn.Conv2d(64, out_ch, 3, padding=1) + self.side2 = nn.Conv2d(64, out_ch, 3, padding=1) + self.side3 = nn.Conv2d(128, out_ch, 3, padding=1) + self.side4 = nn.Conv2d(256, out_ch, 3, padding=1) + self.side5 = nn.Conv2d(512, out_ch, 3, padding=1) + self.side6 = nn.Conv2d(512, out_ch, 3, padding=1) + + # self.outconv = nn.Conv2d(6*out_ch,out_ch,1) + + def forward(self, x): + hx = x + + hxin = self.conv_in(hx) + # hx = self.pool_in(hxin) + + # stage 1 + hx1 = self.stage1(hxin) + hx = self.pool12(hx1) + + # stage 2 + hx2 = self.stage2(hx) + hx = self.pool23(hx2) + + # stage 3 + hx3 = self.stage3(hx) + hx = self.pool34(hx3) + + # stage 4 + hx4 = self.stage4(hx) + hx = self.pool45(hx4) + + # stage 5 + hx5 = self.stage5(hx) + hx = self.pool56(hx5) + + # stage 6 + hx6 = self.stage6(hx) + hx6up = _upsample_like(hx6, hx5) + + # -------------------- decoder -------------------- + hx5d = self.stage5d(torch.cat((hx6up, hx5), 1)) + hx5dup = _upsample_like(hx5d, hx4) + + hx4d = self.stage4d(torch.cat((hx5dup, hx4), 1)) + hx4dup = _upsample_like(hx4d, hx3) + + hx3d = self.stage3d(torch.cat((hx4dup, 
hx3), 1)) + hx3dup = _upsample_like(hx3d, hx2) + + hx2d = self.stage2d(torch.cat((hx3dup, hx2), 1)) + hx2dup = _upsample_like(hx2d, hx1) + + hx1d = self.stage1d(torch.cat((hx2dup, hx1), 1)) + + # side output + d1 = self.side1(hx1d) + d1 = _upsample_like(d1, x) + + d2 = self.side2(hx2d) + d2 = _upsample_like(d2, x) + + d3 = self.side3(hx3d) + d3 = _upsample_like(d3, x) + + d4 = self.side4(hx4d) + d4 = _upsample_like(d4, x) + + d5 = self.side5(hx5d) + d5 = _upsample_like(d5, x) + + d6 = self.side6(hx6) + d6 = _upsample_like(d6, x) + + return [ + F.sigmoid(d1), + F.sigmoid(d2), + F.sigmoid(d3), + F.sigmoid(d4), + F.sigmoid(d5), + F.sigmoid(d6), + ], [hx1d, hx2d, hx3d, hx4d, hx5d, hx6] diff --git a/3-bmab/sd_bmab/external/iclight/iclightbg.py b/3-bmab/sd_bmab/external/iclight/iclightbg.py new file mode 100644 index 0000000000000000000000000000000000000000..27135ac7b3aa9884e0e184ad194bf2d906858ece --- /dev/null +++ b/3-bmab/sd_bmab/external/iclight/iclightbg.py @@ -0,0 +1,384 @@ +''' +THIS CODE FROM https://github.com/lllyasviel/IC-Light +License : Apache 2.0 +''' + +import gc +import os +import math +import numpy as np +import torch +import safetensors.torch as sf + +from PIL import Image +from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline +from diffusers import AutoencoderKL, UNet2DConditionModel, DDIMScheduler, EulerAncestralDiscreteScheduler, DPMSolverMultistepScheduler +from diffusers.models.attention_processor import AttnProcessor2_0 +from transformers import CLIPTextModel, CLIPTokenizer +from sd_bmab.external.iclight.briarmbg import BriaRMBG +from enum import Enum +from torch.hub import download_url_to_file + + +# 'stablediffusionapi/realistic-vision-v51' +# 'runwayml/stable-diffusion-v1-5' +sd15_name = 'stablediffusionapi/realistic-vision-v51' +tokenizer = CLIPTokenizer.from_pretrained(sd15_name, subfolder="tokenizer") +text_encoder = CLIPTextModel.from_pretrained(sd15_name, subfolder="text_encoder") +vae = AutoencoderKL.from_pretrained(sd15_name, subfolder="vae") +unet = UNet2DConditionModel.from_pretrained(sd15_name, subfolder="unet") +rmbg = BriaRMBG.from_pretrained("briaai/RMBG-1.4") + +# Change UNet + +with torch.no_grad(): + new_conv_in = torch.nn.Conv2d(12, unet.conv_in.out_channels, unet.conv_in.kernel_size, unet.conv_in.stride, unet.conv_in.padding) + new_conv_in.weight.zero_() + new_conv_in.weight[:, :4, :, :].copy_(unet.conv_in.weight) + new_conv_in.bias = unet.conv_in.bias + unet.conv_in = new_conv_in + +unet_original_forward = unet.forward + + +def hooked_unet_forward(sample, timestep, encoder_hidden_states, **kwargs): + c_concat = kwargs['cross_attention_kwargs']['concat_conds'].to(sample) + c_concat = torch.cat([c_concat] * (sample.shape[0] // c_concat.shape[0]), dim=0) + new_sample = torch.cat([sample, c_concat], dim=1) + kwargs['cross_attention_kwargs'] = {} + return unet_original_forward(new_sample, timestep, encoder_hidden_states, **kwargs) + + +unet.forward = hooked_unet_forward + +# Load + +model_path = './models/iclight_sd15_fbc.safetensors' + +if not os.path.exists(model_path): + download_url_to_file(url='https://huggingface.co/lllyasviel/ic-light/resolve/main/iclight_sd15_fbc.safetensors', dst=model_path) + +sd_offset = sf.load_file(model_path) +sd_origin = unet.state_dict() +keys = sd_origin.keys() +sd_merged = {k: sd_origin[k] + sd_offset[k] for k in sd_origin.keys()} +unet.load_state_dict(sd_merged, strict=True) +del sd_offset, sd_origin, sd_merged, keys + +# Device + +device = torch.device('cuda') +text_encoder = 
text_encoder.to(device=device, dtype=torch.float16) +vae = vae.to(device=device, dtype=torch.bfloat16) +unet = unet.to(device=device, dtype=torch.float16) +rmbg = rmbg.to(device=device, dtype=torch.float32) + +# SDP + +unet.set_attn_processor(AttnProcessor2_0()) +vae.set_attn_processor(AttnProcessor2_0()) + +# Samplers + +ddim_scheduler = DDIMScheduler( + num_train_timesteps=1000, + beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + clip_sample=False, + set_alpha_to_one=False, + steps_offset=1, +) + +euler_a_scheduler = EulerAncestralDiscreteScheduler( + num_train_timesteps=1000, + beta_start=0.00085, + beta_end=0.012, + steps_offset=1 +) + +dpmpp_2m_sde_karras_scheduler = DPMSolverMultistepScheduler( + num_train_timesteps=1000, + beta_start=0.00085, + beta_end=0.012, + algorithm_type="sde-dpmsolver++", + use_karras_sigmas=True, + steps_offset=1 +) + +# Pipelines + +t2i_pipe = StableDiffusionPipeline( + vae=vae, + text_encoder=text_encoder, + tokenizer=tokenizer, + unet=unet, + scheduler=dpmpp_2m_sde_karras_scheduler, + safety_checker=None, + requires_safety_checker=False, + feature_extractor=None, + image_encoder=None +) + +i2i_pipe = StableDiffusionImg2ImgPipeline( + vae=vae, + text_encoder=text_encoder, + tokenizer=tokenizer, + unet=unet, + scheduler=dpmpp_2m_sde_karras_scheduler, + safety_checker=None, + requires_safety_checker=False, + feature_extractor=None, + image_encoder=None +) + + +@torch.inference_mode() +def encode_prompt_inner(txt: str): + max_length = tokenizer.model_max_length + chunk_length = tokenizer.model_max_length - 2 + id_start = tokenizer.bos_token_id + id_end = tokenizer.eos_token_id + id_pad = id_end + + def pad(x, p, i): + return x[:i] if len(x) >= i else x + [p] * (i - len(x)) + + tokens = tokenizer(txt, truncation=False, add_special_tokens=False)["input_ids"] + chunks = [[id_start] + tokens[i: i + chunk_length] + [id_end] for i in range(0, len(tokens), chunk_length)] + chunks = [pad(ck, id_pad, max_length) for ck in chunks] + + token_ids = torch.tensor(chunks).to(device=device, dtype=torch.int64) + conds = text_encoder(token_ids).last_hidden_state + + return conds + + +@torch.inference_mode() +def encode_prompt_pair(positive_prompt, negative_prompt): + c = encode_prompt_inner(positive_prompt) + uc = encode_prompt_inner(negative_prompt) + + c_len = float(len(c)) + uc_len = float(len(uc)) + max_count = max(c_len, uc_len) + c_repeat = int(math.ceil(max_count / c_len)) + uc_repeat = int(math.ceil(max_count / uc_len)) + max_chunk = max(len(c), len(uc)) + + c = torch.cat([c] * c_repeat, dim=0)[:max_chunk] + uc = torch.cat([uc] * uc_repeat, dim=0)[:max_chunk] + + c = torch.cat([p[None, ...] for p in c], dim=1) + uc = torch.cat([p[None, ...] 
for p in uc], dim=1) + + return c, uc + + +@torch.inference_mode() +def pytorch2numpy(imgs, quant=True): + results = [] + for x in imgs: + y = x.movedim(0, -1) + + if quant: + y = y * 127.5 + 127.5 + y = y.detach().float().cpu().numpy().clip(0, 255).astype(np.uint8) + else: + y = y * 0.5 + 0.5 + y = y.detach().float().cpu().numpy().clip(0, 1).astype(np.float32) + + results.append(y) + return results + + +@torch.inference_mode() +def numpy2pytorch(imgs): + h = torch.from_numpy(np.stack(imgs, axis=0)).float() / 127.0 - 1.0 # so that 127 must be strictly 0.0 + h = h.movedim(-1, 1) + return h + + +def resize_and_center_crop(image, target_width, target_height): + pil_image = Image.fromarray(image) + original_width, original_height = pil_image.size + scale_factor = max(target_width / original_width, target_height / original_height) + resized_width = int(round(original_width * scale_factor)) + resized_height = int(round(original_height * scale_factor)) + resized_image = pil_image.resize((resized_width, resized_height), Image.LANCZOS) + left = (resized_width - target_width) / 2 + top = (resized_height - target_height) / 2 + right = (resized_width + target_width) / 2 + bottom = (resized_height + target_height) / 2 + cropped_image = resized_image.crop((left, top, right, bottom)) + return np.array(cropped_image) + + +def resize_without_crop(image, target_width, target_height): + pil_image = Image.fromarray(image) + resized_image = pil_image.resize((target_width, target_height), Image.LANCZOS) + return np.array(resized_image) + + +@torch.inference_mode() +def run_rmbg(img, sigma=0.0): + H, W, C = img.shape + assert C == 3 + k = (256.0 / float(H * W)) ** 0.5 + feed = resize_without_crop(img, int(64 * round(W * k)), int(64 * round(H * k))) + feed = numpy2pytorch([feed]).to(device=device, dtype=torch.float32) + alpha = rmbg(feed)[0][0] + alpha = torch.nn.functional.interpolate(alpha, size=(H, W), mode="bilinear") + alpha = alpha.movedim(1, -1)[0] + alpha = alpha.detach().float().cpu().numpy().clip(0, 1) + result = 127 + (img.astype(np.float32) - 127 + sigma) * alpha + return result.clip(0, 255).astype(np.uint8), alpha + + +@torch.inference_mode() +def process(input_fg, input_bg, prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, bg_source): + bg_source = BGSource(bg_source) + + if bg_source == BGSource.UPLOAD: + pass + elif bg_source == BGSource.UPLOAD_FLIP: + input_bg = np.fliplr(input_bg) + elif bg_source == BGSource.GREY: + input_bg = np.zeros(shape=(image_height, image_width, 3), dtype=np.uint8) + 64 + elif bg_source == BGSource.LEFT: + gradient = np.linspace(224, 32, image_width) + image = np.tile(gradient, (image_height, 1)) + input_bg = np.stack((image,) * 3, axis=-1).astype(np.uint8) + elif bg_source == BGSource.RIGHT: + gradient = np.linspace(32, 224, image_width) + image = np.tile(gradient, (image_height, 1)) + input_bg = np.stack((image,) * 3, axis=-1).astype(np.uint8) + elif bg_source == BGSource.TOP: + gradient = np.linspace(224, 32, image_height)[:, None] + image = np.tile(gradient, (1, image_width)) + input_bg = np.stack((image,) * 3, axis=-1).astype(np.uint8) + elif bg_source == BGSource.BOTTOM: + gradient = np.linspace(32, 224, image_height)[:, None] + image = np.tile(gradient, (1, image_width)) + input_bg = np.stack((image,) * 3, axis=-1).astype(np.uint8) + else: + raise 'Wrong background source!' 
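+    # the same seeded generator drives both the initial pass and the hires refinement,
+    # keeping the two-stage result reproducible for a given seed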
+ + rng = torch.Generator(device=device).manual_seed(seed) + + fg = resize_and_center_crop(input_fg, image_width, image_height) + bg = resize_and_center_crop(input_bg, image_width, image_height) + concat_conds = numpy2pytorch([fg, bg]).to(device=vae.device, dtype=vae.dtype) + concat_conds = vae.encode(concat_conds).latent_dist.mode() * vae.config.scaling_factor + concat_conds = torch.cat([c[None, ...] for c in concat_conds], dim=1) + + conds, unconds = encode_prompt_pair(positive_prompt=prompt + ', ' + a_prompt, negative_prompt=n_prompt) + + latents = t2i_pipe( + prompt_embeds=conds, + negative_prompt_embeds=unconds, + width=image_width, + height=image_height, + num_inference_steps=steps, + num_images_per_prompt=num_samples, + generator=rng, + output_type='latent', + guidance_scale=cfg, + cross_attention_kwargs={'concat_conds': concat_conds}, + ).images.to(vae.dtype) / vae.config.scaling_factor + + pixels = vae.decode(latents).sample + pixels = pytorch2numpy(pixels) + + pixels = [resize_without_crop( + image=p, + target_width=int(round(image_width * highres_scale / 64.0) * 64), + target_height=int(round(image_height * highres_scale / 64.0) * 64)) + for p in pixels] + + pixels = numpy2pytorch(pixels).to(device=vae.device, dtype=vae.dtype) + latents = vae.encode(pixels).latent_dist.mode() * vae.config.scaling_factor + latents = latents.to(device=unet.device, dtype=unet.dtype) + + image_height, image_width = latents.shape[2] * 8, latents.shape[3] * 8 + fg = resize_and_center_crop(input_fg, image_width, image_height) + bg = resize_and_center_crop(input_bg, image_width, image_height) + concat_conds = numpy2pytorch([fg, bg]).to(device=vae.device, dtype=vae.dtype) + concat_conds = vae.encode(concat_conds).latent_dist.mode() * vae.config.scaling_factor + concat_conds = torch.cat([c[None, ...] 
for c in concat_conds], dim=1) + + latents = i2i_pipe( + image=latents, + strength=highres_denoise, + prompt_embeds=conds, + negative_prompt_embeds=unconds, + width=image_width, + height=image_height, + num_inference_steps=int(round(steps / highres_denoise)), + num_images_per_prompt=num_samples, + generator=rng, + output_type='latent', + guidance_scale=cfg, + cross_attention_kwargs={'concat_conds': concat_conds}, + ).images.to(vae.dtype) / vae.config.scaling_factor + pixels = vae.decode(latents).sample + pixels = pytorch2numpy(pixels, quant=False) + results = [(x * 255.0).clip(0, 255).astype(np.uint8) for x in pixels] + result = Image.fromarray(results[0].astype('uint8'), 'RGB') + return result + + +@torch.inference_mode() +def process_relight(input_fg, input_bg, prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, bg_source): + input_fg, matting = run_rmbg(input_fg) + result = process(input_fg, input_bg, prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, bg_source) + return result + + +quick_prompts = [ + 'beautiful woman', + 'handsome man', + 'beautiful woman, cinematic lighting', + 'handsome man, cinematic lighting', + 'beautiful woman, natural lighting', + 'handsome man, natural lighting', + 'beautiful woman, neo punk lighting, cyberpunk', + 'handsome man, neo punk lighting, cyberpunk', +] +quick_prompts = [[x] for x in quick_prompts] + + +class BGSource(Enum): + UPLOAD = "Use Background Image" + UPLOAD_FLIP = "Use Flipped Background Image" + LEFT = "Left Light" + RIGHT = "Right Light" + TOP = "Top Light" + BOTTOM = "Bottom Light" + GREY = "Ambient" + + +preference = { + 'Use Background Image': BGSource.UPLOAD, + 'None': BGSource.LEFT, + 'Left': BGSource.LEFT, + 'Right': BGSource.RIGHT, + 'Top': BGSource.TOP, + 'Bottom': BGSource.BOTTOM, +} + + +def release(): + global vae, unet, rmbg, text_encoder, tokenizer + global ddim_scheduler, euler_a_scheduler, dpmpp_2m_sde_karras_scheduler, t2i_pipe, i2i_pipe + dev = torch.device('cpu') + text_encoder = text_encoder.to(device=dev, dtype=torch.float16) + vae = vae.to(device=dev, dtype=torch.bfloat16) + unet = unet.to(device=dev, dtype=torch.float16) + rmbg = rmbg.to(device=dev, dtype=torch.float32) + del ddim_scheduler, euler_a_scheduler, dpmpp_2m_sde_karras_scheduler, t2i_pipe, i2i_pipe + del vae, unet, rmbg, text_encoder, tokenizer + if torch.cuda.is_available(): + gc.collect() + torch.cuda.empty_cache() + torch.cuda.ipc_collect() diff --git a/3-bmab/sd_bmab/external/iclight/iclightnm.py b/3-bmab/sd_bmab/external/iclight/iclightnm.py new file mode 100644 index 0000000000000000000000000000000000000000..74267fc940d75a7a9db70acfae794073fbc60840 --- /dev/null +++ b/3-bmab/sd_bmab/external/iclight/iclightnm.py @@ -0,0 +1,381 @@ +''' +THIS CODE FROM https://github.com/lllyasviel/IC-Light +License : Apache 2.0 +''' + +import gc +import os +import math +import numpy as np +import torch +import safetensors.torch as sf + +from PIL import Image +from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline +from diffusers import AutoencoderKL, UNet2DConditionModel, DDIMScheduler, EulerAncestralDiscreteScheduler, DPMSolverMultistepScheduler +from diffusers.models.attention_processor import AttnProcessor2_0 +from transformers import CLIPTextModel, CLIPTokenizer +from sd_bmab.external.iclight.briarmbg import BriaRMBG +from enum import Enum +from torch.hub import download_url_to_file + + +# 
'stablediffusionapi/realistic-vision-v51' +# 'runwayml/stable-diffusion-v1-5' +sd15_name = 'stablediffusionapi/realistic-vision-v51' +tokenizer = CLIPTokenizer.from_pretrained(sd15_name, subfolder="tokenizer") +text_encoder = CLIPTextModel.from_pretrained(sd15_name, subfolder="text_encoder") +vae = AutoencoderKL.from_pretrained(sd15_name, subfolder="vae") +unet = UNet2DConditionModel.from_pretrained(sd15_name, subfolder="unet") +rmbg = BriaRMBG.from_pretrained("briaai/RMBG-1.4") + +# Change UNet + +with torch.no_grad(): + new_conv_in = torch.nn.Conv2d(8, unet.conv_in.out_channels, unet.conv_in.kernel_size, unet.conv_in.stride, unet.conv_in.padding) + new_conv_in.weight.zero_() + new_conv_in.weight[:, :4, :, :].copy_(unet.conv_in.weight) + new_conv_in.bias = unet.conv_in.bias + unet.conv_in = new_conv_in + +unet_original_forward = unet.forward + + +def hooked_unet_forward(sample, timestep, encoder_hidden_states, **kwargs): + c_concat = kwargs['cross_attention_kwargs']['concat_conds'].to(sample) + c_concat = torch.cat([c_concat] * (sample.shape[0] // c_concat.shape[0]), dim=0) + new_sample = torch.cat([sample, c_concat], dim=1) + kwargs['cross_attention_kwargs'] = {} + return unet_original_forward(new_sample, timestep, encoder_hidden_states, **kwargs) + + +unet.forward = hooked_unet_forward + +# Load + +model_path = './models/iclight_sd15_fc.safetensors' + +if not os.path.exists(model_path): + download_url_to_file(url='https://huggingface.co/lllyasviel/ic-light/resolve/main/iclight_sd15_fc.safetensors', dst=model_path) + +sd_offset = sf.load_file(model_path) +sd_origin = unet.state_dict() +keys = sd_origin.keys() +sd_merged = {k: sd_origin[k] + sd_offset[k] for k in sd_origin.keys()} +unet.load_state_dict(sd_merged, strict=True) +del sd_offset, sd_origin, sd_merged, keys + +# Device + +device = torch.device('cuda') +text_encoder = text_encoder.to(device=device, dtype=torch.float16) +vae = vae.to(device=device, dtype=torch.bfloat16) +unet = unet.to(device=device, dtype=torch.float16) +rmbg = rmbg.to(device=device, dtype=torch.float32) + +# SDP + +unet.set_attn_processor(AttnProcessor2_0()) +vae.set_attn_processor(AttnProcessor2_0()) + +# Samplers + +ddim_scheduler = DDIMScheduler( + num_train_timesteps=1000, + beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + clip_sample=False, + set_alpha_to_one=False, + steps_offset=1, +) + +euler_a_scheduler = EulerAncestralDiscreteScheduler( + num_train_timesteps=1000, + beta_start=0.00085, + beta_end=0.012, + steps_offset=1 +) + +dpmpp_2m_sde_karras_scheduler = DPMSolverMultistepScheduler( + num_train_timesteps=1000, + beta_start=0.00085, + beta_end=0.012, + algorithm_type="sde-dpmsolver++", + use_karras_sigmas=True, + steps_offset=1 +) + +# Pipelines + +t2i_pipe = StableDiffusionPipeline( + vae=vae, + text_encoder=text_encoder, + tokenizer=tokenizer, + unet=unet, + scheduler=dpmpp_2m_sde_karras_scheduler, + safety_checker=None, + requires_safety_checker=False, + feature_extractor=None, + image_encoder=None +) + +i2i_pipe = StableDiffusionImg2ImgPipeline( + vae=vae, + text_encoder=text_encoder, + tokenizer=tokenizer, + unet=unet, + scheduler=dpmpp_2m_sde_karras_scheduler, + safety_checker=None, + requires_safety_checker=False, + feature_extractor=None, + image_encoder=None +) + + +@torch.inference_mode() +def encode_prompt_inner(txt: str): + max_length = tokenizer.model_max_length + chunk_length = tokenizer.model_max_length - 2 + id_start = tokenizer.bos_token_id + id_end = tokenizer.eos_token_id + id_pad = id_end + + def pad(x, p, 
i): + return x[:i] if len(x) >= i else x + [p] * (i - len(x)) + + tokens = tokenizer(txt, truncation=False, add_special_tokens=False)["input_ids"] + chunks = [[id_start] + tokens[i: i + chunk_length] + [id_end] for i in range(0, len(tokens), chunk_length)] + chunks = [pad(ck, id_pad, max_length) for ck in chunks] + + token_ids = torch.tensor(chunks).to(device=device, dtype=torch.int64) + conds = text_encoder(token_ids).last_hidden_state + + return conds + + +@torch.inference_mode() +def encode_prompt_pair(positive_prompt, negative_prompt): + c = encode_prompt_inner(positive_prompt) + uc = encode_prompt_inner(negative_prompt) + + c_len = float(len(c)) + uc_len = float(len(uc)) + max_count = max(c_len, uc_len) + c_repeat = int(math.ceil(max_count / c_len)) + uc_repeat = int(math.ceil(max_count / uc_len)) + max_chunk = max(len(c), len(uc)) + + c = torch.cat([c] * c_repeat, dim=0)[:max_chunk] + uc = torch.cat([uc] * uc_repeat, dim=0)[:max_chunk] + + c = torch.cat([p[None, ...] for p in c], dim=1) + uc = torch.cat([p[None, ...] for p in uc], dim=1) + + return c, uc + + +@torch.inference_mode() +def pytorch2numpy(imgs, quant=True): + results = [] + for x in imgs: + y = x.movedim(0, -1) + + if quant: + y = y * 127.5 + 127.5 + y = y.detach().float().cpu().numpy().clip(0, 255).astype(np.uint8) + else: + y = y * 0.5 + 0.5 + y = y.detach().float().cpu().numpy().clip(0, 1).astype(np.float32) + + results.append(y) + return results + + +@torch.inference_mode() +def numpy2pytorch(imgs): + h = torch.from_numpy(np.stack(imgs, axis=0)).float() / 127.0 - 1.0 # so that 127 must be strictly 0.0 + h = h.movedim(-1, 1) + return h + + +def resize_and_center_crop(image, target_width, target_height): + pil_image = Image.fromarray(image) + original_width, original_height = pil_image.size + scale_factor = max(target_width / original_width, target_height / original_height) + resized_width = int(round(original_width * scale_factor)) + resized_height = int(round(original_height * scale_factor)) + resized_image = pil_image.resize((resized_width, resized_height), Image.LANCZOS) + left = (resized_width - target_width) / 2 + top = (resized_height - target_height) / 2 + right = (resized_width + target_width) / 2 + bottom = (resized_height + target_height) / 2 + cropped_image = resized_image.crop((left, top, right, bottom)) + return np.array(cropped_image) + + +def resize_without_crop(image, target_width, target_height): + pil_image = Image.fromarray(image) + resized_image = pil_image.resize((target_width, target_height), Image.LANCZOS) + return np.array(resized_image) + + +@torch.inference_mode() +def run_rmbg(img, sigma=0.0): + H, W, C = img.shape + assert C == 3 + k = (256.0 / float(H * W)) ** 0.5 + feed = resize_without_crop(img, int(64 * round(W * k)), int(64 * round(H * k))) + feed = numpy2pytorch([feed]).to(device=device, dtype=torch.float32) + alpha = rmbg(feed)[0][0] + alpha = torch.nn.functional.interpolate(alpha, size=(H, W), mode="bilinear") + alpha = alpha.movedim(1, -1)[0] + alpha = alpha.detach().float().cpu().numpy().clip(0, 1) + result = 127 + (img.astype(np.float32) - 127 + sigma) * alpha + return result.clip(0, 255).astype(np.uint8), alpha + + +@torch.inference_mode() +def process(input_fg, prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source): + bg_source = BGSource(bg_source) + input_bg = None + + if bg_source == BGSource.NONE: + pass + elif bg_source == BGSource.LEFT: + gradient = np.linspace(255, 0, image_width) + 
image = np.tile(gradient, (image_height, 1))
+        input_bg = np.stack((image,) * 3, axis=-1).astype(np.uint8)
+    elif bg_source == BGSource.RIGHT:
+        gradient = np.linspace(0, 255, image_width)
+        image = np.tile(gradient, (image_height, 1))
+        input_bg = np.stack((image,) * 3, axis=-1).astype(np.uint8)
+    elif bg_source == BGSource.TOP:
+        gradient = np.linspace(255, 0, image_height)[:, None]
+        image = np.tile(gradient, (1, image_width))
+        input_bg = np.stack((image,) * 3, axis=-1).astype(np.uint8)
+    elif bg_source == BGSource.BOTTOM:
+        gradient = np.linspace(0, 255, image_height)[:, None]
+        image = np.tile(gradient, (1, image_width))
+        input_bg = np.stack((image,) * 3, axis=-1).astype(np.uint8)
+    else:
+        raise ValueError('Wrong initial latent!')
+
+    rng = torch.Generator(device=device).manual_seed(int(seed))
+
+    fg = resize_and_center_crop(input_fg, image_width, image_height)
+
+    concat_conds = numpy2pytorch([fg]).to(device=vae.device, dtype=vae.dtype)
+    concat_conds = vae.encode(concat_conds).latent_dist.mode() * vae.config.scaling_factor
+
+    conds, unconds = encode_prompt_pair(positive_prompt=prompt + ', ' + a_prompt, negative_prompt=n_prompt)
+
+    if input_bg is None:
+        latents = t2i_pipe(
+            prompt_embeds=conds,
+            negative_prompt_embeds=unconds,
+            width=image_width,
+            height=image_height,
+            num_inference_steps=steps,
+            num_images_per_prompt=num_samples,
+            generator=rng,
+            output_type='latent',
+            guidance_scale=cfg,
+            cross_attention_kwargs={'concat_conds': concat_conds},
+        ).images.to(vae.dtype) / vae.config.scaling_factor
+    else:
+        bg = resize_and_center_crop(input_bg, image_width, image_height)
+        bg_latent = numpy2pytorch([bg]).to(device=vae.device, dtype=vae.dtype)
+        bg_latent = vae.encode(bg_latent).latent_dist.mode() * vae.config.scaling_factor
+        latents = i2i_pipe(
+            image=bg_latent,
+            strength=lowres_denoise,
+            prompt_embeds=conds,
+            negative_prompt_embeds=unconds,
+            width=image_width,
+            height=image_height,
+            num_inference_steps=int(round(steps / lowres_denoise)),
+            num_images_per_prompt=num_samples,
+            generator=rng,
+            output_type='latent',
+            guidance_scale=cfg,
+            cross_attention_kwargs={'concat_conds': concat_conds},
+        ).images.to(vae.dtype) / vae.config.scaling_factor
+
+    pixels = vae.decode(latents).sample
+    pixels = pytorch2numpy(pixels)
+    pixels = [resize_without_crop(
+        image=p,
+        target_width=int(round(image_width * highres_scale / 64.0) * 64),
+        target_height=int(round(image_height * highres_scale / 64.0) * 64))
+        for p in pixels]
+
+    pixels = numpy2pytorch(pixels).to(device=vae.device, dtype=vae.dtype)
+    latents = vae.encode(pixels).latent_dist.mode() * vae.config.scaling_factor
+    latents = latents.to(device=unet.device, dtype=unet.dtype)
+
+    image_height, image_width = latents.shape[2] * 8, latents.shape[3] * 8
+
+    fg = resize_and_center_crop(input_fg, image_width, image_height)
+    concat_conds = numpy2pytorch([fg]).to(device=vae.device, dtype=vae.dtype)
+    concat_conds = vae.encode(concat_conds).latent_dist.mode() * vae.config.scaling_factor
+
+    latents = i2i_pipe(
+        image=latents,
+        strength=highres_denoise,
+        prompt_embeds=conds,
+        negative_prompt_embeds=unconds,
+        width=image_width,
+        height=image_height,
+        num_inference_steps=int(round(steps / highres_denoise)),
+        num_images_per_prompt=num_samples,
+        generator=rng,
+        output_type='latent',
+        guidance_scale=cfg,
+        cross_attention_kwargs={'concat_conds': concat_conds},
+    ).images.to(vae.dtype) / vae.config.scaling_factor
+
+    pixels = vae.decode(latents).sample
+    pixels = pytorch2numpy(pixels)
+    return 
Image.fromarray(pixels[0].astype('uint8'), 'RGB') + + +@torch.inference_mode() +def process_relight(input_fg, prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source): + input_fg, matting = run_rmbg(input_fg) + pref = preference.get(bg_source, BGSource.NONE) + result = process(input_fg, prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, pref) + return result + + +class BGSource(Enum): + NONE = "None" + LEFT = "Left Light" + RIGHT = "Right Light" + TOP = "Top Light" + BOTTOM = "Bottom Light" + + +preference = { + 'None': BGSource.NONE, + 'Left': BGSource.LEFT, + 'Right': BGSource.RIGHT, + 'Top': BGSource.TOP, + 'Bottom': BGSource.BOTTOM, +} + +def release(): + global vae, unet, rmbg, text_encoder, tokenizer + global ddim_scheduler, euler_a_scheduler, dpmpp_2m_sde_karras_scheduler, t2i_pipe, i2i_pipe + dev = torch.device('cpu') + text_encoder = text_encoder.to(device=dev, dtype=torch.float16) + vae = vae.to(device=dev, dtype=torch.bfloat16) + unet = unet.to(device=dev, dtype=torch.float16) + rmbg = rmbg.to(device=dev, dtype=torch.float32) + del ddim_scheduler, euler_a_scheduler, dpmpp_2m_sde_karras_scheduler, t2i_pipe, i2i_pipe + del vae, unet, rmbg, text_encoder, tokenizer + if torch.cuda.is_available(): + gc.collect() + torch.cuda.empty_cache() + torch.cuda.ipc_collect() + diff --git a/3-bmab/sd_bmab/external/kohyahiresfix/__init__.py b/3-bmab/sd_bmab/external/kohyahiresfix/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3f42b6c392054f02f53fc3e4b65f12490957d5de --- /dev/null +++ b/3-bmab/sd_bmab/external/kohyahiresfix/__init__.py @@ -0,0 +1,173 @@ +### https://gist.github.com/kohya-ss/3f774da220df102548093a7abc8538ed +### https://github.com/wcde/sd-webui-kohya-hiresfix + +from pathlib import Path +from omegaconf import DictConfig, OmegaConf +from modules import scripts, script_callbacks +import gradio as gr +import torch + + +CONFIG_PATH = Path(__file__).parent.resolve() / '../config.yaml' + + +class Scaler(torch.nn.Module): + def __init__(self, scale, block, scaler): + super().__init__() + self.scale = scale + self.block = block + self.scaler = scaler + + def forward(self, x, *args): + x = torch.nn.functional.interpolate(x, scale_factor=self.scale, mode=self.scaler) + return self.block(x, *args) + + +class KohyaHiresFix(object): + def __init__(self): + super().__init__() + if not CONFIG_PATH.exists(): + open(CONFIG_PATH, 'w').close() + self.config: DictConfig = OmegaConf.load(CONFIG_PATH) + self.disable = False + self.step_limit = 0 + + def title(self): + return "Kohya Hires.fix" + + def show(self, is_img2img): + return scripts.AlwaysVisible + + def ui(self, is_img2img): + with gr.Accordion(label='Kohya Hires.fix', open=False): + with gr.Row(): + enable = gr.Checkbox(label='Enable extension', value=False) + with gr.Row(): + s1 = gr.Slider(minimum=0, maximum=0.5, step=0.01, label="Stop at", value=self.config.get('s1', 0.15)) + d1 = gr.Slider(minimum=1, maximum=10, step=1, label="Depth", value=self.config.get('d1', 3)) + with gr.Row(): + s2 = gr.Slider(minimum=0, maximum=0.5, step=0.01, label="Stop at", value=self.config.get('s2', 0.3)) + d2 = gr.Slider(minimum=1, maximum=10, step=1, label="Depth", value=self.config.get('d2', 4)) + with gr.Row(): + scaler = gr.Dropdown(['bicubic', 'bilinear', 'nearest', 'nearest-exact'], label='Layer scaler', + value=self.config.get('scaler', 'bicubic')) + 
downscale = gr.Slider(minimum=0.1, maximum=1.0, step=0.05, label="Downsampling scale", + value=self.config.get('downscale', 0.5)) + upscale = gr.Slider(minimum=1.0, maximum=4.0, step=0.1, label="Upsampling scale", + value=self.config.get('upscale', 2.0)) + with gr.Row(): + smooth_scaling = gr.Checkbox(label="Smooth scaling", value=self.config.get('smooth_scaling', True)) + early_out = gr.Checkbox(label="Early upsampling", value=self.config.get('early_out', False)) + only_one_pass = gr.Checkbox(label='Disable for additional passes', + value=self.config.get('only_one_pass', True)) + + ui = [enable, only_one_pass, d1, d2, s1, s2, scaler, downscale, upscale, smooth_scaling, early_out] + for elem in ui: + setattr(elem, "do_not_save_to_config", True) + return ui + + def process(self, p, enable, only_one_pass, d1, d2, s1, s2, scaler, downscale, upscale, smooth_scaling, early_out): + self.config = DictConfig({name: var for name, var in locals().items() if name not in ['self', 'p']}) + if not enable or self.disable: + script_callbacks.remove_current_script_callbacks() + return + model = p.sd_model.model.diffusion_model + if s1 > s2: + self.config.s2 = s1 + self.p1 = (s1, d1 - 1) + self.p2 = (s2, d2 - 1) + self.step_limit = 0 + + def denoiser_callback(params: script_callbacks.CFGDenoiserParams): + if params.sampling_step < self.step_limit: return + for s, d in [self.p1, self.p2]: + out_d = d if self.config.early_out else -(d + 1) + if params.sampling_step < params.total_sampling_steps * s: + if not isinstance(model.input_blocks[d], Scaler): + model.input_blocks[d] = Scaler(self.config.downscale, model.input_blocks[d], self.config.scaler) + model.output_blocks[out_d] = Scaler(self.config.upscale, model.output_blocks[out_d], self.config.scaler) + elif self.config.smooth_scaling: + scale_ratio = params.sampling_step / (params.total_sampling_steps * s) + downscale = min((1 - self.config.downscale) * scale_ratio + self.config.downscale, 1.0) + model.input_blocks[d].scale = downscale + model.output_blocks[out_d].scale = self.config.upscale * (self.config.downscale / downscale) + return + elif isinstance(model.input_blocks[d], Scaler) and (self.p1[1] != self.p2[1] or s == self.p2[0]): + model.input_blocks[d] = model.input_blocks[d].block + model.output_blocks[out_d] = model.output_blocks[out_d].block + self.step_limit = params.sampling_step if self.config.only_one_pass else 0 + + script_callbacks.on_cfg_denoiser(denoiser_callback) + + def postprocess(self, p, processed, *args): + for i, b in enumerate(p.sd_model.model.diffusion_model.input_blocks): + if isinstance(b, Scaler): + p.sd_model.model.diffusion_model.input_blocks[i] = b.block + for i, b in enumerate(p.sd_model.model.diffusion_model.output_blocks): + if isinstance(b, Scaler): + p.sd_model.model.diffusion_model.output_blocks[i] = b.block + OmegaConf.save(self.config, CONFIG_PATH) + + def process_batch(self, p, *args, **kwargs): + self.step_limit = 0 + + +class KohyaHiresFixPreprocessor(object): + def __init__(self, p) -> None: + super().__init__() + self.enabled = False + self.s1 = 0.15 + self.d1 = 3 + self.s2 = 0.4 + self.d2 = 4 + self.scaler = 'bicubic' + self.downsampling = 0.5 + self.upsampling = 2.0 + self.smooth_scaling = True + self.early_upsampling = False + self.only_one_pass = True + self.kohyahiresfix = KohyaHiresFix() + self.args = [] + self.sdprocess = p + self.preprocess(p.context) + + def preprocess(self, context): + if context is None: + return self.enabled + + fix_opt = context.args.get('module_config', {}).get('kohyahiresfix', {}) 
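+        # Illustrative shape of the override block read above (all keys optional,
+        # falling back to the defaults set in __init__):
+        #   context.args = {'module_config': {'kohyahiresfix': {
+        #       'enabled': True, 'stop1': 0.15, 'depth1': 3, 'scaler': 'bicubic'}}}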
+ self.enabled = fix_opt.get('enabled', self.enabled) + self.s1 = fix_opt.get('stop1', self.s1) + self.d1 = fix_opt.get('depth1', self.d1) + self.s2 = fix_opt.get('stop2', self.s2) + self.d2 = fix_opt.get('depth2', self.d2) + self.scaler = fix_opt.get('scaler', self.scaler) + self.downsampling = fix_opt.get('downsampling', self.downsampling) + self.upsampling = fix_opt.get('upsampling', self.upsampling) + self.smooth_scaling = fix_opt.get('smooth_scaling', self.smooth_scaling) + self.early_upsampling = fix_opt.get('early_upsampling', self.early_upsampling) + self.only_one_pass = fix_opt.get('only_one_pass', self.only_one_pass) + self.args = [ + self.enabled, + self.only_one_pass, + self.d1, + self.d2, + self.s1, + self.s2, + self.scaler, + self.downsampling, + self.upsampling, + self.smooth_scaling, + self.early_upsampling + ] + return self.enabled + + def __enter__(self): + if self.enabled: + print('Kohya hires.fix process') + self.kohyahiresfix.process(self.sdprocess, *self.args) + + def __exit__(self, *args, **kwargs): + if self.enabled: + print('Kohya hires.fix postprocess') + self.kohyahiresfix.postprocess(self.sdprocess, None, *self.args) diff --git a/3-bmab/sd_bmab/external/kohyahiresfix/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/external/kohyahiresfix/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3de6ccb60c9e60a92a639b72654c72586e745b76 Binary files /dev/null and b/3-bmab/sd_bmab/external/kohyahiresfix/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/__init__.py b/3-bmab/sd_bmab/external/lama/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..129d886418eaa8805b3964ab7bcbde4ed149f3bf --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/__init__.py @@ -0,0 +1,75 @@ +# https://github.com/advimman/lama +# https://github.com/Mikubill/sd-webui-controlnet + +import os +import cv2 +import yaml +import torch +import numpy as np +from PIL import Image +from omegaconf import OmegaConf +from einops import rearrange + +from modules import devices + +from sd_bmab.external.lama.saicinpainting.training.trainers import load_checkpoint +from sd_bmab import util + + +class LamaInpainting: + + def __init__(self): + self.model = None + self.device = 'cpu' #devices.get_optimal_device() + # self.device = devices.get_optimal_device() + + @staticmethod + def load_image(pilimg, mode='RGB'): + img = np.array(pilimg.convert(mode)) + if img.ndim == 3: + print('transpose') + img = np.transpose(img, (2, 0, 1)) + out_img = img.astype('float32') / 255 + return out_img + + def load_model(self): + config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.yaml') + cfg = yaml.safe_load(open(config_path, 'rt')) + cfg = OmegaConf.create(cfg) + cfg.training_model.predict_only = True + cfg.visualizer.kind = 'noop' + lamapth = util.lazy_loader('ControlNetLama.pth') + self.model = load_checkpoint(cfg, lamapth, strict=False, map_location='cpu') + self.model = self.model.to(self.device) + self.model.eval() + + def unload_model(self): + if self.model is not None: + self.model.cpu() + + def __call__(self, image, mask): + if self.model is None: + self.load_model() + self.model.to(self.device) + + opencv_image = cv2.cvtColor(np.array(image.convert('RGB')), cv2.COLOR_RGB2BGR)[:, :, 0:3] + opencv_mask = cv2.cvtColor(np.array(mask.convert('RGB')), cv2.COLOR_RGB2BGR)[:, :, 0:1] + color = np.ascontiguousarray(opencv_image).astype(np.float32) / 255.0 + mask = 
np.ascontiguousarray(opencv_mask).astype(np.float32) / 255.0 + + with torch.no_grad(): + color = torch.from_numpy(color).float().to(self.device) + mask = torch.from_numpy(mask).float().to(self.device) + mask = (mask > 0.5).float() + color = color * (1 - mask) + image_feed = torch.cat([color, mask], dim=2) + image_feed = rearrange(image_feed, 'h w c -> 1 c h w') + result = self.model(image_feed)[0] + result = rearrange(result, 'c h w -> h w c') + result = result * mask + color * (1 - mask) + result *= 255.0 + + img = result.detach().cpu().numpy().clip(0, 255).astype(np.uint8) + color_coverted = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + pil_image = Image.fromarray(color_coverted) + return pil_image diff --git a/3-bmab/sd_bmab/external/lama/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..735909af1db0824fd6b5a610ec9d6e46c24461bf Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/config.yaml b/3-bmab/sd_bmab/external/lama/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55fd91b5bcacd654e3045a2331e9c186818e6edc --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/config.yaml @@ -0,0 +1,157 @@ +run_title: b18_ffc075_batch8x15 +training_model: + kind: default + visualize_each_iters: 1000 + concat_mask: true + store_discr_outputs_for_vis: true +losses: + l1: + weight_missing: 0 + weight_known: 10 + perceptual: + weight: 0 + adversarial: + kind: r1 + weight: 10 + gp_coef: 0.001 + mask_as_fake_target: true + allow_scale_mask: true + feature_matching: + weight: 100 + resnet_pl: + weight: 30 + weights_path: ${env:TORCH_HOME} + +optimizers: + generator: + kind: adam + lr: 0.001 + discriminator: + kind: adam + lr: 0.0001 +visualizer: + key_order: + - image + - predicted_image + - discr_output_fake + - discr_output_real + - inpainted + rescale_keys: + - discr_output_fake + - discr_output_real + kind: directory + outdir: /group-volume/User-Driven-Content-Generation/r.suvorov/inpainting/experiments/r.suvorov_2021-04-30_14-41-12_train_simple_pix2pix2_gap_sdpl_novgg_large_b18_ffc075_batch8x15/samples +location: + data_root_dir: /group-volume/User-Driven-Content-Generation/datasets/inpainting_data_root_large + out_root_dir: /group-volume/User-Driven-Content-Generation/${env:USER}/inpainting/experiments + tb_dir: /group-volume/User-Driven-Content-Generation/${env:USER}/inpainting/tb_logs +data: + batch_size: 15 + val_batch_size: 2 + num_workers: 3 + train: + indir: ${location.data_root_dir}/train + out_size: 256 + mask_gen_kwargs: + irregular_proba: 1 + irregular_kwargs: + max_angle: 4 + max_len: 200 + max_width: 100 + max_times: 5 + min_times: 1 + box_proba: 1 + box_kwargs: + margin: 10 + bbox_min_size: 30 + bbox_max_size: 150 + max_times: 3 + min_times: 1 + segm_proba: 0 + segm_kwargs: + confidence_threshold: 0.5 + max_object_area: 0.5 + min_mask_area: 0.07 + downsample_levels: 6 + num_variants_per_mask: 1 + rigidness_mode: 1 + max_foreground_coverage: 0.3 + max_foreground_intersection: 0.7 + max_mask_intersection: 0.1 + max_hidden_area: 0.1 + max_scale_change: 0.25 + horizontal_flip: true + max_vertical_shift: 0.2 + position_shuffle: true + transform_variant: distortions + dataloader_kwargs: + batch_size: ${data.batch_size} + shuffle: true + num_workers: ${data.num_workers} + val: + indir: ${location.data_root_dir}/val + img_suffix: .png + dataloader_kwargs: + 
batch_size: ${data.val_batch_size} + shuffle: false + num_workers: ${data.num_workers} + visual_test: + indir: ${location.data_root_dir}/korean_test + img_suffix: _input.png + pad_out_to_modulo: 32 + dataloader_kwargs: + batch_size: 1 + shuffle: false + num_workers: ${data.num_workers} +generator: + kind: ffc_resnet + input_nc: 4 + output_nc: 3 + ngf: 64 + n_downsampling: 3 + n_blocks: 18 + add_out_act: sigmoid + init_conv_kwargs: + ratio_gin: 0 + ratio_gout: 0 + enable_lfu: false + downsample_conv_kwargs: + ratio_gin: ${generator.init_conv_kwargs.ratio_gout} + ratio_gout: ${generator.downsample_conv_kwargs.ratio_gin} + enable_lfu: false + resnet_conv_kwargs: + ratio_gin: 0.75 + ratio_gout: ${generator.resnet_conv_kwargs.ratio_gin} + enable_lfu: false +discriminator: + kind: pix2pixhd_nlayer + input_nc: 3 + ndf: 64 + n_layers: 4 +evaluator: + kind: default + inpainted_key: inpainted + integral_kind: ssim_fid100_f1 +trainer: + kwargs: + gpus: -1 + accelerator: ddp + max_epochs: 200 + gradient_clip_val: 1 + log_gpu_memory: None + limit_train_batches: 25000 + val_check_interval: ${trainer.kwargs.limit_train_batches} + log_every_n_steps: 1000 + precision: 32 + terminate_on_nan: false + check_val_every_n_epoch: 1 + num_sanity_val_steps: 8 + limit_val_batches: 1000 + replace_sampler_ddp: false + checkpoint_kwargs: + verbose: true + save_top_k: 5 + save_last: true + period: 1 + monitor: val_ssim_fid100_f1_total_mean + mode: max diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/__init__.py b/3-bmab/sd_bmab/external/lama/saicinpainting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b4ce2d0fec77fffbcce7fecc84cc3ee3d9f04e6c Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/__pycache__/utils.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..de203bac1af4e9948b40c6fc54ab47fbf91b0e2a Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/__pycache__/utils.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/__init__.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/training/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0490af7df3d0721e8d3ecc0bd48bb6ef8923a78a Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/training/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/data/__init__.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/data/masks.py 
b/3-bmab/sd_bmab/external/lama/saicinpainting/training/data/masks.py new file mode 100644 index 0000000000000000000000000000000000000000..dcb473ce3ea795a8b9e57cd77a20d49e617f9cb7 --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/data/masks.py @@ -0,0 +1,332 @@ +import math +import random +import hashlib +import logging +from enum import Enum + +import cv2 +import numpy as np + +# from sd_bmab.external.lama.saicinpainting.evaluation.masks.mask import SegmentationMask +from sd_bmab.external.lama.saicinpainting.utils import LinearRamp + +LOGGER = logging.getLogger(__name__) + + +class DrawMethod(Enum): + LINE = 'line' + CIRCLE = 'circle' + SQUARE = 'square' + + +def make_random_irregular_mask(shape, max_angle=4, max_len=60, max_width=20, min_times=0, max_times=10, + draw_method=DrawMethod.LINE): + draw_method = DrawMethod(draw_method) + + height, width = shape + mask = np.zeros((height, width), np.float32) + times = np.random.randint(min_times, max_times + 1) + for i in range(times): + start_x = np.random.randint(width) + start_y = np.random.randint(height) + for j in range(1 + np.random.randint(5)): + angle = 0.01 + np.random.randint(max_angle) + if i % 2 == 0: + angle = 2 * 3.1415926 - angle + length = 10 + np.random.randint(max_len) + brush_w = 5 + np.random.randint(max_width) + end_x = np.clip((start_x + length * np.sin(angle)).astype(np.int32), 0, width) + end_y = np.clip((start_y + length * np.cos(angle)).astype(np.int32), 0, height) + if draw_method == DrawMethod.LINE: + cv2.line(mask, (start_x, start_y), (end_x, end_y), 1.0, brush_w) + elif draw_method == DrawMethod.CIRCLE: + cv2.circle(mask, (start_x, start_y), radius=brush_w, color=1., thickness=-1) + elif draw_method == DrawMethod.SQUARE: + radius = brush_w // 2 + mask[start_y - radius:start_y + radius, start_x - radius:start_x + radius] = 1 + start_x, start_y = end_x, end_y + return mask[None, ...] 
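+
+# Illustrative usage (not part of the upstream file): a single irregular stroke
+# on a 256x256 canvas; the result has shape (1, 256, 256) with values in {0, 1}:
+#   mask = make_random_irregular_mask((256, 256), max_len=60, max_width=20,
+#                                     min_times=1, max_times=1)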
+ + +class RandomIrregularMaskGenerator: + def __init__(self, max_angle=4, max_len=60, max_width=20, min_times=0, max_times=10, ramp_kwargs=None, + draw_method=DrawMethod.LINE): + self.max_angle = max_angle + self.max_len = max_len + self.max_width = max_width + self.min_times = min_times + self.max_times = max_times + self.draw_method = draw_method + self.ramp = LinearRamp(**ramp_kwargs) if ramp_kwargs is not None else None + + def __call__(self, img, iter_i=None, raw_image=None): + coef = self.ramp(iter_i) if (self.ramp is not None) and (iter_i is not None) else 1 + cur_max_len = int(max(1, self.max_len * coef)) + cur_max_width = int(max(1, self.max_width * coef)) + cur_max_times = int(self.min_times + 1 + (self.max_times - self.min_times) * coef) + return make_random_irregular_mask(img.shape[1:], max_angle=self.max_angle, max_len=cur_max_len, + max_width=cur_max_width, min_times=self.min_times, max_times=cur_max_times, + draw_method=self.draw_method) + + +def make_random_rectangle_mask(shape, margin=10, bbox_min_size=30, bbox_max_size=100, min_times=0, max_times=3): + height, width = shape + mask = np.zeros((height, width), np.float32) + bbox_max_size = min(bbox_max_size, height - margin * 2, width - margin * 2) + times = np.random.randint(min_times, max_times + 1) + for i in range(times): + box_width = np.random.randint(bbox_min_size, bbox_max_size) + box_height = np.random.randint(bbox_min_size, bbox_max_size) + start_x = np.random.randint(margin, width - margin - box_width + 1) + start_y = np.random.randint(margin, height - margin - box_height + 1) + mask[start_y:start_y + box_height, start_x:start_x + box_width] = 1 + return mask[None, ...] + + +class RandomRectangleMaskGenerator: + def __init__(self, margin=10, bbox_min_size=30, bbox_max_size=100, min_times=0, max_times=3, ramp_kwargs=None): + self.margin = margin + self.bbox_min_size = bbox_min_size + self.bbox_max_size = bbox_max_size + self.min_times = min_times + self.max_times = max_times + self.ramp = LinearRamp(**ramp_kwargs) if ramp_kwargs is not None else None + + def __call__(self, img, iter_i=None, raw_image=None): + coef = self.ramp(iter_i) if (self.ramp is not None) and (iter_i is not None) else 1 + cur_bbox_max_size = int(self.bbox_min_size + 1 + (self.bbox_max_size - self.bbox_min_size) * coef) + cur_max_times = int(self.min_times + (self.max_times - self.min_times) * coef) + return make_random_rectangle_mask(img.shape[1:], margin=self.margin, bbox_min_size=self.bbox_min_size, + bbox_max_size=cur_bbox_max_size, min_times=self.min_times, + max_times=cur_max_times) + + +class RandomSegmentationMaskGenerator: + def __init__(self, **kwargs): + self.impl = None # will be instantiated in first call (effectively in subprocess) + self.kwargs = kwargs + + def __call__(self, img, iter_i=None, raw_image=None): + if self.impl is None: + self.impl = SegmentationMask(**self.kwargs) + + masks = self.impl.get_masks(np.transpose(img, (1, 2, 0))) + masks = [m for m in masks if len(np.unique(m)) > 1] + return np.random.choice(masks) + + +def make_random_superres_mask(shape, min_step=2, max_step=4, min_width=1, max_width=3): + height, width = shape + mask = np.zeros((height, width), np.float32) + step_x = np.random.randint(min_step, max_step + 1) + width_x = np.random.randint(min_width, min(step_x, max_width + 1)) + offset_x = np.random.randint(0, step_x) + + step_y = np.random.randint(min_step, max_step + 1) + width_y = np.random.randint(min_width, min(step_y, max_width + 1)) + offset_y = np.random.randint(0, step_y) + + for dy in 
range(width_y): + mask[offset_y + dy::step_y] = 1 + for dx in range(width_x): + mask[:, offset_x + dx::step_x] = 1 + return mask[None, ...] + + +class RandomSuperresMaskGenerator: + def __init__(self, **kwargs): + self.kwargs = kwargs + + def __call__(self, img, iter_i=None): + return make_random_superres_mask(img.shape[1:], **self.kwargs) + + +class DumbAreaMaskGenerator: + min_ratio = 0.1 + max_ratio = 0.35 + default_ratio = 0.225 + + def __init__(self, is_training): + #Parameters: + # is_training(bool): If true - random rectangular mask, if false - central square mask + self.is_training = is_training + + def _random_vector(self, dimension): + if self.is_training: + lower_limit = math.sqrt(self.min_ratio) + upper_limit = math.sqrt(self.max_ratio) + mask_side = round((random.random() * (upper_limit - lower_limit) + lower_limit) * dimension) + u = random.randint(0, dimension-mask_side-1) + v = u+mask_side + else: + margin = (math.sqrt(self.default_ratio) / 2) * dimension + u = round(dimension/2 - margin) + v = round(dimension/2 + margin) + return u, v + + def __call__(self, img, iter_i=None, raw_image=None): + c, height, width = img.shape + mask = np.zeros((height, width), np.float32) + x1, x2 = self._random_vector(width) + y1, y2 = self._random_vector(height) + mask[x1:x2, y1:y2] = 1 + return mask[None, ...] + + +class OutpaintingMaskGenerator: + def __init__(self, min_padding_percent:float=0.04, max_padding_percent:int=0.25, left_padding_prob:float=0.5, top_padding_prob:float=0.5, + right_padding_prob:float=0.5, bottom_padding_prob:float=0.5, is_fixed_randomness:bool=False): + """ + is_fixed_randomness - get identical paddings for the same image if args are the same + """ + self.min_padding_percent = min_padding_percent + self.max_padding_percent = max_padding_percent + self.probs = [left_padding_prob, top_padding_prob, right_padding_prob, bottom_padding_prob] + self.is_fixed_randomness = is_fixed_randomness + + assert self.min_padding_percent <= self.max_padding_percent + assert self.max_padding_percent > 0 + assert len([x for x in [self.min_padding_percent, self.max_padding_percent] if (x>=0 and x<=1)]) == 2, f"Padding percentage should be in [0,1]" + assert sum(self.probs) > 0, f"At least one of the padding probs should be greater than 0 - {self.probs}" + assert len([x for x in self.probs if (x >= 0) and (x <= 1)]) == 4, f"At least one of padding probs is not in [0,1] - {self.probs}" + if len([x for x in self.probs if x > 0]) == 1: + LOGGER.warning(f"Only one padding prob is greater than zero - {self.probs}. 
That means that the outpainting masks will be always on the same side") + + def apply_padding(self, mask, coord): + mask[int(coord[0][0]*self.img_h):int(coord[1][0]*self.img_h), + int(coord[0][1]*self.img_w):int(coord[1][1]*self.img_w)] = 1 + return mask + + def get_padding(self, size): + n1 = int(self.min_padding_percent*size) + n2 = int(self.max_padding_percent*size) + return self.rnd.randint(n1, n2) / size + + @staticmethod + def _img2rs(img): + arr = np.ascontiguousarray(img.astype(np.uint8)) + str_hash = hashlib.sha1(arr).hexdigest() + res = hash(str_hash)%(2**32) + return res + + def __call__(self, img, iter_i=None, raw_image=None): + c, self.img_h, self.img_w = img.shape + mask = np.zeros((self.img_h, self.img_w), np.float32) + at_least_one_mask_applied = False + + if self.is_fixed_randomness: + assert raw_image is not None, f"Cant calculate hash on raw_image=None" + rs = self._img2rs(raw_image) + self.rnd = np.random.RandomState(rs) + else: + self.rnd = np.random + + coords = [[ + (0,0), + (1,self.get_padding(size=self.img_h)) + ], + [ + (0,0), + (self.get_padding(size=self.img_w),1) + ], + [ + (0,1-self.get_padding(size=self.img_h)), + (1,1) + ], + [ + (1-self.get_padding(size=self.img_w),0), + (1,1) + ]] + + for pp, coord in zip(self.probs, coords): + if self.rnd.random() < pp: + at_least_one_mask_applied = True + mask = self.apply_padding(mask=mask, coord=coord) + + if not at_least_one_mask_applied: + idx = self.rnd.choice(range(len(coords)), p=np.array(self.probs)/sum(self.probs)) + mask = self.apply_padding(mask=mask, coord=coords[idx]) + return mask[None, ...] + + +class MixedMaskGenerator: + def __init__(self, irregular_proba=1/3, irregular_kwargs=None, + box_proba=1/3, box_kwargs=None, + segm_proba=1/3, segm_kwargs=None, + squares_proba=0, squares_kwargs=None, + superres_proba=0, superres_kwargs=None, + outpainting_proba=0, outpainting_kwargs=None, + invert_proba=0): + self.probas = [] + self.gens = [] + + if irregular_proba > 0: + self.probas.append(irregular_proba) + if irregular_kwargs is None: + irregular_kwargs = {} + else: + irregular_kwargs = dict(irregular_kwargs) + irregular_kwargs['draw_method'] = DrawMethod.LINE + self.gens.append(RandomIrregularMaskGenerator(**irregular_kwargs)) + + if box_proba > 0: + self.probas.append(box_proba) + if box_kwargs is None: + box_kwargs = {} + self.gens.append(RandomRectangleMaskGenerator(**box_kwargs)) + + if segm_proba > 0: + self.probas.append(segm_proba) + if segm_kwargs is None: + segm_kwargs = {} + self.gens.append(RandomSegmentationMaskGenerator(**segm_kwargs)) + + if squares_proba > 0: + self.probas.append(squares_proba) + if squares_kwargs is None: + squares_kwargs = {} + else: + squares_kwargs = dict(squares_kwargs) + squares_kwargs['draw_method'] = DrawMethod.SQUARE + self.gens.append(RandomIrregularMaskGenerator(**squares_kwargs)) + + if superres_proba > 0: + self.probas.append(superres_proba) + if superres_kwargs is None: + superres_kwargs = {} + self.gens.append(RandomSuperresMaskGenerator(**superres_kwargs)) + + if outpainting_proba > 0: + self.probas.append(outpainting_proba) + if outpainting_kwargs is None: + outpainting_kwargs = {} + self.gens.append(OutpaintingMaskGenerator(**outpainting_kwargs)) + + self.probas = np.array(self.probas, dtype='float32') + self.probas /= self.probas.sum() + self.invert_proba = invert_proba + + def __call__(self, img, iter_i=None, raw_image=None): + kind = np.random.choice(len(self.probas), p=self.probas) + gen = self.gens[kind] + result = gen(img, iter_i=iter_i, 
raw_image=raw_image) + if self.invert_proba > 0 and random.random() < self.invert_proba: + result = 1 - result + return result + + +def get_mask_generator(kind, kwargs): + if kind is None: + kind = "mixed" + if kwargs is None: + kwargs = {} + + if kind == "mixed": + cl = MixedMaskGenerator + elif kind == "outpainting": + cl = OutpaintingMaskGenerator + elif kind == "dumb": + cl = DumbAreaMaskGenerator + else: + raise NotImplementedError(f"No such generator kind = {kind}") + return cl(**kwargs) diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/__init__.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2560c6a3efc8be120e681390844958f2f07660db Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/__pycache__/distance_weighting.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/__pycache__/distance_weighting.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e4ab9ff00eb2159e6b35557d4f8772873e97d41 Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/__pycache__/distance_weighting.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/__pycache__/feature_matching.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/__pycache__/feature_matching.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d19ed432be6846d9c8b8c9ec2596b32667358f24 Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/__pycache__/feature_matching.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/__pycache__/perceptual.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/__pycache__/perceptual.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f76fcd1c70faac71f4d046d4ee279a38c393369c Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/__pycache__/perceptual.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/adversarial.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/adversarial.py new file mode 100644 index 0000000000000000000000000000000000000000..d6db2967ce5074d94ed3b4c51fc743ff2f7831b1 --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/adversarial.py @@ -0,0 +1,177 @@ +from typing import Tuple, Dict, Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class BaseAdversarialLoss: + def pre_generator_step(self, real_batch: torch.Tensor, fake_batch: torch.Tensor, + generator: nn.Module, discriminator: nn.Module): + """ + Prepare for generator step + :param real_batch: Tensor, a batch of real samples + :param fake_batch: Tensor, a batch of samples produced by generator + :param generator: + :param discriminator: + :return: None + """ + + def 
pre_discriminator_step(self, real_batch: torch.Tensor, fake_batch: torch.Tensor,
+                               generator: nn.Module, discriminator: nn.Module):
+        """
+        Prepare for discriminator step
+        :param real_batch: Tensor, a batch of real samples
+        :param fake_batch: Tensor, a batch of samples produced by generator
+        :param generator:
+        :param discriminator:
+        :return: None
+        """
+
+    def generator_loss(self, real_batch: torch.Tensor, fake_batch: torch.Tensor,
+                       discr_real_pred: torch.Tensor, discr_fake_pred: torch.Tensor,
+                       mask: Optional[torch.Tensor] = None) \
+            -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
+        """
+        Calculate generator loss
+        :param real_batch: Tensor, a batch of real samples
+        :param fake_batch: Tensor, a batch of samples produced by generator
+        :param discr_real_pred: Tensor, discriminator output for real_batch
+        :param discr_fake_pred: Tensor, discriminator output for fake_batch
+        :param mask: Tensor, actual mask, which was at input of generator when making fake_batch
+        :return: total generator loss along with some values that might be interesting to log
+        """
+        raise NotImplementedError()
+
+    def discriminator_loss(self, real_batch: torch.Tensor, fake_batch: torch.Tensor,
+                           discr_real_pred: torch.Tensor, discr_fake_pred: torch.Tensor,
+                           mask: Optional[torch.Tensor] = None) \
+            -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
+        """
+        Calculate discriminator loss and call .backward() on it
+        :param real_batch: Tensor, a batch of real samples
+        :param fake_batch: Tensor, a batch of samples produced by generator
+        :param discr_real_pred: Tensor, discriminator output for real_batch
+        :param discr_fake_pred: Tensor, discriminator output for fake_batch
+        :param mask: Tensor, actual mask, which was at input of generator when making fake_batch
+        :return: total discriminator loss along with some values that might be interesting to log
+        """
+        raise NotImplementedError()
+
+    def interpolate_mask(self, mask, shape):
+        assert mask is not None
+        assert self.allow_scale_mask or shape == mask.shape[-2:]
+        if shape != mask.shape[-2:] and self.allow_scale_mask:
+            if self.mask_scale_mode == 'maxpool':
+                mask = F.adaptive_max_pool2d(mask, shape)
+            else:
+                mask = F.interpolate(mask, size=shape, mode=self.mask_scale_mode)
+        return mask
+
+def make_r1_gp(discr_real_pred, real_batch):
+    if torch.is_grad_enabled():
+        grad_real = torch.autograd.grad(outputs=discr_real_pred.sum(), inputs=real_batch, create_graph=True)[0]
+        grad_penalty = (grad_real.view(grad_real.shape[0], -1).norm(2, dim=1) ** 2).mean()
+    else:
+        grad_penalty = 0
+    real_batch.requires_grad = False
+
+    return grad_penalty
+
+class NonSaturatingWithR1(BaseAdversarialLoss):
+    def __init__(self, gp_coef=5, weight=1, mask_as_fake_target=False, allow_scale_mask=False,
+                 mask_scale_mode='nearest', extra_mask_weight_for_gen=0,
+                 use_unmasked_for_gen=True, use_unmasked_for_discr=True):
+        self.gp_coef = gp_coef
+        self.weight = weight
+        # use for discr => use for gen;
+        # otherwise we teach only the discr to pay attention to very small difference
+        assert use_unmasked_for_gen or (not use_unmasked_for_discr)
+        # mask as target => use unmasked for discr:
+        # if we don't care about unmasked regions at all
+        # then it doesn't matter if the value of mask_as_fake_target is true or false
+        assert use_unmasked_for_discr or (not mask_as_fake_target)
+        self.use_unmasked_for_gen = use_unmasked_for_gen
+        self.use_unmasked_for_discr = use_unmasked_for_discr
+        self.mask_as_fake_target = mask_as_fake_target
+        self.allow_scale_mask = allow_scale_mask
+        self.mask_scale_mode = 
mask_scale_mode + self.extra_mask_weight_for_gen = extra_mask_weight_for_gen + + def generator_loss(self, real_batch: torch.Tensor, fake_batch: torch.Tensor, + discr_real_pred: torch.Tensor, discr_fake_pred: torch.Tensor, + mask=None) \ + -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: + fake_loss = F.softplus(-discr_fake_pred) + if (self.mask_as_fake_target and self.extra_mask_weight_for_gen > 0) or \ + not self.use_unmasked_for_gen: # == if masked region should be treated differently + mask = self.interpolate_mask(mask, discr_fake_pred.shape[-2:]) + if not self.use_unmasked_for_gen: + fake_loss = fake_loss * mask + else: + pixel_weights = 1 + mask * self.extra_mask_weight_for_gen + fake_loss = fake_loss * pixel_weights + + return fake_loss.mean() * self.weight, dict() + + def pre_discriminator_step(self, real_batch: torch.Tensor, fake_batch: torch.Tensor, + generator: nn.Module, discriminator: nn.Module): + real_batch.requires_grad = True + + def discriminator_loss(self, real_batch: torch.Tensor, fake_batch: torch.Tensor, + discr_real_pred: torch.Tensor, discr_fake_pred: torch.Tensor, + mask=None) \ + -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: + + real_loss = F.softplus(-discr_real_pred) + grad_penalty = make_r1_gp(discr_real_pred, real_batch) * self.gp_coef + fake_loss = F.softplus(discr_fake_pred) + + if not self.use_unmasked_for_discr or self.mask_as_fake_target: + # == if masked region should be treated differently + mask = self.interpolate_mask(mask, discr_fake_pred.shape[-2:]) + # use_unmasked_for_discr=False only makes sense for fakes; + # for reals there is no difference beetween two regions + fake_loss = fake_loss * mask + if self.mask_as_fake_target: + fake_loss = fake_loss + (1 - mask) * F.softplus(-discr_fake_pred) + + sum_discr_loss = real_loss + grad_penalty + fake_loss + metrics = dict(discr_real_out=discr_real_pred.mean(), + discr_fake_out=discr_fake_pred.mean(), + discr_real_gp=grad_penalty) + return sum_discr_loss.mean(), metrics + +class BCELoss(BaseAdversarialLoss): + def __init__(self, weight): + self.weight = weight + self.bce_loss = nn.BCEWithLogitsLoss() + + def generator_loss(self, discr_fake_pred: torch.Tensor) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: + real_mask_gt = torch.zeros(discr_fake_pred.shape).to(discr_fake_pred.device) + fake_loss = self.bce_loss(discr_fake_pred, real_mask_gt) * self.weight + return fake_loss, dict() + + def pre_discriminator_step(self, real_batch: torch.Tensor, fake_batch: torch.Tensor, + generator: nn.Module, discriminator: nn.Module): + real_batch.requires_grad = True + + def discriminator_loss(self, + mask: torch.Tensor, + discr_real_pred: torch.Tensor, + discr_fake_pred: torch.Tensor) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: + + real_mask_gt = torch.zeros(discr_real_pred.shape).to(discr_real_pred.device) + sum_discr_loss = (self.bce_loss(discr_real_pred, real_mask_gt) + self.bce_loss(discr_fake_pred, mask)) / 2 + metrics = dict(discr_real_out=discr_real_pred.mean(), + discr_fake_out=discr_fake_pred.mean(), + discr_real_gp=0) + return sum_discr_loss, metrics + + +def make_discrim_loss(kind, **kwargs): + if kind == 'r1': + return NonSaturatingWithR1(**kwargs) + elif kind == 'bce': + return BCELoss(**kwargs) + raise ValueError(f'Unknown adversarial loss kind {kind}') diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/constants.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/constants.py new file mode 100644 index 
0000000000000000000000000000000000000000..ae3e5e151342232be8e2c2a77fe6fd5798dc2a8c --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/constants.py @@ -0,0 +1,152 @@ +weights = {"ade20k": + [6.34517766497462, + 9.328358208955224, + 11.389521640091116, + 16.10305958132045, + 20.833333333333332, + 22.22222222222222, + 25.125628140703515, + 43.29004329004329, + 50.5050505050505, + 54.6448087431694, + 55.24861878453038, + 60.24096385542168, + 62.5, + 66.2251655629139, + 84.74576271186442, + 90.90909090909092, + 91.74311926605505, + 96.15384615384616, + 96.15384615384616, + 97.08737864077669, + 102.04081632653062, + 135.13513513513513, + 149.2537313432836, + 153.84615384615384, + 163.93442622950818, + 166.66666666666666, + 188.67924528301887, + 192.30769230769232, + 217.3913043478261, + 227.27272727272725, + 227.27272727272725, + 227.27272727272725, + 303.03030303030306, + 322.5806451612903, + 333.3333333333333, + 370.3703703703703, + 384.61538461538464, + 416.6666666666667, + 416.6666666666667, + 434.7826086956522, + 434.7826086956522, + 454.5454545454545, + 454.5454545454545, + 500.0, + 526.3157894736842, + 526.3157894736842, + 555.5555555555555, + 555.5555555555555, + 555.5555555555555, + 555.5555555555555, + 555.5555555555555, + 555.5555555555555, + 555.5555555555555, + 588.2352941176471, + 588.2352941176471, + 588.2352941176471, + 588.2352941176471, + 588.2352941176471, + 666.6666666666666, + 666.6666666666666, + 666.6666666666666, + 666.6666666666666, + 714.2857142857143, + 714.2857142857143, + 714.2857142857143, + 714.2857142857143, + 714.2857142857143, + 769.2307692307693, + 769.2307692307693, + 769.2307692307693, + 833.3333333333334, + 833.3333333333334, + 833.3333333333334, + 833.3333333333334, + 909.090909090909, + 1000.0, + 1111.111111111111, + 1111.111111111111, + 1111.111111111111, + 1111.111111111111, + 1111.111111111111, + 1250.0, + 1250.0, + 1250.0, + 1250.0, + 1250.0, + 1428.5714285714287, + 1428.5714285714287, + 1428.5714285714287, + 1428.5714285714287, + 1428.5714285714287, + 1428.5714285714287, + 1428.5714285714287, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 5000.0, + 5000.0, + 5000.0] +} \ No newline at end of file diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/distance_weighting.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/distance_weighting.py new file mode 100644 index 0000000000000000000000000000000000000000..7b48edfb858d2e97882ba8dd31465842237edfed --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/distance_weighting.py @@ -0,0 +1,126 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchvision + +from 
sd_bmab.external.lama.saicinpainting.training.losses.perceptual import IMAGENET_STD, IMAGENET_MEAN + + +def dummy_distance_weighter(real_img, pred_img, mask): + return mask + + +def get_gauss_kernel(kernel_size, width_factor=1): + coords = torch.stack(torch.meshgrid(torch.arange(kernel_size), + torch.arange(kernel_size)), + dim=0).float() + diff = torch.exp(-((coords - kernel_size // 2) ** 2).sum(0) / kernel_size / width_factor) + diff /= diff.sum() + return diff + + +class BlurMask(nn.Module): + def __init__(self, kernel_size=5, width_factor=1): + super().__init__() + self.filter = nn.Conv2d(1, 1, kernel_size, padding=kernel_size // 2, padding_mode='replicate', bias=False) + self.filter.weight.data.copy_(get_gauss_kernel(kernel_size, width_factor=width_factor)) + + def forward(self, real_img, pred_img, mask): + with torch.no_grad(): + result = self.filter(mask) * mask + return result + + +class EmulatedEDTMask(nn.Module): + def __init__(self, dilate_kernel_size=5, blur_kernel_size=5, width_factor=1): + super().__init__() + self.dilate_filter = nn.Conv2d(1, 1, dilate_kernel_size, padding=dilate_kernel_size// 2, padding_mode='replicate', + bias=False) + self.dilate_filter.weight.data.copy_(torch.ones(1, 1, dilate_kernel_size, dilate_kernel_size, dtype=torch.float)) + self.blur_filter = nn.Conv2d(1, 1, blur_kernel_size, padding=blur_kernel_size // 2, padding_mode='replicate', bias=False) + self.blur_filter.weight.data.copy_(get_gauss_kernel(blur_kernel_size, width_factor=width_factor)) + + def forward(self, real_img, pred_img, mask): + with torch.no_grad(): + known_mask = 1 - mask + dilated_known_mask = (self.dilate_filter(known_mask) > 1).float() + result = self.blur_filter(1 - dilated_known_mask) * mask + return result + + +class PropagatePerceptualSim(nn.Module): + def __init__(self, level=2, max_iters=10, temperature=500, erode_mask_size=3): + super().__init__() + vgg = torchvision.models.vgg19(pretrained=True).features + vgg_avg_pooling = [] + + for weights in vgg.parameters(): + weights.requires_grad = False + + cur_level_i = 0 + for module in vgg.modules(): + if module.__class__.__name__ == 'Sequential': + continue + elif module.__class__.__name__ == 'MaxPool2d': + vgg_avg_pooling.append(nn.AvgPool2d(kernel_size=2, stride=2, padding=0)) + else: + vgg_avg_pooling.append(module) + if module.__class__.__name__ == 'ReLU': + cur_level_i += 1 + if cur_level_i == level: + break + + self.features = nn.Sequential(*vgg_avg_pooling) + + self.max_iters = max_iters + self.temperature = temperature + self.do_erode = erode_mask_size > 0 + if self.do_erode: + self.erode_mask = nn.Conv2d(1, 1, erode_mask_size, padding=erode_mask_size // 2, bias=False) + self.erode_mask.weight.data.fill_(1) + + def forward(self, real_img, pred_img, mask): + with torch.no_grad(): + real_img = (real_img - IMAGENET_MEAN.to(real_img)) / IMAGENET_STD.to(real_img) + real_feats = self.features(real_img) + + vertical_sim = torch.exp(-(real_feats[:, :, 1:] - real_feats[:, :, :-1]).pow(2).sum(1, keepdim=True) + / self.temperature) + horizontal_sim = torch.exp(-(real_feats[:, :, :, 1:] - real_feats[:, :, :, :-1]).pow(2).sum(1, keepdim=True) + / self.temperature) + + mask_scaled = F.interpolate(mask, size=real_feats.shape[-2:], mode='bilinear', align_corners=False) + if self.do_erode: + mask_scaled = (self.erode_mask(mask_scaled) > 1).float() + + cur_knowness = 1 - mask_scaled + + for iter_i in range(self.max_iters): + new_top_knowness = F.pad(cur_knowness[:, :, :-1] * vertical_sim, (0, 0, 1, 0), mode='replicate') + 
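+                # Each iteration of this loop diffuses "knowness" one pixel from the
+                # four neighbours (top/bottom/left/right), attenuated by the perceptual
+                # similarity of adjacent VGG features, so confidence flows from the
+                # known region into the hole along perceptually smooth paths; the
+                # elementwise max keeps the strongest evidence seen so far.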
new_bottom_knowness = F.pad(cur_knowness[:, :, 1:] * vertical_sim, (0, 0, 0, 1), mode='replicate') + + new_left_knowness = F.pad(cur_knowness[:, :, :, :-1] * horizontal_sim, (1, 0, 0, 0), mode='replicate') + new_right_knowness = F.pad(cur_knowness[:, :, :, 1:] * horizontal_sim, (0, 1, 0, 0), mode='replicate') + + new_knowness = torch.stack([new_top_knowness, new_bottom_knowness, + new_left_knowness, new_right_knowness], + dim=0).max(0).values + + cur_knowness = torch.max(cur_knowness, new_knowness) + + cur_knowness = F.interpolate(cur_knowness, size=mask.shape[-2:], mode='bilinear') + result = torch.min(mask, 1 - cur_knowness) + + return result + + +def make_mask_distance_weighter(kind='none', **kwargs): + if kind == 'none': + return dummy_distance_weighter + if kind == 'blur': + return BlurMask(**kwargs) + if kind == 'edt': + return EmulatedEDTMask(**kwargs) + if kind == 'pps': + return PropagatePerceptualSim(**kwargs) + raise ValueError(f'Unknown mask distance weighter kind {kind}') diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/feature_matching.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/feature_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..c019895c9178817837d1a6773367b178a861dc61 --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/feature_matching.py @@ -0,0 +1,33 @@ +from typing import List + +import torch +import torch.nn.functional as F + + +def masked_l2_loss(pred, target, mask, weight_known, weight_missing): + per_pixel_l2 = F.mse_loss(pred, target, reduction='none') + pixel_weights = mask * weight_missing + (1 - mask) * weight_known + return (pixel_weights * per_pixel_l2).mean() + + +def masked_l1_loss(pred, target, mask, weight_known, weight_missing): + per_pixel_l1 = F.l1_loss(pred, target, reduction='none') + pixel_weights = mask * weight_missing + (1 - mask) * weight_known + return (pixel_weights * per_pixel_l1).mean() + + +def feature_matching_loss(fake_features: List[torch.Tensor], target_features: List[torch.Tensor], mask=None): + if mask is None: + res = torch.stack([F.mse_loss(fake_feat, target_feat) + for fake_feat, target_feat in zip(fake_features, target_features)]).mean() + else: + res = 0 + norm = 0 + for fake_feat, target_feat in zip(fake_features, target_features): + cur_mask = F.interpolate(mask, size=fake_feat.shape[-2:], mode='bilinear', align_corners=False) + error_weights = 1 - cur_mask + cur_val = ((fake_feat - target_feat).pow(2) * error_weights).mean() + res = res + cur_val + norm += 1 + res = res / norm + return res diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/perceptual.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/perceptual.py new file mode 100644 index 0000000000000000000000000000000000000000..c639dad9ea7189d332a850a568637dbc58ee5f84 --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/perceptual.py @@ -0,0 +1,113 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchvision + +# from models.ade20k import ModelBuilder  # not vendored here; ResNetPL below is its only user and cannot be constructed without it +from sd_bmab.external.lama.saicinpainting.utils import check_and_warn_input_range + + +IMAGENET_MEAN = torch.FloatTensor([0.485, 0.456, 0.406])[None, :, None, None] +IMAGENET_STD = torch.FloatTensor([0.229, 0.224, 0.225])[None, :, None, None] + + +class PerceptualLoss(nn.Module): + def __init__(self, normalize_inputs=True): + super(PerceptualLoss, self).__init__() + + self.normalize_inputs = normalize_inputs + self.mean_ =
IMAGENET_MEAN + self.std_ = IMAGENET_STD + + vgg = torchvision.models.vgg19(pretrained=True).features + vgg_avg_pooling = [] + + for weights in vgg.parameters(): + weights.requires_grad = False + + for module in vgg.modules(): + if module.__class__.__name__ == 'Sequential': + continue + elif module.__class__.__name__ == 'MaxPool2d': + vgg_avg_pooling.append(nn.AvgPool2d(kernel_size=2, stride=2, padding=0)) + else: + vgg_avg_pooling.append(module) + + self.vgg = nn.Sequential(*vgg_avg_pooling) + + def do_normalize_inputs(self, x): + return (x - self.mean_.to(x.device)) / self.std_.to(x.device) + + def partial_losses(self, input, target, mask=None): + check_and_warn_input_range(target, 0, 1, 'PerceptualLoss target in partial_losses') + + # we expect input and target to be in [0, 1] range + losses = [] + + if self.normalize_inputs: + features_input = self.do_normalize_inputs(input) + features_target = self.do_normalize_inputs(target) + else: + features_input = input + features_target = target + + for layer in self.vgg[:30]: + + features_input = layer(features_input) + features_target = layer(features_target) + + if layer.__class__.__name__ == 'ReLU': + loss = F.mse_loss(features_input, features_target, reduction='none') + + if mask is not None: + cur_mask = F.interpolate(mask, size=features_input.shape[-2:], + mode='bilinear', align_corners=False) + loss = loss * (1 - cur_mask) + + loss = loss.mean(dim=tuple(range(1, len(loss.shape)))) + losses.append(loss) + + return losses + + def forward(self, input, target, mask=None): + losses = self.partial_losses(input, target, mask=mask) + return torch.stack(losses).sum(dim=0) + + def get_global_features(self, input): + check_and_warn_input_range(input, 0, 1, 'PerceptualLoss input in get_global_features') + + if self.normalize_inputs: + features_input = self.do_normalize_inputs(input) + else: + features_input = input + + features_input = self.vgg(features_input) + return features_input + + +class ResNetPL(nn.Module): + def __init__(self, weight=1, + weights_path=None, arch_encoder='resnet50dilated', segmentation=True): + super().__init__() + self.impl = ModelBuilder.get_encoder(weights_path=weights_path, + arch_encoder=arch_encoder, + arch_decoder='ppm_deepsup', + fc_dim=2048, + segmentation=segmentation) + self.impl.eval() + for w in self.impl.parameters(): + w.requires_grad_(False) + + self.weight = weight + + def forward(self, pred, target): + pred = (pred - IMAGENET_MEAN.to(pred)) / IMAGENET_STD.to(pred) + target = (target - IMAGENET_MEAN.to(target)) / IMAGENET_STD.to(target) + + pred_feats = self.impl(pred, return_feature_maps=True) + target_feats = self.impl(target, return_feature_maps=True) + + result = torch.stack([F.mse_loss(cur_pred, cur_target) + for cur_pred, cur_target + in zip(pred_feats, target_feats)]).sum() * self.weight + return result diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/segmentation.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/segmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..3d4a9f94eaae84722db584277dbbf9bc41ede357 --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/segmentation.py @@ -0,0 +1,43 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .constants import weights as constant_weights + + +class CrossEntropy2d(nn.Module): + def __init__(self, reduction="mean", ignore_label=255, weights=None, *args, **kwargs): + """ + weight (Tensor, optional): a manual rescaling weight given to 
each class. + If given, has to be a Tensor of size "nclasses" + """ + super(CrossEntropy2d, self).__init__() + self.reduction = reduction + self.ignore_label = ignore_label + self.weights = weights + if self.weights is not None: + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + self.weights = torch.FloatTensor(constant_weights[weights]).to(device) + + def forward(self, predict, target): + """ + Args: + predict:(n, c, h, w) + target:(n, 1, h, w) + """ + target = target.long() + assert not target.requires_grad + assert predict.dim() == 4, "{0}".format(predict.size()) + assert target.dim() == 4, "{0}".format(target.size()) + assert predict.size(0) == target.size(0), "{0} vs {1} ".format(predict.size(0), target.size(0)) + assert target.size(1) == 1, "{0}".format(target.size(1)) + assert predict.size(2) == target.size(2), "{0} vs {1} ".format(predict.size(2), target.size(2)) + assert predict.size(3) == target.size(3), "{0} vs {1} ".format(predict.size(3), target.size(3)) + target = target.squeeze(1) + n, c, h, w = predict.size() + target_mask = (target >= 0) * (target != self.ignore_label) + target = target[target_mask] + predict = predict.transpose(1, 2).transpose(2, 3).contiguous() + predict = predict[target_mask.view(n, h, w, 1).repeat(1, 1, 1, c)].view(-1, c) + loss = F.cross_entropy(predict, target, weight=self.weights, reduction=self.reduction) + return loss diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/style_loss.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/style_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..0bb42d7fbc5d17a47bec7365889868505f5fdfb5 --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/losses/style_loss.py @@ -0,0 +1,155 @@ +import torch +import torch.nn as nn +import torchvision.models as models + + +class PerceptualLoss(nn.Module): + r""" + Perceptual loss, VGG-based + https://arxiv.org/abs/1603.08155 + https://github.com/dxyang/StyleTransfer/blob/master/utils.py + """ + + def __init__(self, weights=[1.0, 1.0, 1.0, 1.0, 1.0]): + super(PerceptualLoss, self).__init__() + self.add_module('vgg', VGG19()) + self.criterion = torch.nn.L1Loss() + self.weights = weights + + def __call__(self, x, y): + # Compute features + x_vgg, y_vgg = self.vgg(x), self.vgg(y) + + content_loss = 0.0 + content_loss += self.weights[0] * self.criterion(x_vgg['relu1_1'], y_vgg['relu1_1']) + content_loss += self.weights[1] * self.criterion(x_vgg['relu2_1'], y_vgg['relu2_1']) + content_loss += self.weights[2] * self.criterion(x_vgg['relu3_1'], y_vgg['relu3_1']) + content_loss += self.weights[3] * self.criterion(x_vgg['relu4_1'], y_vgg['relu4_1']) + content_loss += self.weights[4] * self.criterion(x_vgg['relu5_1'], y_vgg['relu5_1']) + + + return content_loss + + +class VGG19(torch.nn.Module): + def __init__(self): + super(VGG19, self).__init__() + features = models.vgg19(pretrained=True).features + self.relu1_1 = torch.nn.Sequential() + self.relu1_2 = torch.nn.Sequential() + + self.relu2_1 = torch.nn.Sequential() + self.relu2_2 = torch.nn.Sequential() + + self.relu3_1 = torch.nn.Sequential() + self.relu3_2 = torch.nn.Sequential() + self.relu3_3 = torch.nn.Sequential() + self.relu3_4 = torch.nn.Sequential() + + self.relu4_1 = torch.nn.Sequential() + self.relu4_2 = torch.nn.Sequential() + self.relu4_3 = torch.nn.Sequential() + self.relu4_4 = torch.nn.Sequential() + + self.relu5_1 = torch.nn.Sequential() + self.relu5_2 = torch.nn.Sequential() + self.relu5_3 = torch.nn.Sequential() + 
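+        # These empty Sequentials are filled by the loops below, each with a
+        # contiguous slice of torchvision's vgg19().features cut so that every
+        # sub-module ends at a named ReLU; forward() then exposes the
+        # intermediate activations by name.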
self.relu5_4 = torch.nn.Sequential() + + for x in range(2): + self.relu1_1.add_module(str(x), features[x]) + + for x in range(2, 4): + self.relu1_2.add_module(str(x), features[x]) + + for x in range(4, 7): + self.relu2_1.add_module(str(x), features[x]) + + for x in range(7, 9): + self.relu2_2.add_module(str(x), features[x]) + + for x in range(9, 12): + self.relu3_1.add_module(str(x), features[x]) + + for x in range(12, 14): + self.relu3_2.add_module(str(x), features[x]) + + for x in range(14, 16): + self.relu3_3.add_module(str(x), features[x]) + + for x in range(16, 18): + self.relu3_4.add_module(str(x), features[x]) + + for x in range(18, 21): + self.relu4_1.add_module(str(x), features[x]) + + for x in range(21, 23): + self.relu4_2.add_module(str(x), features[x]) + + for x in range(23, 25): + self.relu4_3.add_module(str(x), features[x]) + + for x in range(25, 27): + self.relu4_4.add_module(str(x), features[x]) + + for x in range(27, 30): + self.relu5_1.add_module(str(x), features[x]) + + for x in range(30, 32): + self.relu5_2.add_module(str(x), features[x]) + + for x in range(32, 34): + self.relu5_3.add_module(str(x), features[x]) + + for x in range(34, 36): + self.relu5_4.add_module(str(x), features[x]) + + # don't need the gradients, just want the features + for param in self.parameters(): + param.requires_grad = False + + def forward(self, x): + relu1_1 = self.relu1_1(x) + relu1_2 = self.relu1_2(relu1_1) + + relu2_1 = self.relu2_1(relu1_2) + relu2_2 = self.relu2_2(relu2_1) + + relu3_1 = self.relu3_1(relu2_2) + relu3_2 = self.relu3_2(relu3_1) + relu3_3 = self.relu3_3(relu3_2) + relu3_4 = self.relu3_4(relu3_3) + + relu4_1 = self.relu4_1(relu3_4) + relu4_2 = self.relu4_2(relu4_1) + relu4_3 = self.relu4_3(relu4_2) + relu4_4 = self.relu4_4(relu4_3) + + relu5_1 = self.relu5_1(relu4_4) + relu5_2 = self.relu5_2(relu5_1) + relu5_3 = self.relu5_3(relu5_2) + relu5_4 = self.relu5_4(relu5_3) + + out = { + 'relu1_1': relu1_1, + 'relu1_2': relu1_2, + + 'relu2_1': relu2_1, + 'relu2_2': relu2_2, + + 'relu3_1': relu3_1, + 'relu3_2': relu3_2, + 'relu3_3': relu3_3, + 'relu3_4': relu3_4, + + 'relu4_1': relu4_1, + 'relu4_2': relu4_2, + 'relu4_3': relu4_3, + 'relu4_4': relu4_4, + + 'relu5_1': relu5_1, + 'relu5_2': relu5_2, + 'relu5_3': relu5_3, + 'relu5_4': relu5_4, + } + return out diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__init__.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f88a5b30b3cf240205ca033b8d856e9297f5d676 --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__init__.py @@ -0,0 +1,32 @@ +import logging + +from sd_bmab.external.lama.saicinpainting.training.modules.ffc import FFCResNetGenerator +from sd_bmab.external.lama.saicinpainting.training.modules.pix2pixhd import GlobalGenerator, MultiDilatedGlobalGenerator, \ + NLayerDiscriminator, MultidilatedNLayerDiscriminator + + +def make_generator(config, kind, **kwargs): + logging.info(f'Make generator {kind}') + + if kind == 'pix2pixhd_multidilated': + return MultiDilatedGlobalGenerator(**kwargs) + + if kind == 'pix2pixhd_global': + return GlobalGenerator(**kwargs) + + if kind == 'ffc_resnet': + return FFCResNetGenerator(**kwargs) + + raise ValueError(f'Unknown generator kind {kind}') + + +def make_discriminator(kind, **kwargs): + logging.info(f'Make discriminator {kind}') + + if kind == 'pix2pixhd_nlayer_multidilated': + return MultidilatedNLayerDiscriminator(**kwargs) + + if kind ==
'pix2pixhd_nlayer': + return NLayerDiscriminator(**kwargs) + + raise ValueError(f'Unknown discriminator kind {kind}') diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..39cc1a3c64e99c1d10672674a510cd507487c3e8 Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/base.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/base.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..599970fe25a487d9c6728c03f36881a83277bd5b Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/base.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/depthwise_sep_conv.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/depthwise_sep_conv.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..84bcea19fa17019e2fcda272cbe0772fa6321415 Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/depthwise_sep_conv.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/ffc.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/ffc.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..88eb0015ee50964c8db5d3d69bbfc68bbd5102c3 Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/ffc.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/multidilated_conv.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/multidilated_conv.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1129529b37f6a06304dc4ecac268cd9fabe072ac Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/multidilated_conv.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/pix2pixhd.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/pix2pixhd.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..22ce5b440968e5a1cbff3cdaa158ab9447f7010d Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/pix2pixhd.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/spatial_transform.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/spatial_transform.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f8baa2d4128f8206be266b75fe9178090ea50ae9 Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/spatial_transform.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/squeeze_excitation.cpython-310.pyc 
b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/squeeze_excitation.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9bb48384dcd011a84160f914907fd24824f4ea1e Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/__pycache__/squeeze_excitation.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/base.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/base.py new file mode 100644 index 0000000000000000000000000000000000000000..5410595ef4153e24dffb4c5182071aab95f3dda8 --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/base.py @@ -0,0 +1,80 @@ +import abc +from typing import Tuple, List + +import torch +import torch.nn as nn + +from sd_bmab.external.lama.saicinpainting.training.modules.depthwise_sep_conv import DepthWiseSeperableConv +from sd_bmab.external.lama.saicinpainting.training.modules.multidilated_conv import MultidilatedConv + + +class BaseDiscriminator(nn.Module): + @abc.abstractmethod + def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, List[torch.Tensor]]: + """ + Predict scores and get intermediate activations. Useful for feature matching loss + :return tuple (scores, list of intermediate activations) + """ + raise NotImplementedError() + + +def get_conv_block_ctor(kind='default'): + if not isinstance(kind, str): + return kind + if kind == 'default': + return nn.Conv2d + if kind == 'depthwise': + return DepthWiseSeperableConv + if kind == 'multidilated': + return MultidilatedConv + raise ValueError(f'Unknown convolutional block kind {kind}') + + +def get_norm_layer(kind='bn'): + if not isinstance(kind, str): + return kind + if kind == 'bn': + return nn.BatchNorm2d + if kind == 'in': + return nn.InstanceNorm2d + raise ValueError(f'Unknown norm block kind {kind}') + + +def get_activation(kind='tanh'): + if kind == 'tanh': + return nn.Tanh() + if kind == 'sigmoid': + return nn.Sigmoid() + if kind is False: + return nn.Identity() + raise ValueError(f'Unknown activation kind {kind}') + + +class SimpleMultiStepGenerator(nn.Module): + def __init__(self, steps: List[nn.Module]): + super().__init__() + self.steps = nn.ModuleList(steps) + + def forward(self, x): + cur_in = x + outs = [] + for step in self.steps: + cur_out = step(cur_in) + outs.append(cur_out) + cur_in = torch.cat((cur_in, cur_out), dim=1) + return torch.cat(outs[::-1], dim=1) + +def deconv_factory(kind, ngf, mult, norm_layer, activation, max_features): + if kind == 'convtranspose': + return [nn.ConvTranspose2d(min(max_features, ngf * mult), + min(max_features, int(ngf * mult / 2)), + kernel_size=3, stride=2, padding=1, output_padding=1), + norm_layer(min(max_features, int(ngf * mult / 2))), activation] + elif kind == 'bilinear': + return [nn.Upsample(scale_factor=2, mode='bilinear'), + DepthWiseSeperableConv(min(max_features, ngf * mult), + min(max_features, int(ngf * mult / 2)), + kernel_size=3, stride=1, padding=1), + norm_layer(min(max_features, int(ngf * mult / 2))), activation] + else: + raise Exception(f"Invalid deconv kind: {kind}") \ No newline at end of file diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/depthwise_sep_conv.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/depthwise_sep_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..83dd15c3df1d9f40baf0091a373fa224532c9ddd --- /dev/null +++
b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/depthwise_sep_conv.py @@ -0,0 +1,17 @@ +import torch +import torch.nn as nn + +class DepthWiseSeperableConv(nn.Module): + def __init__(self, in_dim, out_dim, *args, **kwargs): + super().__init__() + if 'groups' in kwargs: + # ignoring groups for Depthwise Sep Conv + del kwargs['groups'] + + self.depthwise = nn.Conv2d(in_dim, in_dim, *args, groups=in_dim, **kwargs) + self.pointwise = nn.Conv2d(in_dim, out_dim, kernel_size=1) + + def forward(self, x): + out = self.depthwise(x) + out = self.pointwise(out) + return out \ No newline at end of file diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/fake_fakes.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/fake_fakes.py new file mode 100644 index 0000000000000000000000000000000000000000..45c4ad559cef2730b771a709197e00ae1c87683c --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/fake_fakes.py @@ -0,0 +1,47 @@ +import torch +from kornia import SamplePadding +from kornia.augmentation import RandomAffine, CenterCrop + + +class FakeFakesGenerator: + def __init__(self, aug_proba=0.5, img_aug_degree=30, img_aug_translate=0.2): + self.grad_aug = RandomAffine(degrees=360, + translate=0.2, + padding_mode=SamplePadding.REFLECTION, + keepdim=False, + p=1) + self.img_aug = RandomAffine(degrees=img_aug_degree, + translate=img_aug_translate, + padding_mode=SamplePadding.REFLECTION, + keepdim=True, + p=1) + self.aug_proba = aug_proba + + def __call__(self, input_images, masks): + blend_masks = self._fill_masks_with_gradient(masks) + blend_target = self._make_blend_target(input_images) + result = input_images * (1 - blend_masks) + blend_target * blend_masks + return result, blend_masks + + def _make_blend_target(self, input_images): + batch_size = input_images.shape[0] + permuted = input_images[torch.randperm(batch_size)] + augmented = self.img_aug(input_images) + is_aug = (torch.rand(batch_size, device=input_images.device)[:, None, None, None] < self.aug_proba).float() + result = augmented * is_aug + permuted * (1 - is_aug) + return result + + def _fill_masks_with_gradient(self, masks): + batch_size, _, height, width = masks.shape + grad = torch.linspace(0, 1, steps=width * 2, device=masks.device, dtype=masks.dtype) \ + .view(1, 1, 1, -1).expand(batch_size, 1, height * 2, width * 2) + grad = self.grad_aug(grad) + grad = CenterCrop((height, width))(grad) + grad *= masks + + grad_for_min = grad + (1 - masks) * 10 + grad -= grad_for_min.view(batch_size, -1).min(-1).values[:, None, None, None] + grad /= grad.view(batch_size, -1).max(-1).values[:, None, None, None] + 1e-6 + grad.clamp_(min=0, max=1) + + return grad diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/ffc.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/ffc.py new file mode 100644 index 0000000000000000000000000000000000000000..42b3830ca4560881d3f0837f8423f0eb094674e0 --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/ffc.py @@ -0,0 +1,485 @@ +# Fast Fourier Convolution NeurIPS 2020 +# original implementation https://github.com/pkumivision/FFC/blob/main/model_zoo/ffc.py +# paper https://proceedings.neurips.cc/paper/2020/file/2fd5d41ec6cfab47e32164d5624269b1-Paper.pdf + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from sd_bmab.external.lama.saicinpainting.training.modules.base import get_activation, BaseDiscriminator +from 
sd_bmab.external.lama.saicinpainting.training.modules.spatial_transform import LearnableSpatialTransformWrapper +from sd_bmab.external.lama.saicinpainting.training.modules.squeeze_excitation import SELayer +from sd_bmab.external.lama.saicinpainting.utils import get_shape + + +class FFCSE_block(nn.Module): + + def __init__(self, channels, ratio_g): + super(FFCSE_block, self).__init__() + in_cg = int(channels * ratio_g) + in_cl = channels - in_cg + r = 16 + + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.conv1 = nn.Conv2d(channels, channels // r, + kernel_size=1, bias=True) + self.relu1 = nn.ReLU(inplace=True) + self.conv_a2l = None if in_cl == 0 else nn.Conv2d( + channels // r, in_cl, kernel_size=1, bias=True) + self.conv_a2g = None if in_cg == 0 else nn.Conv2d( + channels // r, in_cg, kernel_size=1, bias=True) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + x = x if type(x) is tuple else (x, 0) + id_l, id_g = x + + x = id_l if type(id_g) is int else torch.cat([id_l, id_g], dim=1) + x = self.avgpool(x) + x = self.relu1(self.conv1(x)) + + x_l = 0 if self.conv_a2l is None else id_l * \ + self.sigmoid(self.conv_a2l(x)) + x_g = 0 if self.conv_a2g is None else id_g * \ + self.sigmoid(self.conv_a2g(x)) + return x_l, x_g + + +class FourierUnit(nn.Module): + + def __init__(self, in_channels, out_channels, groups=1, spatial_scale_factor=None, spatial_scale_mode='bilinear', + spectral_pos_encoding=False, use_se=False, se_kwargs=None, ffc3d=False, fft_norm='ortho'): + # bn_layer not used + super(FourierUnit, self).__init__() + self.groups = groups + + self.conv_layer = torch.nn.Conv2d(in_channels=in_channels * 2 + (2 if spectral_pos_encoding else 0), + out_channels=out_channels * 2, + kernel_size=1, stride=1, padding=0, groups=self.groups, bias=False) + self.bn = torch.nn.BatchNorm2d(out_channels * 2) + self.relu = torch.nn.ReLU(inplace=True) + + # squeeze and excitation block + self.use_se = use_se + if use_se: + if se_kwargs is None: + se_kwargs = {} + self.se = SELayer(self.conv_layer.in_channels, **se_kwargs) + + self.spatial_scale_factor = spatial_scale_factor + self.spatial_scale_mode = spatial_scale_mode + self.spectral_pos_encoding = spectral_pos_encoding + self.ffc3d = ffc3d + self.fft_norm = fft_norm + + def forward(self, x): + batch = x.shape[0] + + if self.spatial_scale_factor is not None: + orig_size = x.shape[-2:] + x = F.interpolate(x, scale_factor=self.spatial_scale_factor, mode=self.spatial_scale_mode, align_corners=False) + + r_size = x.size() + # (batch, c, h, w/2+1, 2) + fft_dim = (-3, -2, -1) if self.ffc3d else (-2, -1) + ffted = torch.fft.rfftn(x, dim=fft_dim, norm=self.fft_norm) + ffted = torch.stack((ffted.real, ffted.imag), dim=-1) + ffted = ffted.permute(0, 1, 4, 2, 3).contiguous() # (batch, c, 2, h, w/2+1) + ffted = ffted.view((batch, -1,) + ffted.size()[3:]) + + if self.spectral_pos_encoding: + height, width = ffted.shape[-2:] + coords_vert = torch.linspace(0, 1, height)[None, None, :, None].expand(batch, 1, height, width).to(ffted) + coords_hor = torch.linspace(0, 1, width)[None, None, None, :].expand(batch, 1, height, width).to(ffted) + ffted = torch.cat((coords_vert, coords_hor, ffted), dim=1) + + if self.use_se: + ffted = self.se(ffted) + + ffted = self.conv_layer(ffted) # (batch, c*2, h, w/2+1) + ffted = self.relu(self.bn(ffted)) + + ffted = ffted.view((batch, -1, 2,) + ffted.size()[2:]).permute( + 0, 1, 3, 4, 2).contiguous() # (batch,c, t, h, w/2+1, 2) + ffted = torch.complex(ffted[..., 0], ffted[..., 1]) + + ifft_shape_slice = x.shape[-3:] if 
self.ffc3d else x.shape[-2:] + output = torch.fft.irfftn(ffted, s=ifft_shape_slice, dim=fft_dim, norm=self.fft_norm) + + if self.spatial_scale_factor is not None: + output = F.interpolate(output, size=orig_size, mode=self.spatial_scale_mode, align_corners=False) + + return output + + +class SeparableFourierUnit(nn.Module): + + def __init__(self, in_channels, out_channels, groups=1, kernel_size=3): + # bn_layer not used + super(SeparableFourierUnit, self).__init__() + self.groups = groups + row_out_channels = out_channels // 2 + col_out_channels = out_channels - row_out_channels + self.row_conv = torch.nn.Conv2d(in_channels=in_channels * 2, + out_channels=row_out_channels * 2, + kernel_size=(kernel_size, 1), # kernel size is always like this, but the data will be transposed + stride=1, padding=(kernel_size // 2, 0), + padding_mode='reflect', + groups=self.groups, bias=False) + self.col_conv = torch.nn.Conv2d(in_channels=in_channels * 2, + out_channels=col_out_channels * 2, + kernel_size=(kernel_size, 1), # kernel size is always like this, but the data will be transposed + stride=1, padding=(kernel_size // 2, 0), + padding_mode='reflect', + groups=self.groups, bias=False) + self.row_bn = torch.nn.BatchNorm2d(row_out_channels * 2) + self.col_bn = torch.nn.BatchNorm2d(col_out_channels * 2) + self.relu = torch.nn.ReLU(inplace=True) + + def process_branch(self, x, conv, bn): + batch = x.shape[0] + + r_size = x.size() + # (batch, c, h, w/2+1, 2) + ffted = torch.fft.rfft(x, norm="ortho") + ffted = torch.stack((ffted.real, ffted.imag), dim=-1) + ffted = ffted.permute(0, 1, 4, 2, 3).contiguous() # (batch, c, 2, h, w/2+1) + ffted = ffted.view((batch, -1,) + ffted.size()[3:]) + + ffted = self.relu(bn(conv(ffted))) + + ffted = ffted.view((batch, -1, 2,) + ffted.size()[2:]).permute( + 0, 1, 3, 4, 2).contiguous() # (batch,c, t, h, w/2+1, 2) + ffted = torch.complex(ffted[..., 0], ffted[..., 1]) + + output = torch.fft.irfft(ffted, n=x.shape[-1], norm="ortho") + return output + + + def forward(self, x): + rowwise = self.process_branch(x, self.row_conv, self.row_bn) + colwise = self.process_branch(x.permute(0, 1, 3, 2), self.col_conv, self.col_bn).permute(0, 1, 3, 2) + out = torch.cat((rowwise, colwise), dim=1) + return out + + +class SpectralTransform(nn.Module): + + def __init__(self, in_channels, out_channels, stride=1, groups=1, enable_lfu=True, separable_fu=False, **fu_kwargs): + # bn_layer not used + super(SpectralTransform, self).__init__() + self.enable_lfu = enable_lfu + if stride == 2: + self.downsample = nn.AvgPool2d(kernel_size=(2, 2), stride=2) + else: + self.downsample = nn.Identity() + + self.stride = stride + self.conv1 = nn.Sequential( + nn.Conv2d(in_channels, out_channels // + 2, kernel_size=1, groups=groups, bias=False), + nn.BatchNorm2d(out_channels // 2), + nn.ReLU(inplace=True) + ) + fu_class = SeparableFourierUnit if separable_fu else FourierUnit + self.fu = fu_class( + out_channels // 2, out_channels // 2, groups, **fu_kwargs) + if self.enable_lfu: + self.lfu = fu_class( + out_channels // 2, out_channels // 2, groups) + self.conv2 = torch.nn.Conv2d( + out_channels // 2, out_channels, kernel_size=1, groups=groups, bias=False) + + def forward(self, x): + + x = self.downsample(x) + x = self.conv1(x) + output = self.fu(x) + + if self.enable_lfu: + n, c, h, w = x.shape + split_no = 2 + split_s = h // split_no + xs = torch.cat(torch.split( + x[:, :c // 4], split_s, dim=-2), dim=1).contiguous() + xs = torch.cat(torch.split(xs, split_s, dim=-1), + dim=1).contiguous() + xs = self.lfu(xs) + xs 
= xs.repeat(1, 1, split_no, split_no).contiguous() + else: + xs = 0 + + output = self.conv2(x + output + xs) + + return output + + +class FFC(nn.Module): + + def __init__(self, in_channels, out_channels, kernel_size, + ratio_gin, ratio_gout, stride=1, padding=0, + dilation=1, groups=1, bias=False, enable_lfu=True, + padding_type='reflect', gated=False, **spectral_kwargs): + super(FFC, self).__init__() + + assert stride == 1 or stride == 2, "Stride should be 1 or 2." + self.stride = stride + + in_cg = int(in_channels * ratio_gin) + in_cl = in_channels - in_cg + out_cg = int(out_channels * ratio_gout) + out_cl = out_channels - out_cg + #groups_g = 1 if groups == 1 else int(groups * ratio_gout) + #groups_l = 1 if groups == 1 else groups - groups_g + + self.ratio_gin = ratio_gin + self.ratio_gout = ratio_gout + self.global_in_num = in_cg + + module = nn.Identity if in_cl == 0 or out_cl == 0 else nn.Conv2d + self.convl2l = module(in_cl, out_cl, kernel_size, + stride, padding, dilation, groups, bias, padding_mode=padding_type) + module = nn.Identity if in_cl == 0 or out_cg == 0 else nn.Conv2d + self.convl2g = module(in_cl, out_cg, kernel_size, + stride, padding, dilation, groups, bias, padding_mode=padding_type) + module = nn.Identity if in_cg == 0 or out_cl == 0 else nn.Conv2d + self.convg2l = module(in_cg, out_cl, kernel_size, + stride, padding, dilation, groups, bias, padding_mode=padding_type) + module = nn.Identity if in_cg == 0 or out_cg == 0 else SpectralTransform + self.convg2g = module( + in_cg, out_cg, stride, 1 if groups == 1 else groups // 2, enable_lfu, **spectral_kwargs) + + self.gated = gated + module = nn.Identity if in_cg == 0 or out_cl == 0 or not self.gated else nn.Conv2d + self.gate = module(in_channels, 2, 1) + + def forward(self, x): + x_l, x_g = x if type(x) is tuple else (x, 0) + out_xl, out_xg = 0, 0 + + if self.gated: + total_input_parts = [x_l] + if torch.is_tensor(x_g): + total_input_parts.append(x_g) + total_input = torch.cat(total_input_parts, dim=1) + + gates = torch.sigmoid(self.gate(total_input)) + g2l_gate, l2g_gate = gates.chunk(2, dim=1) + else: + g2l_gate, l2g_gate = 1, 1 + + if self.ratio_gout != 1: + out_xl = self.convl2l(x_l) + self.convg2l(x_g) * g2l_gate + if self.ratio_gout != 0: + out_xg = self.convl2g(x_l) * l2g_gate + self.convg2g(x_g) + + return out_xl, out_xg + + +class FFC_BN_ACT(nn.Module): + + def __init__(self, in_channels, out_channels, + kernel_size, ratio_gin, ratio_gout, + stride=1, padding=0, dilation=1, groups=1, bias=False, + norm_layer=nn.BatchNorm2d, activation_layer=nn.Identity, + padding_type='reflect', + enable_lfu=True, **kwargs): + super(FFC_BN_ACT, self).__init__() + self.ffc = FFC(in_channels, out_channels, kernel_size, + ratio_gin, ratio_gout, stride, padding, dilation, + groups, bias, enable_lfu, padding_type=padding_type, **kwargs) + lnorm = nn.Identity if ratio_gout == 1 else norm_layer + gnorm = nn.Identity if ratio_gout == 0 else norm_layer + global_channels = int(out_channels * ratio_gout) + self.bn_l = lnorm(out_channels - global_channels) + self.bn_g = gnorm(global_channels) + + lact = nn.Identity if ratio_gout == 1 else activation_layer + gact = nn.Identity if ratio_gout == 0 else activation_layer + self.act_l = lact(inplace=True) + self.act_g = gact(inplace=True) + + def forward(self, x): + x_l, x_g = self.ffc(x) + x_l = self.act_l(self.bn_l(x_l)) + x_g = self.act_g(self.bn_g(x_g)) + return x_l, x_g + + +class FFCResnetBlock(nn.Module): + def __init__(self, dim, padding_type, norm_layer, activation_layer=nn.ReLU, 
dilation=1, + spatial_transform_kwargs=None, inline=False, **conv_kwargs): + super().__init__() + self.conv1 = FFC_BN_ACT(dim, dim, kernel_size=3, padding=dilation, dilation=dilation, + norm_layer=norm_layer, + activation_layer=activation_layer, + padding_type=padding_type, + **conv_kwargs) + self.conv2 = FFC_BN_ACT(dim, dim, kernel_size=3, padding=dilation, dilation=dilation, + norm_layer=norm_layer, + activation_layer=activation_layer, + padding_type=padding_type, + **conv_kwargs) + if spatial_transform_kwargs is not None: + self.conv1 = LearnableSpatialTransformWrapper(self.conv1, **spatial_transform_kwargs) + self.conv2 = LearnableSpatialTransformWrapper(self.conv2, **spatial_transform_kwargs) + self.inline = inline + + def forward(self, x): + if self.inline: + x_l, x_g = x[:, :-self.conv1.ffc.global_in_num], x[:, -self.conv1.ffc.global_in_num:] + else: + x_l, x_g = x if type(x) is tuple else (x, 0) + + id_l, id_g = x_l, x_g + + x_l, x_g = self.conv1((x_l, x_g)) + x_l, x_g = self.conv2((x_l, x_g)) + + x_l, x_g = id_l + x_l, id_g + x_g + out = x_l, x_g + if self.inline: + out = torch.cat(out, dim=1) + return out + + +class ConcatTupleLayer(nn.Module): + def forward(self, x): + assert isinstance(x, tuple) + x_l, x_g = x + assert torch.is_tensor(x_l) or torch.is_tensor(x_g) + if not torch.is_tensor(x_g): + return x_l + return torch.cat(x, dim=1) + + +class FFCResNetGenerator(nn.Module): + def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, n_blocks=9, norm_layer=nn.BatchNorm2d, + padding_type='reflect', activation_layer=nn.ReLU, + up_norm_layer=nn.BatchNorm2d, up_activation=nn.ReLU(True), + init_conv_kwargs={}, downsample_conv_kwargs={}, resnet_conv_kwargs={}, + spatial_transform_layers=None, spatial_transform_kwargs={}, + add_out_act=True, max_features=1024, out_ffc=False, out_ffc_kwargs={}): + assert (n_blocks >= 0) + super().__init__() + + model = [nn.ReflectionPad2d(3), + FFC_BN_ACT(input_nc, ngf, kernel_size=7, padding=0, norm_layer=norm_layer, + activation_layer=activation_layer, **init_conv_kwargs)] + + ### downsample + for i in range(n_downsampling): + mult = 2 ** i + if i == n_downsampling - 1: + cur_conv_kwargs = dict(downsample_conv_kwargs) + cur_conv_kwargs['ratio_gout'] = resnet_conv_kwargs.get('ratio_gin', 0) + else: + cur_conv_kwargs = downsample_conv_kwargs + model += [FFC_BN_ACT(min(max_features, ngf * mult), + min(max_features, ngf * mult * 2), + kernel_size=3, stride=2, padding=1, + norm_layer=norm_layer, + activation_layer=activation_layer, + **cur_conv_kwargs)] + + mult = 2 ** n_downsampling + feats_num_bottleneck = min(max_features, ngf * mult) + + ### resnet blocks + for i in range(n_blocks): + cur_resblock = FFCResnetBlock(feats_num_bottleneck, padding_type=padding_type, activation_layer=activation_layer, + norm_layer=norm_layer, **resnet_conv_kwargs) + if spatial_transform_layers is not None and i in spatial_transform_layers: + cur_resblock = LearnableSpatialTransformWrapper(cur_resblock, **spatial_transform_kwargs) + model += [cur_resblock] + + model += [ConcatTupleLayer()] + + ### upsample + for i in range(n_downsampling): + mult = 2 ** (n_downsampling - i) + model += [nn.ConvTranspose2d(min(max_features, ngf * mult), + min(max_features, int(ngf * mult / 2)), + kernel_size=3, stride=2, padding=1, output_padding=1), + up_norm_layer(min(max_features, int(ngf * mult / 2))), + up_activation] + + if out_ffc: + model += [FFCResnetBlock(ngf, padding_type=padding_type, activation_layer=activation_layer, + norm_layer=norm_layer, inline=True, 
**out_ffc_kwargs)] + + model += [nn.ReflectionPad2d(3), + nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] + if add_out_act: + model.append(get_activation('tanh' if add_out_act is True else add_out_act)) + self.model = nn.Sequential(*model) + + def forward(self, input): + return self.model(input) + + +class FFCNLayerDiscriminator(BaseDiscriminator): + def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, max_features=512, + init_conv_kwargs={}, conv_kwargs={}): + super().__init__() + self.n_layers = n_layers + + def _act_ctor(inplace=True): + return nn.LeakyReLU(negative_slope=0.2, inplace=inplace) + + kw = 3 + padw = int(np.ceil((kw-1.0)/2)) + sequence = [[FFC_BN_ACT(input_nc, ndf, kernel_size=kw, padding=padw, norm_layer=norm_layer, + activation_layer=_act_ctor, **init_conv_kwargs)]] + + nf = ndf + for n in range(1, n_layers): + nf_prev = nf + nf = min(nf * 2, max_features) + + cur_model = [ + FFC_BN_ACT(nf_prev, nf, + kernel_size=kw, stride=2, padding=padw, + norm_layer=norm_layer, + activation_layer=_act_ctor, + **conv_kwargs) + ] + sequence.append(cur_model) + + nf_prev = nf + nf = min(nf * 2, 512) + + cur_model = [ + FFC_BN_ACT(nf_prev, nf, + kernel_size=kw, stride=1, padding=padw, + norm_layer=norm_layer, + activation_layer=lambda *args, **kwargs: nn.LeakyReLU(*args, negative_slope=0.2, **kwargs), + **conv_kwargs), + ConcatTupleLayer() + ] + sequence.append(cur_model) + + sequence += [[nn.Conv2d(nf, 1, kernel_size=kw, stride=1, padding=padw)]] + + for n in range(len(sequence)): + setattr(self, 'model'+str(n), nn.Sequential(*sequence[n])) + + def get_all_activations(self, x): + res = [x] + for n in range(self.n_layers + 2): + model = getattr(self, 'model' + str(n)) + res.append(model(res[-1])) + return res[1:] + + def forward(self, x): + act = self.get_all_activations(x) + feats = [] + for out in act[:-1]: + if isinstance(out, tuple): + if torch.is_tensor(out[1]): + out = torch.cat(out, dim=1) + else: + out = out[0] + feats.append(out) + return act[-1], feats diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/multidilated_conv.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/multidilated_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..7a5de1f04c55ec410b86fc1492ee7821017a9021 --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/multidilated_conv.py @@ -0,0 +1,98 @@ +import torch +import torch.nn as nn +import random +from sd_bmab.external.lama.saicinpainting.training.modules.depthwise_sep_conv import DepthWiseSeperableConv + +class MultidilatedConv(nn.Module): + def __init__(self, in_dim, out_dim, kernel_size, dilation_num=3, comb_mode='sum', equal_dim=True, + shared_weights=False, padding=1, min_dilation=1, shuffle_in_channels=False, use_depthwise=False, **kwargs): + super().__init__() + convs = [] + self.equal_dim = equal_dim + assert comb_mode in ('cat_out', 'sum', 'cat_in', 'cat_both'), comb_mode + if comb_mode in ('cat_out', 'cat_both'): + self.cat_out = True + if equal_dim: + assert out_dim % dilation_num == 0 + out_dims = [out_dim // dilation_num] * dilation_num + self.index = sum([[i + j * (out_dims[0]) for j in range(dilation_num)] for i in range(out_dims[0])], []) + else: + out_dims = [out_dim // 2 ** (i + 1) for i in range(dilation_num - 1)] + out_dims.append(out_dim - sum(out_dims)) + index = [] + starts = [0] + out_dims[:-1] + lengths = [out_dims[i] // out_dims[-1] for i in range(dilation_num)] + for i in range(out_dims[-1]): + for j in 
range(dilation_num): + index += list(range(starts[j], starts[j] + lengths[j])) + starts[j] += lengths[j] + self.index = index + assert(len(index) == out_dim) + self.out_dims = out_dims + else: + self.cat_out = False + self.out_dims = [out_dim] * dilation_num + + if comb_mode in ('cat_in', 'cat_both'): + if equal_dim: + assert in_dim % dilation_num == 0 + in_dims = [in_dim // dilation_num] * dilation_num + else: + in_dims = [in_dim // 2 ** (i + 1) for i in range(dilation_num - 1)] + in_dims.append(in_dim - sum(in_dims)) + self.in_dims = in_dims + self.cat_in = True + else: + self.cat_in = False + self.in_dims = [in_dim] * dilation_num + + conv_type = DepthWiseSeperableConv if use_depthwise else nn.Conv2d + dilation = min_dilation + for i in range(dilation_num): + if isinstance(padding, int): + cur_padding = padding * dilation + else: + cur_padding = padding[i] + convs.append(conv_type( + self.in_dims[i], self.out_dims[i], kernel_size, padding=cur_padding, dilation=dilation, **kwargs + )) + if i > 0 and shared_weights: + convs[-1].weight = convs[0].weight + convs[-1].bias = convs[0].bias + dilation *= 2 + self.convs = nn.ModuleList(convs) + + self.shuffle_in_channels = shuffle_in_channels + if self.shuffle_in_channels: + # shuffle list as shuffling of tensors is nondeterministic + in_channels_permute = list(range(in_dim)) + random.shuffle(in_channels_permute) + # save as buffer so it is saved and loaded with checkpoint + self.register_buffer('in_channels_permute', torch.tensor(in_channels_permute)) + + def forward(self, x): + if self.shuffle_in_channels: + x = x[:, self.in_channels_permute] + + outs = [] + if self.cat_in: + if self.equal_dim: + x = x.chunk(len(self.convs), dim=1) + else: + new_x = [] + start = 0 + for dim in self.in_dims: + new_x.append(x[:, start:start+dim]) + start += dim + x = new_x + for i, conv in enumerate(self.convs): + if self.cat_in: + input = x[i] + else: + input = x + outs.append(conv(input)) + if self.cat_out: + out = torch.cat(outs, dim=1)[:, self.index] + else: + out = sum(outs) + return out diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/multiscale.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/multiscale.py new file mode 100644 index 0000000000000000000000000000000000000000..66233b8ccdf7a1a4e381bd043cd5e82e59aea55b --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/multiscale.py @@ -0,0 +1,244 @@ +from typing import List, Tuple, Union, Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from sd_bmab.external.lama.saicinpainting.training.modules.base import get_conv_block_ctor, get_activation +from sd_bmab.external.lama.saicinpainting.training.modules.pix2pixhd import ResnetBlock + + +class ResNetHead(nn.Module): + def __init__(self, input_nc, ngf=64, n_downsampling=3, n_blocks=9, norm_layer=nn.BatchNorm2d, + padding_type='reflect', conv_kind='default', activation=nn.ReLU(True)): + assert (n_blocks >= 0) + super(ResNetHead, self).__init__() + + conv_layer = get_conv_block_ctor(conv_kind) + + model = [nn.ReflectionPad2d(3), + conv_layer(input_nc, ngf, kernel_size=7, padding=0), + norm_layer(ngf), + activation] + + ### downsample + for i in range(n_downsampling): + mult = 2 ** i + model += [conv_layer(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1), + norm_layer(ngf * mult * 2), + activation] + + mult = 2 ** n_downsampling + + ### resnet blocks + for i in range(n_blocks): + model += [ResnetBlock(ngf * mult, padding_type=padding_type, 
activation=activation, norm_layer=norm_layer, + conv_kind=conv_kind)] + + self.model = nn.Sequential(*model) + + def forward(self, input): + return self.model(input) + + +class ResNetTail(nn.Module): + def __init__(self, output_nc, ngf=64, n_downsampling=3, n_blocks=9, norm_layer=nn.BatchNorm2d, + padding_type='reflect', conv_kind='default', activation=nn.ReLU(True), + up_norm_layer=nn.BatchNorm2d, up_activation=nn.ReLU(True), add_out_act=False, out_extra_layers_n=0, + add_in_proj=None): + assert (n_blocks >= 0) + super(ResNetTail, self).__init__() + + mult = 2 ** n_downsampling + + model = [] + + if add_in_proj is not None: + model.append(nn.Conv2d(add_in_proj, ngf * mult, kernel_size=1)) + + ### resnet blocks + for i in range(n_blocks): + model += [ResnetBlock(ngf * mult, padding_type=padding_type, activation=activation, norm_layer=norm_layer, + conv_kind=conv_kind)] + + ### upsample + for i in range(n_downsampling): + mult = 2 ** (n_downsampling - i) + model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=2, padding=1, + output_padding=1), + up_norm_layer(int(ngf * mult / 2)), + up_activation] + self.model = nn.Sequential(*model) + + out_layers = [] + for _ in range(out_extra_layers_n): + out_layers += [nn.Conv2d(ngf, ngf, kernel_size=1, padding=0), + up_norm_layer(ngf), + up_activation] + out_layers += [nn.ReflectionPad2d(3), + nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] + + if add_out_act: + out_layers.append(get_activation('tanh' if add_out_act is True else add_out_act)) + + self.out_proj = nn.Sequential(*out_layers) + + def forward(self, input, return_last_act=False): + features = self.model(input) + out = self.out_proj(features) + if return_last_act: + return out, features + else: + return out + + +class MultiscaleResNet(nn.Module): + def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=2, n_blocks_head=2, n_blocks_tail=6, n_scales=3, + norm_layer=nn.BatchNorm2d, padding_type='reflect', conv_kind='default', activation=nn.ReLU(True), + up_norm_layer=nn.BatchNorm2d, up_activation=nn.ReLU(True), add_out_act=False, out_extra_layers_n=0, + out_cumulative=False, return_only_hr=False): + super().__init__() + + self.heads = nn.ModuleList([ResNetHead(input_nc, ngf=ngf, n_downsampling=n_downsampling, + n_blocks=n_blocks_head, norm_layer=norm_layer, padding_type=padding_type, + conv_kind=conv_kind, activation=activation) + for i in range(n_scales)]) + tail_in_feats = ngf * (2 ** n_downsampling) + ngf + self.tails = nn.ModuleList([ResNetTail(output_nc, + ngf=ngf, n_downsampling=n_downsampling, + n_blocks=n_blocks_tail, norm_layer=norm_layer, padding_type=padding_type, + conv_kind=conv_kind, activation=activation, up_norm_layer=up_norm_layer, + up_activation=up_activation, add_out_act=add_out_act, + out_extra_layers_n=out_extra_layers_n, + add_in_proj=None if (i == n_scales - 1) else tail_in_feats) + for i in range(n_scales)]) + + self.out_cumulative = out_cumulative + self.return_only_hr = return_only_hr + + @property + def num_scales(self): + return len(self.heads) + + def forward(self, ms_inputs: List[torch.Tensor], smallest_scales_num: Optional[int] = None) \ + -> Union[torch.Tensor, List[torch.Tensor]]: + """ + :param ms_inputs: List of inputs of different resolutions from HR to LR + :param smallest_scales_num: int or None, number of smallest scales to take at input + :return: Depending on return_only_hr: + True: Only the most HR output + False: List of outputs of different resolutions from HR to LR + """ + if smallest_scales_num is None: + 
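+            # If the caller does not specify how many scales it is feeding, every
+            # head must receive an input; otherwise only the smallest_scales_num
+            # lowest-resolution heads (the tail of self.heads) are used.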
assert len(self.heads) == len(ms_inputs), (len(self.heads), len(ms_inputs), smallest_scales_num) + smallest_scales_num = len(self.heads) + else: + assert smallest_scales_num == len(ms_inputs) <= len(self.heads), (len(self.heads), len(ms_inputs), smallest_scales_num) + + cur_heads = self.heads[-smallest_scales_num:] + ms_features = [cur_head(cur_inp) for cur_head, cur_inp in zip(cur_heads, ms_inputs)] + + all_outputs = [] + prev_tail_features = None + for i in range(len(ms_features)): + scale_i = -i - 1 + + cur_tail_input = ms_features[-i - 1] + if prev_tail_features is not None: + if prev_tail_features.shape != cur_tail_input.shape: + prev_tail_features = F.interpolate(prev_tail_features, size=cur_tail_input.shape[2:], + mode='bilinear', align_corners=False) + cur_tail_input = torch.cat((cur_tail_input, prev_tail_features), dim=1) + + cur_out, cur_tail_feats = self.tails[scale_i](cur_tail_input, return_last_act=True) + + prev_tail_features = cur_tail_feats + all_outputs.append(cur_out) + + if self.out_cumulative: + all_outputs_cum = [all_outputs[0]] + for i in range(1, len(ms_features)): + cur_out = all_outputs[i] + cur_out_cum = cur_out + F.interpolate(all_outputs_cum[-1], size=cur_out.shape[2:], + mode='bilinear', align_corners=False) + all_outputs_cum.append(cur_out_cum) + all_outputs = all_outputs_cum + + if self.return_only_hr: + return all_outputs[-1] + else: + return all_outputs[::-1] + + +class MultiscaleDiscriminatorSimple(nn.Module): + def __init__(self, ms_impl): + super().__init__() + self.ms_impl = nn.ModuleList(ms_impl) + + @property + def num_scales(self): + return len(self.ms_impl) + + def forward(self, ms_inputs: List[torch.Tensor], smallest_scales_num: Optional[int] = None) \ + -> List[Tuple[torch.Tensor, List[torch.Tensor]]]: + """ + :param ms_inputs: List of inputs of different resolutions from HR to LR + :param smallest_scales_num: int or None, number of smallest scales to take at input + :return: List of pairs (prediction, features) for different resolutions from HR to LR + """ + if smallest_scales_num is None: + assert len(self.ms_impl) == len(ms_inputs), (len(self.ms_impl), len(ms_inputs), smallest_scales_num) + smallest_scales_num = len(self.ms_impl) + else: + assert smallest_scales_num == len(ms_inputs) <= len(self.ms_impl), \ + (len(self.ms_impl), len(ms_inputs), smallest_scales_num) + + return [cur_discr(cur_input) for cur_discr, cur_input in zip(self.ms_impl[-smallest_scales_num:], ms_inputs)] + + +class SingleToMultiScaleInputMixin: + def forward(self, x: torch.Tensor) -> List: + orig_height, orig_width = x.shape[2:] + factors = [2 ** i for i in range(self.num_scales)] + ms_inputs = [F.interpolate(x, size=(orig_height // f, orig_width // f), mode='bilinear', align_corners=False) + for f in factors] + return super().forward(ms_inputs) + + +class GeneratorMultiToSingleOutputMixin: + def forward(self, x): + return super().forward(x)[0] + + +class DiscriminatorMultiToSingleOutputMixin: + def forward(self, x): + out_feat_tuples = super().forward(x) + return out_feat_tuples[0][0], [f for _, flist in out_feat_tuples for f in flist] + + +class DiscriminatorMultiToSingleOutputStackedMixin: + def __init__(self, *args, return_feats_only_levels=None, **kwargs): + super().__init__(*args, **kwargs) + self.return_feats_only_levels = return_feats_only_levels + + def forward(self, x): + out_feat_tuples = super().forward(x) + outs = [out for out, _ in out_feat_tuples] + scaled_outs = [outs[0]] + [F.interpolate(cur_out, size=outs[0].shape[-2:], + mode='bilinear', 
align_corners=False) + for cur_out in outs[1:]] + out = torch.cat(scaled_outs, dim=1) + if self.return_feats_only_levels is not None: + feat_lists = [out_feat_tuples[i][1] for i in self.return_feats_only_levels] + else: + feat_lists = [flist for _, flist in out_feat_tuples] + feats = [f for flist in feat_lists for f in flist] + return out, feats + + +class MultiscaleDiscrSingleInput(SingleToMultiScaleInputMixin, DiscriminatorMultiToSingleOutputStackedMixin, MultiscaleDiscriminatorSimple): + pass + + +class MultiscaleResNetSingle(GeneratorMultiToSingleOutputMixin, SingleToMultiScaleInputMixin, MultiscaleResNet): + pass diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/pix2pixhd.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/pix2pixhd.py new file mode 100644 index 0000000000000000000000000000000000000000..9692bfd0cf7dae6ff460cc379ca1f908165d6dfa --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/pix2pixhd.py @@ -0,0 +1,669 @@ +# original: https://github.com/NVIDIA/pix2pixHD/blob/master/models/networks.py +import collections +from functools import partial +import functools +import logging +from collections import defaultdict + +import numpy as np +import torch.nn as nn + +from sd_bmab.external.lama.saicinpainting.training.modules.base import BaseDiscriminator, deconv_factory, get_conv_block_ctor, get_norm_layer, get_activation +from sd_bmab.external.lama.saicinpainting.training.modules.ffc import FFCResnetBlock +from sd_bmab.external.lama.saicinpainting.training.modules.multidilated_conv import MultidilatedConv + +class DotDict(defaultdict): + # https://stackoverflow.com/questions/2352181/how-to-use-a-dot-to-access-members-of-dictionary + """dot.notation access to dictionary attributes""" + __getattr__ = defaultdict.get + __setattr__ = defaultdict.__setitem__ + __delattr__ = defaultdict.__delitem__ + +class Identity(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return x + + +class ResnetBlock(nn.Module): + def __init__(self, dim, padding_type, norm_layer, activation=nn.ReLU(True), use_dropout=False, conv_kind='default', + dilation=1, in_dim=None, groups=1, second_dilation=None): + super(ResnetBlock, self).__init__() + self.in_dim = in_dim + self.dim = dim + if second_dilation is None: + second_dilation = dilation + self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, activation, use_dropout, + conv_kind=conv_kind, dilation=dilation, in_dim=in_dim, groups=groups, + second_dilation=second_dilation) + + if self.in_dim is not None: + self.input_conv = nn.Conv2d(in_dim, dim, 1) + + self.out_channnels = dim + + def build_conv_block(self, dim, padding_type, norm_layer, activation, use_dropout, conv_kind='default', + dilation=1, in_dim=None, groups=1, second_dilation=1): + conv_layer = get_conv_block_ctor(conv_kind) + + conv_block = [] + p = 0 + if padding_type == 'reflect': + conv_block += [nn.ReflectionPad2d(dilation)] + elif padding_type == 'replicate': + conv_block += [nn.ReplicationPad2d(dilation)] + elif padding_type == 'zero': + p = dilation + else: + raise NotImplementedError('padding [%s] is not implemented' % padding_type) + + if in_dim is None: + in_dim = dim + + conv_block += [conv_layer(in_dim, dim, kernel_size=3, padding=p, dilation=dilation), + norm_layer(dim), + activation] + if use_dropout: + conv_block += [nn.Dropout(0.5)] + + p = 0 + if padding_type == 'reflect': + conv_block += [nn.ReflectionPad2d(second_dilation)] + elif padding_type == 'replicate': + 
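+        # As with the first convolution of the block, 'reflect'/'replicate'
+        # padding is applied as an explicit pad layer while the convolution runs
+        # with padding=0; only 'zero' folds the padding into the convolution via p.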
conv_block += [nn.ReplicationPad2d(second_dilation)] + elif padding_type == 'zero': + p = second_dilation + else: + raise NotImplementedError('padding [%s] is not implemented' % padding_type) + conv_block += [conv_layer(dim, dim, kernel_size=3, padding=p, dilation=second_dilation, groups=groups), + norm_layer(dim)] + + return nn.Sequential(*conv_block) + + def forward(self, x): + x_before = x + if self.in_dim is not None: + x = self.input_conv(x) + out = x + self.conv_block(x_before) + return out + +class ResnetBlock5x5(nn.Module): + def __init__(self, dim, padding_type, norm_layer, activation=nn.ReLU(True), use_dropout=False, conv_kind='default', + dilation=1, in_dim=None, groups=1, second_dilation=None): + super(ResnetBlock5x5, self).__init__() + self.in_dim = in_dim + self.dim = dim + if second_dilation is None: + second_dilation = dilation + self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, activation, use_dropout, + conv_kind=conv_kind, dilation=dilation, in_dim=in_dim, groups=groups, + second_dilation=second_dilation) + + if self.in_dim is not None: + self.input_conv = nn.Conv2d(in_dim, dim, 1) + + self.out_channnels = dim + + def build_conv_block(self, dim, padding_type, norm_layer, activation, use_dropout, conv_kind='default', + dilation=1, in_dim=None, groups=1, second_dilation=1): + conv_layer = get_conv_block_ctor(conv_kind) + + conv_block = [] + p = 0 + if padding_type == 'reflect': + conv_block += [nn.ReflectionPad2d(dilation * 2)] + elif padding_type == 'replicate': + conv_block += [nn.ReplicationPad2d(dilation * 2)] + elif padding_type == 'zero': + p = dilation * 2 + else: + raise NotImplementedError('padding [%s] is not implemented' % padding_type) + + if in_dim is None: + in_dim = dim + + conv_block += [conv_layer(in_dim, dim, kernel_size=5, padding=p, dilation=dilation), + norm_layer(dim), + activation] + if use_dropout: + conv_block += [nn.Dropout(0.5)] + + p = 0 + if padding_type == 'reflect': + conv_block += [nn.ReflectionPad2d(second_dilation * 2)] + elif padding_type == 'replicate': + conv_block += [nn.ReplicationPad2d(second_dilation * 2)] + elif padding_type == 'zero': + p = second_dilation * 2 + else: + raise NotImplementedError('padding [%s] is not implemented' % padding_type) + conv_block += [conv_layer(dim, dim, kernel_size=5, padding=p, dilation=second_dilation, groups=groups), + norm_layer(dim)] + + return nn.Sequential(*conv_block) + + def forward(self, x): + x_before = x + if self.in_dim is not None: + x = self.input_conv(x) + out = x + self.conv_block(x_before) + return out + + +class MultidilatedResnetBlock(nn.Module): + def __init__(self, dim, padding_type, conv_layer, norm_layer, activation=nn.ReLU(True), use_dropout=False): + super().__init__() + self.conv_block = self.build_conv_block(dim, padding_type, conv_layer, norm_layer, activation, use_dropout) + + def build_conv_block(self, dim, padding_type, conv_layer, norm_layer, activation, use_dropout, dilation=1): + conv_block = [] + conv_block += [conv_layer(dim, dim, kernel_size=3, padding_mode=padding_type), + norm_layer(dim), + activation] + if use_dropout: + conv_block += [nn.Dropout(0.5)] + + conv_block += [conv_layer(dim, dim, kernel_size=3, padding_mode=padding_type), + norm_layer(dim)] + + return nn.Sequential(*conv_block) + + def forward(self, x): + out = x + self.conv_block(x) + return out + + +class MultiDilatedGlobalGenerator(nn.Module): + def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, + n_blocks=3, norm_layer=nn.BatchNorm2d, + padding_type='reflect', 
conv_kind='default', + deconv_kind='convtranspose', activation=nn.ReLU(True), + up_norm_layer=nn.BatchNorm2d, affine=None, up_activation=nn.ReLU(True), + add_out_act=True, max_features=1024, multidilation_kwargs={}, + ffc_positions=None, ffc_kwargs={}): + assert (n_blocks >= 0) + super().__init__() + + conv_layer = get_conv_block_ctor(conv_kind) + resnet_conv_layer = functools.partial(get_conv_block_ctor('multidilated'), **multidilation_kwargs) + norm_layer = get_norm_layer(norm_layer) + if affine is not None: + norm_layer = partial(norm_layer, affine=affine) + up_norm_layer = get_norm_layer(up_norm_layer) + if affine is not None: + up_norm_layer = partial(up_norm_layer, affine=affine) + + model = [nn.ReflectionPad2d(3), + conv_layer(input_nc, ngf, kernel_size=7, padding=0), + norm_layer(ngf), + activation] + + identity = Identity() + ### downsample + for i in range(n_downsampling): + mult = 2 ** i + + model += [conv_layer(min(max_features, ngf * mult), + min(max_features, ngf * mult * 2), + kernel_size=3, stride=2, padding=1), + norm_layer(min(max_features, ngf * mult * 2)), + activation] + + mult = 2 ** n_downsampling + feats_num_bottleneck = min(max_features, ngf * mult) + + ### resnet blocks + for i in range(n_blocks): + if ffc_positions is not None and i in ffc_positions: + model += [FFCResnetBlock(feats_num_bottleneck, padding_type, norm_layer, activation_layer=nn.ReLU, + inline=True, **ffc_kwargs)] + model += [MultidilatedResnetBlock(feats_num_bottleneck, padding_type=padding_type, + conv_layer=resnet_conv_layer, activation=activation, + norm_layer=norm_layer)] + + ### upsample + for i in range(n_downsampling): + mult = 2 ** (n_downsampling - i) + model += deconv_factory(deconv_kind, ngf, mult, up_norm_layer, up_activation, max_features) + model += [nn.ReflectionPad2d(3), + nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] + if add_out_act: + model.append(get_activation('tanh' if add_out_act is True else add_out_act)) + self.model = nn.Sequential(*model) + + def forward(self, input): + return self.model(input) + +class ConfigGlobalGenerator(nn.Module): + def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, + n_blocks=3, norm_layer=nn.BatchNorm2d, + padding_type='reflect', conv_kind='default', + deconv_kind='convtranspose', activation=nn.ReLU(True), + up_norm_layer=nn.BatchNorm2d, affine=None, up_activation=nn.ReLU(True), + add_out_act=True, max_features=1024, + manual_block_spec=[], + resnet_block_kind='multidilatedresnetblock', + resnet_conv_kind='multidilated', + resnet_dilation=1, + multidilation_kwargs={}): + assert (n_blocks >= 0) + super().__init__() + + conv_layer = get_conv_block_ctor(conv_kind) + resnet_conv_layer = functools.partial(get_conv_block_ctor(resnet_conv_kind), **multidilation_kwargs) + norm_layer = get_norm_layer(norm_layer) + if affine is not None: + norm_layer = partial(norm_layer, affine=affine) + up_norm_layer = get_norm_layer(up_norm_layer) + if affine is not None: + up_norm_layer = partial(up_norm_layer, affine=affine) + + model = [nn.ReflectionPad2d(3), + conv_layer(input_nc, ngf, kernel_size=7, padding=0), + norm_layer(ngf), + activation] + + identity = Identity() + + ### downsample + for i in range(n_downsampling): + mult = 2 ** i + model += [conv_layer(min(max_features, ngf * mult), + min(max_features, ngf * mult * 2), + kernel_size=3, stride=2, padding=1), + norm_layer(min(max_features, ngf * mult * 2)), + activation] + + mult = 2 ** n_downsampling + feats_num_bottleneck = min(max_features, ngf * mult) + + if len(manual_block_spec) == 
0: + manual_block_spec = [ + DotDict(lambda : None, { + 'n_blocks': n_blocks, + 'use_default': True}) + ] + + ### resnet blocks + for block_spec in manual_block_spec: + def make_and_add_blocks(model, block_spec): + block_spec = DotDict(lambda : None, block_spec) + if not block_spec.use_default: + resnet_conv_layer = functools.partial(get_conv_block_ctor(block_spec.resnet_conv_kind), **block_spec.multidilation_kwargs) + resnet_conv_kind = block_spec.resnet_conv_kind + resnet_block_kind = block_spec.resnet_block_kind + if block_spec.resnet_dilation is not None: + resnet_dilation = block_spec.resnet_dilation + for i in range(block_spec.n_blocks): + if resnet_block_kind == "multidilatedresnetblock": + model += [MultidilatedResnetBlock(feats_num_bottleneck, padding_type=padding_type, + conv_layer=resnet_conv_layer, activation=activation, + norm_layer=norm_layer)] + if resnet_block_kind == "resnetblock": + model += [ResnetBlock(ngf * mult, padding_type=padding_type, activation=activation, norm_layer=norm_layer, + conv_kind=resnet_conv_kind)] + if resnet_block_kind == "resnetblock5x5": + model += [ResnetBlock5x5(ngf * mult, padding_type=padding_type, activation=activation, norm_layer=norm_layer, + conv_kind=resnet_conv_kind)] + if resnet_block_kind == "resnetblockdwdil": + model += [ResnetBlock(ngf * mult, padding_type=padding_type, activation=activation, norm_layer=norm_layer, + conv_kind=resnet_conv_kind, dilation=resnet_dilation, second_dilation=resnet_dilation)] + make_and_add_blocks(model, block_spec) + + ### upsample + for i in range(n_downsampling): + mult = 2 ** (n_downsampling - i) + model += deconv_factory(deconv_kind, ngf, mult, up_norm_layer, up_activation, max_features) + model += [nn.ReflectionPad2d(3), + nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] + if add_out_act: + model.append(get_activation('tanh' if add_out_act is True else add_out_act)) + self.model = nn.Sequential(*model) + + def forward(self, input): + return self.model(input) + + +def make_dil_blocks(dilated_blocks_n, dilation_block_kind, dilated_block_kwargs): + blocks = [] + for i in range(dilated_blocks_n): + if dilation_block_kind == 'simple': + blocks.append(ResnetBlock(**dilated_block_kwargs, dilation=2 ** (i + 1))) + elif dilation_block_kind == 'multi': + blocks.append(MultidilatedResnetBlock(**dilated_block_kwargs)) + else: + raise ValueError(f'dilation_block_kind could not be "{dilation_block_kind}"') + return blocks + + +class GlobalGenerator(nn.Module): + def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, n_blocks=9, norm_layer=nn.BatchNorm2d, + padding_type='reflect', conv_kind='default', activation=nn.ReLU(True), + up_norm_layer=nn.BatchNorm2d, affine=None, + up_activation=nn.ReLU(True), dilated_blocks_n=0, dilated_blocks_n_start=0, + dilated_blocks_n_middle=0, + add_out_act=True, + max_features=1024, is_resblock_depthwise=False, + ffc_positions=None, ffc_kwargs={}, dilation=1, second_dilation=None, + dilation_block_kind='simple', multidilation_kwargs={}): + assert (n_blocks >= 0) + super().__init__() + + conv_layer = get_conv_block_ctor(conv_kind) + norm_layer = get_norm_layer(norm_layer) + if affine is not None: + norm_layer = partial(norm_layer, affine=affine) + up_norm_layer = get_norm_layer(up_norm_layer) + if affine is not None: + up_norm_layer = partial(up_norm_layer, affine=affine) + + if ffc_positions is not None: + ffc_positions = collections.Counter(ffc_positions) + + model = [nn.ReflectionPad2d(3), + conv_layer(input_nc, ngf, kernel_size=7, padding=0), + norm_layer(ngf), + 
activation] + + identity = Identity() + ### downsample + for i in range(n_downsampling): + mult = 2 ** i + + model += [conv_layer(min(max_features, ngf * mult), + min(max_features, ngf * mult * 2), + kernel_size=3, stride=2, padding=1), + norm_layer(min(max_features, ngf * mult * 2)), + activation] + + mult = 2 ** n_downsampling + feats_num_bottleneck = min(max_features, ngf * mult) + + dilated_block_kwargs = dict(dim=feats_num_bottleneck, padding_type=padding_type, + activation=activation, norm_layer=norm_layer) + if dilation_block_kind == 'simple': + dilated_block_kwargs['conv_kind'] = conv_kind + elif dilation_block_kind == 'multi': + dilated_block_kwargs['conv_layer'] = functools.partial( + get_conv_block_ctor('multidilated'), **multidilation_kwargs) + + # dilated blocks at the start of the bottleneck sausage + if dilated_blocks_n_start is not None and dilated_blocks_n_start > 0: + model += make_dil_blocks(dilated_blocks_n_start, dilation_block_kind, dilated_block_kwargs) + + # resnet blocks + for i in range(n_blocks): + # dilated blocks at the middle of the bottleneck sausage + if i == n_blocks // 2 and dilated_blocks_n_middle is not None and dilated_blocks_n_middle > 0: + model += make_dil_blocks(dilated_blocks_n_middle, dilation_block_kind, dilated_block_kwargs) + + if ffc_positions is not None and i in ffc_positions: + for _ in range(ffc_positions[i]): # same position can occur more than once + model += [FFCResnetBlock(feats_num_bottleneck, padding_type, norm_layer, activation_layer=nn.ReLU, + inline=True, **ffc_kwargs)] + + if is_resblock_depthwise: + resblock_groups = feats_num_bottleneck + else: + resblock_groups = 1 + + model += [ResnetBlock(feats_num_bottleneck, padding_type=padding_type, activation=activation, + norm_layer=norm_layer, conv_kind=conv_kind, groups=resblock_groups, + dilation=dilation, second_dilation=second_dilation)] + + + # dilated blocks at the end of the bottleneck sausage + if dilated_blocks_n is not None and dilated_blocks_n > 0: + model += make_dil_blocks(dilated_blocks_n, dilation_block_kind, dilated_block_kwargs) + + # upsample + for i in range(n_downsampling): + mult = 2 ** (n_downsampling - i) + model += [nn.ConvTranspose2d(min(max_features, ngf * mult), + min(max_features, int(ngf * mult / 2)), + kernel_size=3, stride=2, padding=1, output_padding=1), + up_norm_layer(min(max_features, int(ngf * mult / 2))), + up_activation] + model += [nn.ReflectionPad2d(3), + nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] + if add_out_act: + model.append(get_activation('tanh' if add_out_act is True else add_out_act)) + self.model = nn.Sequential(*model) + + def forward(self, input): + return self.model(input) + + +class GlobalGeneratorGated(GlobalGenerator): + def __init__(self, *args, **kwargs): + real_kwargs=dict( + conv_kind='gated_bn_relu', + activation=nn.Identity(), + norm_layer=nn.Identity + ) + real_kwargs.update(kwargs) + super().__init__(*args, **real_kwargs) + + +class GlobalGeneratorFromSuperChannels(nn.Module): + def __init__(self, input_nc, output_nc, n_downsampling, n_blocks, super_channels, norm_layer="bn", padding_type='reflect', add_out_act=True): + super().__init__() + self.n_downsampling = n_downsampling + norm_layer = get_norm_layer(norm_layer) + if type(norm_layer) == functools.partial: + use_bias = (norm_layer.func == nn.InstanceNorm2d) + else: + use_bias = (norm_layer == nn.InstanceNorm2d) + + channels = self.convert_super_channels(super_channels) + self.channels = channels + + model = [nn.ReflectionPad2d(3), + nn.Conv2d(input_nc, 
channels[0], kernel_size=7, padding=0, bias=use_bias), + norm_layer(channels[0]), + nn.ReLU(True)] + + for i in range(n_downsampling): # add downsampling layers + mult = 2 ** i + model += [nn.Conv2d(channels[0+i], channels[1+i], kernel_size=3, stride=2, padding=1, bias=use_bias), + norm_layer(channels[1+i]), + nn.ReLU(True)] + + mult = 2 ** n_downsampling + + n_blocks1 = n_blocks // 3 + n_blocks2 = n_blocks1 + n_blocks3 = n_blocks - n_blocks1 - n_blocks2 + + for i in range(n_blocks1): + c = n_downsampling + dim = channels[c] + model += [ResnetBlock(dim, padding_type=padding_type, norm_layer=norm_layer)] + + for i in range(n_blocks2): + c = n_downsampling+1 + dim = channels[c] + kwargs = {} + if i == 0: + kwargs = {"in_dim": channels[c-1]} + model += [ResnetBlock(dim, padding_type=padding_type, norm_layer=norm_layer, **kwargs)] + + for i in range(n_blocks3): + c = n_downsampling+2 + dim = channels[c] + kwargs = {} + if i == 0: + kwargs = {"in_dim": channels[c-1]} + model += [ResnetBlock(dim, padding_type=padding_type, norm_layer=norm_layer, **kwargs)] + + for i in range(n_downsampling): # add upsampling layers + mult = 2 ** (n_downsampling - i) + model += [nn.ConvTranspose2d(channels[n_downsampling+3+i], + channels[n_downsampling+3+i+1], + kernel_size=3, stride=2, + padding=1, output_padding=1, + bias=use_bias), + norm_layer(channels[n_downsampling+3+i+1]), + nn.ReLU(True)] + model += [nn.ReflectionPad2d(3)] + model += [nn.Conv2d(channels[2*n_downsampling+3], output_nc, kernel_size=7, padding=0)] + + if add_out_act: + model.append(get_activation('tanh' if add_out_act is True else add_out_act)) + self.model = nn.Sequential(*model) + + def convert_super_channels(self, super_channels): + n_downsampling = self.n_downsampling + result = [] + cnt = 0 + + if n_downsampling == 2: + N1 = 10 + elif n_downsampling == 3: + N1 = 13 + else: + raise NotImplementedError + + for i in range(0, N1): + if i in [1,4,7,10]: + channel = super_channels[cnt] * (2 ** cnt) + config = {'channel': channel} + result.append(channel) + logging.info(f"Downsample channels {result[-1]}") + cnt += 1 + + for i in range(3): + for counter, j in enumerate(range(N1 + i * 3, N1 + 3 + i * 3)): + if len(super_channels) == 6: + channel = super_channels[3] * 4 + else: + channel = super_channels[i + 3] * 4 + config = {'channel': channel} + if counter == 0: + result.append(channel) + logging.info(f"Bottleneck channels {result[-1]}") + cnt = 2 + + for i in range(N1+9, N1+21): + if i in [22, 25,28]: + cnt -= 1 + if len(super_channels) == 6: + channel = super_channels[5 - cnt] * (2 ** cnt) + else: + channel = super_channels[7 - cnt] * (2 ** cnt) + result.append(int(channel)) + logging.info(f"Upsample channels {result[-1]}") + return result + + def forward(self, input): + return self.model(input) + + +# Defines the PatchGAN discriminator with the specified arguments. 
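# Editor's aside: a minimal smoke test for the PatchGAN discriminator defined
# just below -- a sketch assuming the vendored module is importable as-is.
# With the defaults (kw=4, n_layers=3, i.e. three stride-2 convs followed by
# two stride-1 convs), a 256x256 input maps to a 35x35 grid of per-patch
# logits, and intermediate activations are returned for feature-matching losses.

import torch
from sd_bmab.external.lama.saicinpainting.training.modules.pix2pixhd import NLayerDiscriminator

discr = NLayerDiscriminator(input_nc=3)
logits, feats = discr(torch.randn(1, 3, 256, 256))
print(logits.shape)  # torch.Size([1, 1, 35, 35])
print(len(feats))    # 4 activation maps (all layers except the last)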
+class NLayerDiscriminator(BaseDiscriminator): + def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d,): + super().__init__() + self.n_layers = n_layers + + kw = 4 + padw = int(np.ceil((kw-1.0)/2)) + sequence = [[nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), + nn.LeakyReLU(0.2, True)]] + + nf = ndf + for n in range(1, n_layers): + nf_prev = nf + nf = min(nf * 2, 512) + + cur_model = [] + cur_model += [ + nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=2, padding=padw), + norm_layer(nf), + nn.LeakyReLU(0.2, True) + ] + sequence.append(cur_model) + + nf_prev = nf + nf = min(nf * 2, 512) + + cur_model = [] + cur_model += [ + nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=1, padding=padw), + norm_layer(nf), + nn.LeakyReLU(0.2, True) + ] + sequence.append(cur_model) + + sequence += [[nn.Conv2d(nf, 1, kernel_size=kw, stride=1, padding=padw)]] + + for n in range(len(sequence)): + setattr(self, 'model'+str(n), nn.Sequential(*sequence[n])) + + def get_all_activations(self, x): + res = [x] + for n in range(self.n_layers + 2): + model = getattr(self, 'model' + str(n)) + res.append(model(res[-1])) + return res[1:] + + def forward(self, x): + act = self.get_all_activations(x) + return act[-1], act[:-1] + + +class MultidilatedNLayerDiscriminator(BaseDiscriminator): + def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, multidilation_kwargs={}): + super().__init__() + self.n_layers = n_layers + + kw = 4 + padw = int(np.ceil((kw-1.0)/2)) + sequence = [[nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), + nn.LeakyReLU(0.2, True)]] + + nf = ndf + for n in range(1, n_layers): + nf_prev = nf + nf = min(nf * 2, 512) + + cur_model = [] + cur_model += [ + MultidilatedConv(nf_prev, nf, kernel_size=kw, stride=2, padding=[2, 3], **multidilation_kwargs), + norm_layer(nf), + nn.LeakyReLU(0.2, True) + ] + sequence.append(cur_model) + + nf_prev = nf + nf = min(nf * 2, 512) + + cur_model = [] + cur_model += [ + nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=1, padding=padw), + norm_layer(nf), + nn.LeakyReLU(0.2, True) + ] + sequence.append(cur_model) + + sequence += [[nn.Conv2d(nf, 1, kernel_size=kw, stride=1, padding=padw)]] + + for n in range(len(sequence)): + setattr(self, 'model'+str(n), nn.Sequential(*sequence[n])) + + def get_all_activations(self, x): + res = [x] + for n in range(self.n_layers + 2): + model = getattr(self, 'model' + str(n)) + res.append(model(res[-1])) + return res[1:] + + def forward(self, x): + act = self.get_all_activations(x) + return act[-1], act[:-1] + + +class NLayerDiscriminatorAsGen(NLayerDiscriminator): + def forward(self, x): + return super().forward(x)[0] diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/spatial_transform.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/spatial_transform.py new file mode 100644 index 0000000000000000000000000000000000000000..2de024ba08c549605a08b64d096f1f0db7b7722a --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/spatial_transform.py @@ -0,0 +1,49 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from kornia.geometry.transform import rotate + + +class LearnableSpatialTransformWrapper(nn.Module): + def __init__(self, impl, pad_coef=0.5, angle_init_range=80, train_angle=True): + super().__init__() + self.impl = impl + self.angle = torch.rand(1) * angle_init_range + if train_angle: + self.angle = nn.Parameter(self.angle, requires_grad=True) + self.pad_coef = pad_coef + + def 
forward(self, x): + if torch.is_tensor(x): + return self.inverse_transform(self.impl(self.transform(x)), x) + elif isinstance(x, tuple): + x_trans = tuple(self.transform(elem) for elem in x) + y_trans = self.impl(x_trans) + return tuple(self.inverse_transform(elem, orig_x) for elem, orig_x in zip(y_trans, x)) + else: + raise ValueError(f'Unexpected input type {type(x)}') + + def transform(self, x): + height, width = x.shape[2:] + pad_h, pad_w = int(height * self.pad_coef), int(width * self.pad_coef) + x_padded = F.pad(x, [pad_w, pad_w, pad_h, pad_h], mode='reflect') + x_padded_rotated = rotate(x_padded, angle=self.angle.to(x_padded)) + return x_padded_rotated + + def inverse_transform(self, y_padded_rotated, orig_x): + height, width = orig_x.shape[2:] + pad_h, pad_w = int(height * self.pad_coef), int(width * self.pad_coef) + + y_padded = rotate(y_padded_rotated, angle=-self.angle.to(y_padded_rotated)) + y_height, y_width = y_padded.shape[2:] + y = y_padded[:, :, pad_h : y_height - pad_h, pad_w : y_width - pad_w] + return y + + +if __name__ == '__main__': + layer = LearnableSpatialTransformWrapper(nn.Identity()) + x = torch.arange(2* 3 * 15 * 15).view(2, 3, 15, 15).float() + y = layer(x) + assert x.shape == y.shape + assert torch.allclose(x[:, :, 1:, 1:][:, :, :-1, :-1], y[:, :, 1:, 1:][:, :, :-1, :-1]) + print('all ok') diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/squeeze_excitation.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/squeeze_excitation.py new file mode 100644 index 0000000000000000000000000000000000000000..d1d902bb30c071acbc0fa919a134c80fed86bd6c --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/modules/squeeze_excitation.py @@ -0,0 +1,20 @@ +import torch.nn as nn + + +class SELayer(nn.Module): + def __init__(self, channel, reduction=16): + super(SELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction, bias=False), + nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel, bias=False), + nn.Sigmoid() + ) + + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + res = x * y.expand_as(x) + return res diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/__init__.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6af176ab62f6b27f9501eee68fbee785b2ab329e --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/__init__.py @@ -0,0 +1,29 @@ +import logging +import torch +from sd_bmab.external.lama.saicinpainting.training.trainers.default import DefaultInpaintingTrainingModule + + +def get_training_model_class(kind): + if kind == 'default': + return DefaultInpaintingTrainingModule + + raise ValueError(f'Unknown trainer module {kind}') + + +def make_training_model(config): + kind = config.training_model.kind + kwargs = dict(config.training_model) + kwargs.pop('kind') + kwargs['use_ddp'] = config.trainer.kwargs.get('accelerator', None) == 'ddp' + + logging.info(f'Make training model {kind}') + + cls = get_training_model_class(kind) + return cls(config, **kwargs) + + +def load_checkpoint(train_config, path, map_location='cuda', strict=True): + model = make_training_model(train_config).generator + state = torch.load(path, map_location=map_location) + model.load_state_dict(state, strict=strict) + return model diff --git 
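# Editor's aside: a minimal sketch of how the loader above is typically driven
# for inference. The config/checkpoint paths are hypothetical, the config is
# assumed to carry the `training_model` and `trainer` sections the factory
# reads, and the checkpoint is assumed to hold the generator state_dict
# directly (which is what load_checkpoint above expects).

from omegaconf import OmegaConf
from sd_bmab.external.lama.saicinpainting.training.trainers import load_checkpoint

train_config = OmegaConf.load('models/lama/config.yaml')            # hypothetical path
generator = load_checkpoint(train_config, 'models/lama/lama.ckpt',  # hypothetical path
                            map_location='cpu', strict=False)
generator.eval()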
a/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..581ac53b21207aaf53954bb208ec5f6d9614cb71 Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/__pycache__/base.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/__pycache__/base.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..11d2f742c24128c6f4619c10dec0bc3d5ec3a5de Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/__pycache__/base.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/__pycache__/default.cpython-310.pyc b/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/__pycache__/default.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..701670bcce8dabcfa4c040e334112d5e889dfbdc Binary files /dev/null and b/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/__pycache__/default.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/base.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/base.py new file mode 100644 index 0000000000000000000000000000000000000000..850bd4694abb942add0325c54739e0d1ee915086 --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/base.py @@ -0,0 +1,293 @@ +import copy +import logging +from typing import Dict, Tuple + +import pandas as pd +import pytorch_lightning as ptl +import torch +import torch.nn as nn +import torch.nn.functional as F +# from torch.utils.data import DistributedSampler + +# from sd_bmab.external.lama.saicinpainting.evaluation import make_evaluator +# from sd_bmab.external.lama.saicinpainting.training.data.datasets import make_default_train_dataloader, make_default_val_dataloader +# from sd_bmab.external.lama.saicinpainting.training.losses.adversarial import make_discrim_loss +# from sd_bmab.external.lama.saicinpainting.training.losses.perceptual import PerceptualLoss, ResNetPL +from sd_bmab.external.lama.saicinpainting.training.modules import make_generator #, make_discriminator +# from sd_bmab.external.lama.saicinpainting.training.visualizers import make_visualizer +from sd_bmab.external.lama.saicinpainting.utils import add_prefix_to_keys, average_dicts, set_requires_grad, flatten_dict, \ + get_has_ddp_rank + +LOGGER = logging.getLogger(__name__) + + +def make_optimizer(parameters, kind='adamw', **kwargs): + if kind == 'adam': + optimizer_class = torch.optim.Adam + elif kind == 'adamw': + optimizer_class = torch.optim.AdamW + else: + raise ValueError(f'Unknown optimizer kind {kind}') + return optimizer_class(parameters, **kwargs) + + +def update_running_average(result: nn.Module, new_iterate_model: nn.Module, decay=0.999): + with torch.no_grad(): + res_params = dict(result.named_parameters()) + new_params = dict(new_iterate_model.named_parameters()) + + for k in res_params.keys(): + res_params[k].data.mul_(decay).add_(new_params[k].data, alpha=1 - decay) + + +def make_multiscale_noise(base_tensor, scales=6, scale_mode='bilinear'): + batch_size, _, height, width = base_tensor.shape + cur_height, cur_width = height, width + result = 
[] + align_corners = False if scale_mode in ('bilinear', 'bicubic') else None + for _ in range(scales): + cur_sample = torch.randn(batch_size, 1, cur_height, cur_width, device=base_tensor.device) + cur_sample_scaled = F.interpolate(cur_sample, size=(height, width), mode=scale_mode, align_corners=align_corners) + result.append(cur_sample_scaled) + cur_height //= 2 + cur_width //= 2 + return torch.cat(result, dim=1) + + +class BaseInpaintingTrainingModule(ptl.LightningModule): + def __init__(self, config, use_ddp, *args, predict_only=False, visualize_each_iters=100, + average_generator=False, generator_avg_beta=0.999, average_generator_start_step=30000, + average_generator_period=10, store_discr_outputs_for_vis=False, + **kwargs): + super().__init__(*args, **kwargs) + LOGGER.info('BaseInpaintingTrainingModule init called') + + self.config = config + + self.generator = make_generator(config, **self.config.generator) + self.use_ddp = use_ddp + + if not get_has_ddp_rank(): + LOGGER.info(f'Generator\n{self.generator}') + + # if not predict_only: + # self.save_hyperparameters(self.config) + # self.discriminator = make_discriminator(**self.config.discriminator) + # self.adversarial_loss = make_discrim_loss(**self.config.losses.adversarial) + # self.visualizer = make_visualizer(**self.config.visualizer) + # self.val_evaluator = make_evaluator(**self.config.evaluator) + # self.test_evaluator = make_evaluator(**self.config.evaluator) + # + # if not get_has_ddp_rank(): + # LOGGER.info(f'Discriminator\n{self.discriminator}') + # + # extra_val = self.config.data.get('extra_val', ()) + # if extra_val: + # self.extra_val_titles = list(extra_val) + # self.extra_evaluators = nn.ModuleDict({k: make_evaluator(**self.config.evaluator) + # for k in extra_val}) + # else: + # self.extra_evaluators = {} + # + # self.average_generator = average_generator + # self.generator_avg_beta = generator_avg_beta + # self.average_generator_start_step = average_generator_start_step + # self.average_generator_period = average_generator_period + # self.generator_average = None + # self.last_generator_averaging_step = -1 + # self.store_discr_outputs_for_vis = store_discr_outputs_for_vis + # + # if self.config.losses.get("l1", {"weight_known": 0})['weight_known'] > 0: + # self.loss_l1 = nn.L1Loss(reduction='none') + # + # if self.config.losses.get("mse", {"weight": 0})['weight'] > 0: + # self.loss_mse = nn.MSELoss(reduction='none') + # + # if self.config.losses.perceptual.weight > 0: + # self.loss_pl = PerceptualLoss() + # + # # if self.config.losses.get("resnet_pl", {"weight": 0})['weight'] > 0: + # # self.loss_resnet_pl = ResNetPL(**self.config.losses.resnet_pl) + # # else: + # # self.loss_resnet_pl = None + # + # self.loss_resnet_pl = None + + self.visualize_each_iters = visualize_each_iters + LOGGER.info('BaseInpaintingTrainingModule init done') + + def configure_optimizers(self): + discriminator_params = list(self.discriminator.parameters()) + return [ + dict(optimizer=make_optimizer(self.generator.parameters(), **self.config.optimizers.generator)), + dict(optimizer=make_optimizer(discriminator_params, **self.config.optimizers.discriminator)), + ] + + def train_dataloader(self): + kwargs = dict(self.config.data.train) + if self.use_ddp: + kwargs['ddp_kwargs'] = dict(num_replicas=self.trainer.num_nodes * self.trainer.num_processes, + rank=self.trainer.global_rank, + shuffle=True) + dataloader = make_default_train_dataloader(**self.config.data.train) + return dataloader + + def val_dataloader(self): + res = 
[make_default_val_dataloader(**self.config.data.val)] + + if self.config.data.visual_test is not None: + res = res + [make_default_val_dataloader(**self.config.data.visual_test)] + else: + res = res + res + + extra_val = self.config.data.get('extra_val', ()) + if extra_val: + res += [make_default_val_dataloader(**extra_val[k]) for k in self.extra_val_titles] + + return res + + def training_step(self, batch, batch_idx, optimizer_idx=None): + self._is_training_step = True + return self._do_step(batch, batch_idx, mode='train', optimizer_idx=optimizer_idx) + + def validation_step(self, batch, batch_idx, dataloader_idx): + extra_val_key = None + if dataloader_idx == 0: + mode = 'val' + elif dataloader_idx == 1: + mode = 'test' + else: + mode = 'extra_val' + extra_val_key = self.extra_val_titles[dataloader_idx - 2] + self._is_training_step = False + return self._do_step(batch, batch_idx, mode=mode, extra_val_key=extra_val_key) + + def training_step_end(self, batch_parts_outputs): + if self.training and self.average_generator \ + and self.global_step >= self.average_generator_start_step \ + and self.global_step >= self.last_generator_averaging_step + self.average_generator_period: + if self.generator_average is None: + self.generator_average = copy.deepcopy(self.generator) + else: + update_running_average(self.generator_average, self.generator, decay=self.generator_avg_beta) + self.last_generator_averaging_step = self.global_step + + full_loss = (batch_parts_outputs['loss'].mean() + if torch.is_tensor(batch_parts_outputs['loss']) # loss is not tensor when no discriminator used + else torch.tensor(batch_parts_outputs['loss']).float().requires_grad_(True)) + log_info = {k: v.mean() for k, v in batch_parts_outputs['log_info'].items()} + self.log_dict(log_info, on_step=True, on_epoch=False) + return full_loss + + def validation_epoch_end(self, outputs): + outputs = [step_out for out_group in outputs for step_out in out_group] + averaged_logs = average_dicts(step_out['log_info'] for step_out in outputs) + self.log_dict({k: v.mean() for k, v in averaged_logs.items()}) + + pd.set_option('display.max_columns', 500) + pd.set_option('display.width', 1000) + + # standard validation + val_evaluator_states = [s['val_evaluator_state'] for s in outputs if 'val_evaluator_state' in s] + val_evaluator_res = self.val_evaluator.evaluation_end(states=val_evaluator_states) + val_evaluator_res_df = pd.DataFrame(val_evaluator_res).stack(1).unstack(0) + val_evaluator_res_df.dropna(axis=1, how='all', inplace=True) + LOGGER.info(f'Validation metrics after epoch #{self.current_epoch}, ' + f'total {self.global_step} iterations:\n{val_evaluator_res_df}') + + for k, v in flatten_dict(val_evaluator_res).items(): + self.log(f'val_{k}', v) + + # standard visual test + test_evaluator_states = [s['test_evaluator_state'] for s in outputs + if 'test_evaluator_state' in s] + test_evaluator_res = self.test_evaluator.evaluation_end(states=test_evaluator_states) + test_evaluator_res_df = pd.DataFrame(test_evaluator_res).stack(1).unstack(0) + test_evaluator_res_df.dropna(axis=1, how='all', inplace=True) + LOGGER.info(f'Test metrics after epoch #{self.current_epoch}, ' + f'total {self.global_step} iterations:\n{test_evaluator_res_df}') + + for k, v in flatten_dict(test_evaluator_res).items(): + self.log(f'test_{k}', v) + + # extra validations + if self.extra_evaluators: + for cur_eval_title, cur_evaluator in self.extra_evaluators.items(): + cur_state_key = f'extra_val_{cur_eval_title}_evaluator_state' + cur_states = [s[cur_state_key] for s 
in outputs if cur_state_key in s]
+                cur_evaluator_res = cur_evaluator.evaluation_end(states=cur_states)
+                cur_evaluator_res_df = pd.DataFrame(cur_evaluator_res).stack(1).unstack(0)
+                cur_evaluator_res_df.dropna(axis=1, how='all', inplace=True)
+                LOGGER.info(f'Extra val {cur_eval_title} metrics after epoch #{self.current_epoch}, '
+                            f'total {self.global_step} iterations:\n{cur_evaluator_res_df}')
+                for k, v in flatten_dict(cur_evaluator_res).items():
+                    self.log(f'extra_val_{cur_eval_title}_{k}', v)
+
+    def _do_step(self, batch, batch_idx, mode='train', optimizer_idx=None, extra_val_key=None):
+        if optimizer_idx == 0:  # step for generator
+            set_requires_grad(self.generator, True)
+            set_requires_grad(self.discriminator, False)
+        elif optimizer_idx == 1:  # step for discriminator
+            set_requires_grad(self.generator, False)
+            set_requires_grad(self.discriminator, True)
+
+        batch = self(batch)
+
+        total_loss = 0
+        metrics = {}
+
+        if optimizer_idx is None or optimizer_idx == 0:  # step for generator
+            total_loss, metrics = self.generator_loss(batch)
+
+        elif optimizer_idx is None or optimizer_idx == 1:  # step for discriminator
+            if self.config.losses.adversarial.weight > 0:
+                total_loss, metrics = self.discriminator_loss(batch)
+
+        if self.get_ddp_rank() in (None, 0) and (batch_idx % self.visualize_each_iters == 0 or mode == 'test'):
+            if self.config.losses.adversarial.weight > 0:
+                if self.store_discr_outputs_for_vis:
+                    with torch.no_grad():
+                        self.store_discr_outputs(batch)
+            vis_suffix = f'_{mode}'
+            if mode == 'extra_val':
+                vis_suffix += f'_{extra_val_key}'
+            self.visualizer(self.current_epoch, batch_idx, batch, suffix=vis_suffix)
+
+        metrics_prefix = f'{mode}_'
+        if mode == 'extra_val':
+            metrics_prefix += f'{extra_val_key}_'
+        result = dict(loss=total_loss, log_info=add_prefix_to_keys(metrics, metrics_prefix))
+        if mode == 'val':
+            result['val_evaluator_state'] = self.val_evaluator.process_batch(batch)
+        elif mode == 'test':
+            result['test_evaluator_state'] = self.test_evaluator.process_batch(batch)
+        elif mode == 'extra_val':
+            result[f'extra_val_{extra_val_key}_evaluator_state'] = self.extra_evaluators[extra_val_key].process_batch(batch)
+
+        return result
+
+    def get_current_generator(self, no_average=False):
+        if not no_average and not self.training and self.average_generator and self.generator_average is not None:
+            return self.generator_average
+        return self.generator
+
+    def forward(self, batch: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
+        """Pass data through generator and obtain at least 'predicted_image' and 'inpainted' keys"""
+        raise NotImplementedError()
+
+    def generator_loss(self, batch) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
+        raise NotImplementedError()
+
+    def discriminator_loss(self, batch) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
+        raise NotImplementedError()
+
+    def store_discr_outputs(self, batch):
+        out_size = batch['image'].shape[2:]
+        discr_real_out, _ = self.discriminator(batch['image'])
+        discr_fake_out, _ = self.discriminator(batch['predicted_image'])
+        batch['discr_output_real'] = F.interpolate(discr_real_out, size=out_size, mode='nearest')
+        batch['discr_output_fake'] = F.interpolate(discr_fake_out, size=out_size, mode='nearest')
+        batch['discr_output_diff'] = batch['discr_output_real'] - batch['discr_output_fake']
+
+    def get_ddp_rank(self):
+        return self.trainer.global_rank if (self.trainer.num_nodes * self.trainer.num_processes) > 1 else None
diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/default.py 
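# Editor's aside: make_multiscale_noise above draws one randn channel per
# scale, halving the working resolution each time and upsampling back to the
# base size, so the output stacks `scales` channels at full resolution. A
# quick shape check (assuming the vendored package and its dependencies import):

import torch
from sd_bmab.external.lama.saicinpainting.training.trainers.base import make_multiscale_noise

base = torch.zeros(2, 3, 64, 64)
noise = make_multiscale_noise(base, scales=4)
assert noise.shape == (2, 4, 64, 64)  # one noise channel per scale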
b/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/default.py new file mode 100644 index 0000000000000000000000000000000000000000..063a1ddbec3f45dd27be7b0fd6025a1e3d6830ea --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/trainers/default.py @@ -0,0 +1,175 @@ +import logging + +import torch +import torch.nn.functional as F +from omegaconf import OmegaConf + +# from sd_bmab.external.lama.saicinpainting.training.data.datasets import make_constant_area_crop_params +from sd_bmab.external.lama.saicinpainting.training.losses.distance_weighting import make_mask_distance_weighter +from sd_bmab.external.lama.saicinpainting.training.losses.feature_matching import feature_matching_loss, masked_l1_loss +# from sd_bmab.external.lama.saicinpainting.training.modules.fake_fakes import FakeFakesGenerator +from sd_bmab.external.lama.saicinpainting.training.trainers.base import BaseInpaintingTrainingModule, make_multiscale_noise +from sd_bmab.external.lama.saicinpainting.utils import add_prefix_to_keys, get_ramp + +LOGGER = logging.getLogger(__name__) + + +def make_constant_area_crop_batch(batch, **kwargs): + crop_y, crop_x, crop_height, crop_width = make_constant_area_crop_params(img_height=batch['image'].shape[2], + img_width=batch['image'].shape[3], + **kwargs) + batch['image'] = batch['image'][:, :, crop_y : crop_y + crop_height, crop_x : crop_x + crop_width] + batch['mask'] = batch['mask'][:, :, crop_y: crop_y + crop_height, crop_x: crop_x + crop_width] + return batch + + +class DefaultInpaintingTrainingModule(BaseInpaintingTrainingModule): + def __init__(self, *args, concat_mask=True, rescale_scheduler_kwargs=None, image_to_discriminator='predicted_image', + add_noise_kwargs=None, noise_fill_hole=False, const_area_crop_kwargs=None, + distance_weighter_kwargs=None, distance_weighted_mask_for_discr=False, + fake_fakes_proba=0, fake_fakes_generator_kwargs=None, + **kwargs): + super().__init__(*args, **kwargs) + self.concat_mask = concat_mask + self.rescale_size_getter = get_ramp(**rescale_scheduler_kwargs) if rescale_scheduler_kwargs is not None else None + self.image_to_discriminator = image_to_discriminator + self.add_noise_kwargs = add_noise_kwargs + self.noise_fill_hole = noise_fill_hole + self.const_area_crop_kwargs = const_area_crop_kwargs + self.refine_mask_for_losses = make_mask_distance_weighter(**distance_weighter_kwargs) \ + if distance_weighter_kwargs is not None else None + self.distance_weighted_mask_for_discr = distance_weighted_mask_for_discr + + self.fake_fakes_proba = fake_fakes_proba + if self.fake_fakes_proba > 1e-3: + self.fake_fakes_gen = FakeFakesGenerator(**(fake_fakes_generator_kwargs or {})) + + def forward(self, batch): + if self.training and self.rescale_size_getter is not None: + cur_size = self.rescale_size_getter(self.global_step) + batch['image'] = F.interpolate(batch['image'], size=cur_size, mode='bilinear', align_corners=False) + batch['mask'] = F.interpolate(batch['mask'], size=cur_size, mode='nearest') + + if self.training and self.const_area_crop_kwargs is not None: + batch = make_constant_area_crop_batch(batch, **self.const_area_crop_kwargs) + + img = batch['image'] + mask = batch['mask'] + + masked_img = img * (1 - mask) + + if self.add_noise_kwargs is not None: + noise = make_multiscale_noise(masked_img, **self.add_noise_kwargs) + if self.noise_fill_hole: + masked_img = masked_img + mask * noise[:, :masked_img.shape[1]] + masked_img = torch.cat([masked_img, noise], dim=1) + + if self.concat_mask: + masked_img = 
torch.cat([masked_img, mask], dim=1) + + batch['predicted_image'] = self.generator(masked_img) + batch['inpainted'] = mask * batch['predicted_image'] + (1 - mask) * batch['image'] + + if self.fake_fakes_proba > 1e-3: + if self.training and torch.rand(1).item() < self.fake_fakes_proba: + batch['fake_fakes'], batch['fake_fakes_masks'] = self.fake_fakes_gen(img, mask) + batch['use_fake_fakes'] = True + else: + batch['fake_fakes'] = torch.zeros_like(img) + batch['fake_fakes_masks'] = torch.zeros_like(mask) + batch['use_fake_fakes'] = False + + batch['mask_for_losses'] = self.refine_mask_for_losses(img, batch['predicted_image'], mask) \ + if self.refine_mask_for_losses is not None and self.training \ + else mask + + return batch + + def generator_loss(self, batch): + img = batch['image'] + predicted_img = batch[self.image_to_discriminator] + original_mask = batch['mask'] + supervised_mask = batch['mask_for_losses'] + + # L1 + l1_value = masked_l1_loss(predicted_img, img, supervised_mask, + self.config.losses.l1.weight_known, + self.config.losses.l1.weight_missing) + + total_loss = l1_value + metrics = dict(gen_l1=l1_value) + + # vgg-based perceptual loss + if self.config.losses.perceptual.weight > 0: + pl_value = self.loss_pl(predicted_img, img, mask=supervised_mask).sum() * self.config.losses.perceptual.weight + total_loss = total_loss + pl_value + metrics['gen_pl'] = pl_value + + # discriminator + # adversarial_loss calls backward by itself + mask_for_discr = supervised_mask if self.distance_weighted_mask_for_discr else original_mask + self.adversarial_loss.pre_generator_step(real_batch=img, fake_batch=predicted_img, + generator=self.generator, discriminator=self.discriminator) + discr_real_pred, discr_real_features = self.discriminator(img) + discr_fake_pred, discr_fake_features = self.discriminator(predicted_img) + adv_gen_loss, adv_metrics = self.adversarial_loss.generator_loss(real_batch=img, + fake_batch=predicted_img, + discr_real_pred=discr_real_pred, + discr_fake_pred=discr_fake_pred, + mask=mask_for_discr) + total_loss = total_loss + adv_gen_loss + metrics['gen_adv'] = adv_gen_loss + metrics.update(add_prefix_to_keys(adv_metrics, 'adv_')) + + # feature matching + if self.config.losses.feature_matching.weight > 0: + need_mask_in_fm = OmegaConf.to_container(self.config.losses.feature_matching).get('pass_mask', False) + mask_for_fm = supervised_mask if need_mask_in_fm else None + fm_value = feature_matching_loss(discr_fake_features, discr_real_features, + mask=mask_for_fm) * self.config.losses.feature_matching.weight + total_loss = total_loss + fm_value + metrics['gen_fm'] = fm_value + + if self.loss_resnet_pl is not None: + resnet_pl_value = self.loss_resnet_pl(predicted_img, img) + total_loss = total_loss + resnet_pl_value + metrics['gen_resnet_pl'] = resnet_pl_value + + return total_loss, metrics + + def discriminator_loss(self, batch): + total_loss = 0 + metrics = {} + + predicted_img = batch[self.image_to_discriminator].detach() + self.adversarial_loss.pre_discriminator_step(real_batch=batch['image'], fake_batch=predicted_img, + generator=self.generator, discriminator=self.discriminator) + discr_real_pred, discr_real_features = self.discriminator(batch['image']) + discr_fake_pred, discr_fake_features = self.discriminator(predicted_img) + adv_discr_loss, adv_metrics = self.adversarial_loss.discriminator_loss(real_batch=batch['image'], + fake_batch=predicted_img, + discr_real_pred=discr_real_pred, + discr_fake_pred=discr_fake_pred, + mask=batch['mask']) + total_loss = total_loss + 
adv_discr_loss + metrics['discr_adv'] = adv_discr_loss + metrics.update(add_prefix_to_keys(adv_metrics, 'adv_')) + + + if batch.get('use_fake_fakes', False): + fake_fakes = batch['fake_fakes'] + self.adversarial_loss.pre_discriminator_step(real_batch=batch['image'], fake_batch=fake_fakes, + generator=self.generator, discriminator=self.discriminator) + discr_fake_fakes_pred, _ = self.discriminator(fake_fakes) + fake_fakes_adv_discr_loss, fake_fakes_adv_metrics = self.adversarial_loss.discriminator_loss( + real_batch=batch['image'], + fake_batch=fake_fakes, + discr_real_pred=discr_real_pred, + discr_fake_pred=discr_fake_fakes_pred, + mask=batch['mask'] + ) + total_loss = total_loss + fake_fakes_adv_discr_loss + metrics['discr_adv_fake_fakes'] = fake_fakes_adv_discr_loss + metrics.update(add_prefix_to_keys(fake_fakes_adv_metrics, 'adv_')) + + return total_loss, metrics diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/visualizers/__init__.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/visualizers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..18590b0dcf3a8a935b86468bfe55a3feb4f1c4a6 --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/visualizers/__init__.py @@ -0,0 +1,15 @@ +import logging + +from sd_bmab.external.lama.saicinpainting.training.visualizers.directory import DirectoryVisualizer +from sd_bmab.external.lama.saicinpainting.training.visualizers.noop import NoopVisualizer + + +def make_visualizer(kind, **kwargs): + logging.info(f'Make visualizer {kind}') + + if kind == 'directory': + return DirectoryVisualizer(**kwargs) + if kind == 'noop': + return NoopVisualizer() + + raise ValueError(f'Unknown visualizer kind {kind}') diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/visualizers/base.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/visualizers/base.py new file mode 100644 index 0000000000000000000000000000000000000000..675f01682ddf5e31b6cc341735378c6f3b242e49 --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/visualizers/base.py @@ -0,0 +1,73 @@ +import abc +from typing import Dict, List + +import numpy as np +import torch +from skimage import color +from skimage.segmentation import mark_boundaries + +from . 
import colors + +COLORS, _ = colors.generate_colors(151) # 151 - max classes for semantic segmentation + + +class BaseVisualizer: + @abc.abstractmethod + def __call__(self, epoch_i, batch_i, batch, suffix='', rank=None): + """ + Take a batch, make an image from it and visualize + """ + raise NotImplementedError() + + +def visualize_mask_and_images(images_dict: Dict[str, np.ndarray], keys: List[str], + last_without_mask=True, rescale_keys=None, mask_only_first=None, + black_mask=False) -> np.ndarray: + mask = images_dict['mask'] > 0.5 + result = [] + for i, k in enumerate(keys): + img = images_dict[k] + img = np.transpose(img, (1, 2, 0)) + + if rescale_keys is not None and k in rescale_keys: + img = img - img.min() + img /= img.max() + 1e-5 + if len(img.shape) == 2: + img = np.expand_dims(img, 2) + + if img.shape[2] == 1: + img = np.repeat(img, 3, axis=2) + elif (img.shape[2] > 3): + img_classes = img.argmax(2) + img = color.label2rgb(img_classes, colors=COLORS) + + if mask_only_first: + need_mark_boundaries = i == 0 + else: + need_mark_boundaries = i < len(keys) - 1 or not last_without_mask + + if need_mark_boundaries: + if black_mask: + img = img * (1 - mask[0][..., None]) + img = mark_boundaries(img, + mask[0], + color=(1., 0., 0.), + outline_color=(1., 1., 1.), + mode='thick') + result.append(img) + return np.concatenate(result, axis=1) + + +def visualize_mask_and_images_batch(batch: Dict[str, torch.Tensor], keys: List[str], max_items=10, + last_without_mask=True, rescale_keys=None) -> np.ndarray: + batch = {k: tens.detach().cpu().numpy() for k, tens in batch.items() + if k in keys or k == 'mask'} + + batch_size = next(iter(batch.values())).shape[0] + items_to_vis = min(batch_size, max_items) + result = [] + for i in range(items_to_vis): + cur_dct = {k: tens[i] for k, tens in batch.items()} + result.append(visualize_mask_and_images(cur_dct, keys, last_without_mask=last_without_mask, + rescale_keys=rescale_keys)) + return np.concatenate(result, axis=0) diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/visualizers/colors.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/visualizers/colors.py new file mode 100644 index 0000000000000000000000000000000000000000..9e9e39182c58cb06a1c5e97a7e6c497cc3388ebe --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/visualizers/colors.py @@ -0,0 +1,76 @@ +import random +import colorsys + +import numpy as np +import matplotlib +matplotlib.use('agg') +import matplotlib.pyplot as plt +from matplotlib.colors import LinearSegmentedColormap + + +def generate_colors(nlabels, type='bright', first_color_black=False, last_color_black=True, verbose=False): + # https://stackoverflow.com/questions/14720331/how-to-generate-random-colors-in-matplotlib + """ + Creates a random colormap to be used together with matplotlib. Useful for segmentation tasks + :param nlabels: Number of labels (size of colormap) + :param type: 'bright' for strong colors, 'soft' for pastel colors + :param first_color_black: Option to use first color as black, True or False + :param last_color_black: Option to use last color as black, True or False + :param verbose: Prints the number of labels and shows the colormap. 
True or False + :return: colormap for matplotlib + """ + if type not in ('bright', 'soft'): + print ('Please choose "bright" or "soft" for type') + return + + if verbose: + print('Number of labels: ' + str(nlabels)) + + # Generate color map for bright colors, based on hsv + if type == 'bright': + randHSVcolors = [(np.random.uniform(low=0.0, high=1), + np.random.uniform(low=0.2, high=1), + np.random.uniform(low=0.9, high=1)) for i in range(nlabels)] + + # Convert HSV list to RGB + randRGBcolors = [] + for HSVcolor in randHSVcolors: + randRGBcolors.append(colorsys.hsv_to_rgb(HSVcolor[0], HSVcolor[1], HSVcolor[2])) + + if first_color_black: + randRGBcolors[0] = [0, 0, 0] + + if last_color_black: + randRGBcolors[-1] = [0, 0, 0] + + random_colormap = LinearSegmentedColormap.from_list('new_map', randRGBcolors, N=nlabels) + + # Generate soft pastel colors, by limiting the RGB spectrum + if type == 'soft': + low = 0.6 + high = 0.95 + randRGBcolors = [(np.random.uniform(low=low, high=high), + np.random.uniform(low=low, high=high), + np.random.uniform(low=low, high=high)) for i in range(nlabels)] + + if first_color_black: + randRGBcolors[0] = [0, 0, 0] + + if last_color_black: + randRGBcolors[-1] = [0, 0, 0] + random_colormap = LinearSegmentedColormap.from_list('new_map', randRGBcolors, N=nlabels) + + # Display colorbar + if verbose: + from matplotlib import colors, colorbar + from matplotlib import pyplot as plt + fig, ax = plt.subplots(1, 1, figsize=(15, 0.5)) + + bounds = np.linspace(0, nlabels, nlabels + 1) + norm = colors.BoundaryNorm(bounds, nlabels) + + cb = colorbar.ColorbarBase(ax, cmap=random_colormap, norm=norm, spacing='proportional', ticks=None, + boundaries=bounds, format='%1i', orientation=u'horizontal') + + return randRGBcolors, random_colormap + diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/visualizers/directory.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/visualizers/directory.py new file mode 100644 index 0000000000000000000000000000000000000000..a93e01d47ee24d7b4a649473b48f6119014b3ba8 --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/visualizers/directory.py @@ -0,0 +1,36 @@ +import os + +import cv2 +import numpy as np + +from sd_bmab.external.lama.saicinpainting.training.visualizers.base import BaseVisualizer, visualize_mask_and_images_batch +from sd_bmab.external.lama.saicinpainting.utils import check_and_warn_input_range + + +class DirectoryVisualizer(BaseVisualizer): + DEFAULT_KEY_ORDER = 'image predicted_image inpainted'.split(' ') + + def __init__(self, outdir, key_order=DEFAULT_KEY_ORDER, max_items_in_batch=10, + last_without_mask=True, rescale_keys=None): + self.outdir = outdir + os.makedirs(self.outdir, exist_ok=True) + self.key_order = key_order + self.max_items_in_batch = max_items_in_batch + self.last_without_mask = last_without_mask + self.rescale_keys = rescale_keys + + def __call__(self, epoch_i, batch_i, batch, suffix='', rank=None): + check_and_warn_input_range(batch['image'], 0, 1, 'DirectoryVisualizer target image') + vis_img = visualize_mask_and_images_batch(batch, self.key_order, max_items=self.max_items_in_batch, + last_without_mask=self.last_without_mask, + rescale_keys=self.rescale_keys) + + vis_img = np.clip(vis_img * 255, 0, 255).astype('uint8') + + curoutdir = os.path.join(self.outdir, f'epoch{epoch_i:04d}{suffix}') + os.makedirs(curoutdir, exist_ok=True) + rank_suffix = f'_r{rank}' if rank is not None else '' + out_fname = os.path.join(curoutdir, f'batch{batch_i:07d}{rank_suffix}.jpg') + + 
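# Note: visualize_mask_and_images_batch returns a float RGB image in [0, 1];
# the clip/scale above converts it to uint8, and the RGB->BGR conversion below
# is required because cv2.imwrite interprets arrays as BGR.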
vis_img = cv2.cvtColor(vis_img, cv2.COLOR_RGB2BGR) + cv2.imwrite(out_fname, vis_img) diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/training/visualizers/noop.py b/3-bmab/sd_bmab/external/lama/saicinpainting/training/visualizers/noop.py new file mode 100644 index 0000000000000000000000000000000000000000..cae2eaba62abb3e8f8a30db7ada9cbd494b7ada1 --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/training/visualizers/noop.py @@ -0,0 +1,9 @@ +from sd_bmab.external.lama.saicinpainting.training.visualizers.base import BaseVisualizer + + +class NoopVisualizer(BaseVisualizer): + def __init__(self, *args, **kwargs): + pass + + def __call__(self, epoch_i, batch_i, batch, suffix='', rank=None): + pass diff --git a/3-bmab/sd_bmab/external/lama/saicinpainting/utils.py b/3-bmab/sd_bmab/external/lama/saicinpainting/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f36f5130d4c105b63689642da5321ce2e1863a9f --- /dev/null +++ b/3-bmab/sd_bmab/external/lama/saicinpainting/utils.py @@ -0,0 +1,174 @@ +import bisect +import functools +import logging +import numbers +import os +import signal +import sys +import traceback +import warnings + +import torch +from pytorch_lightning import seed_everything + +LOGGER = logging.getLogger(__name__) + + +def check_and_warn_input_range(tensor, min_value, max_value, name): + actual_min = tensor.min() + actual_max = tensor.max() + if actual_min < min_value or actual_max > max_value: + warnings.warn(f"{name} must be in {min_value}..{max_value} range, but it ranges {actual_min}..{actual_max}") + + +def sum_dict_with_prefix(target, cur_dict, prefix, default=0): + for k, v in cur_dict.items(): + target_key = prefix + k + target[target_key] = target.get(target_key, default) + v + + +def average_dicts(dict_list): + result = {} + norm = 1e-3 + for dct in dict_list: + sum_dict_with_prefix(result, dct, '') + norm += 1 + for k in list(result): + result[k] /= norm + return result + + +def add_prefix_to_keys(dct, prefix): + return {prefix + k: v for k, v in dct.items()} + + +def set_requires_grad(module, value): + for param in module.parameters(): + param.requires_grad = value + + +def flatten_dict(dct): + result = {} + for k, v in dct.items(): + if isinstance(k, tuple): + k = '_'.join(k) + if isinstance(v, dict): + for sub_k, sub_v in flatten_dict(v).items(): + result[f'{k}_{sub_k}'] = sub_v + else: + result[k] = v + return result + + +class LinearRamp: + def __init__(self, start_value=0, end_value=1, start_iter=-1, end_iter=0): + self.start_value = start_value + self.end_value = end_value + self.start_iter = start_iter + self.end_iter = end_iter + + def __call__(self, i): + if i < self.start_iter: + return self.start_value + if i >= self.end_iter: + return self.end_value + part = (i - self.start_iter) / (self.end_iter - self.start_iter) + return self.start_value * (1 - part) + self.end_value * part + + +class LadderRamp: + def __init__(self, start_iters, values): + self.start_iters = start_iters + self.values = values + assert len(values) == len(start_iters) + 1, (len(values), len(start_iters)) + + def __call__(self, i): + segment_i = bisect.bisect_right(self.start_iters, i) + return self.values[segment_i] + + +def get_ramp(kind='ladder', **kwargs): + if kind == 'linear': + return LinearRamp(**kwargs) + if kind == 'ladder': + return LadderRamp(**kwargs) + raise ValueError(f'Unexpected ramp kind: {kind}') + + +def print_traceback_handler(sig, frame): + LOGGER.warning(f'Received signal {sig}') + bt = ''.join(traceback.format_stack()) + 
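# Note: format_stack() captures the interpreter's current call stack.
# Registering this handler via register_debug_signal_handlers (defined just
# below), e.g. with signal.SIGUSR1, lets one dump a live stack trace from a
# running training process with `kill -USR1 <pid>`.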
LOGGER.warning(f'Requested stack trace:\n{bt}') + + +def register_debug_signal_handlers(sig=None, handler=print_traceback_handler): + LOGGER.warning(f'Setting signal {sig} handler {handler}') + signal.signal(sig, handler) + + +def handle_deterministic_config(config): + seed = dict(config).get('seed', None) + if seed is None: + return False + + seed_everything(seed) + return True + + +def get_shape(t): + if torch.is_tensor(t): + return tuple(t.shape) + elif isinstance(t, dict): + return {n: get_shape(q) for n, q in t.items()} + elif isinstance(t, (list, tuple)): + return [get_shape(q) for q in t] + elif isinstance(t, numbers.Number): + return type(t) + else: + raise ValueError('unexpected type {}'.format(type(t))) + + +def get_has_ddp_rank(): + master_port = os.environ.get('MASTER_PORT', None) + node_rank = os.environ.get('NODE_RANK', None) + local_rank = os.environ.get('LOCAL_RANK', None) + world_size = os.environ.get('WORLD_SIZE', None) + has_rank = master_port is not None or node_rank is not None or local_rank is not None or world_size is not None + return has_rank + + +def handle_ddp_subprocess(): + def main_decorator(main_func): + @functools.wraps(main_func) + def new_main(*args, **kwargs): + # Trainer sets MASTER_PORT, NODE_RANK, LOCAL_RANK, WORLD_SIZE + parent_cwd = os.environ.get('TRAINING_PARENT_WORK_DIR', None) + has_parent = parent_cwd is not None + has_rank = get_has_ddp_rank() + assert has_parent == has_rank, f'Inconsistent state: has_parent={has_parent}, has_rank={has_rank}' + + if has_parent: + # we are in the worker + sys.argv.extend([ + f'hydra.run.dir={parent_cwd}', + # 'hydra/hydra_logging=disabled', + # 'hydra/job_logging=disabled' + ]) + # do nothing if this is a top-level process + # TRAINING_PARENT_WORK_DIR is set in handle_ddp_parent_process after hydra initialization + + main_func(*args, **kwargs) + return new_main + return main_decorator + + +def handle_ddp_parent_process(): + parent_cwd = os.environ.get('TRAINING_PARENT_WORK_DIR', None) + has_parent = parent_cwd is not None + has_rank = get_has_ddp_rank() + assert has_parent == has_rank, f'Inconsistent state: has_parent={has_parent}, has_rank={has_rank}' + + if parent_cwd is None: + os.environ['TRAINING_PARENT_WORK_DIR'] = os.getcwd() + + return has_parent diff --git a/3-bmab/sd_bmab/masking/__init__.py b/3-bmab/sd_bmab/masking/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1090c59c83bdb464bd6285473d09fcc0c2a720ba --- /dev/null +++ b/3-bmab/sd_bmab/masking/__init__.py @@ -0,0 +1,25 @@ +from modules import shared + +from sd_bmab.base import MaskBase +from sd_bmab.masking.sam import SamPredictVitB +from sd_bmab.masking.sam_hq import SamHqPredictVitB +from sd_bmab.util import debug_print + + +masks = [SamPredictVitB(), SamHqPredictVitB()] +dmasks = {s.name: s for s in masks} + + +def get_mask_generator(name='None') -> MaskBase: + model = dmasks.get(name, dmasks[shared.opts.bmab_mask_model]) + debug_print(f'Use mask model {model.name}') + return model + + +def list_mask_names(): + return [s.name for s in masks] + + +def release(): + SamPredictVitB.release() + SamHqPredictVitB.release() diff --git a/3-bmab/sd_bmab/masking/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/masking/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f85e366bd638b305515a50cba88f5c5a10274908 Binary files /dev/null and b/3-bmab/sd_bmab/masking/__pycache__/__init__.cpython-310.pyc differ diff --git 
a/3-bmab/sd_bmab/masking/__pycache__/sam.cpython-310.pyc b/3-bmab/sd_bmab/masking/__pycache__/sam.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8fee70d98608a48250980b1d5f783d2659f3d8a1 Binary files /dev/null and b/3-bmab/sd_bmab/masking/__pycache__/sam.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/masking/__pycache__/sam_hq.cpython-310.pyc b/3-bmab/sd_bmab/masking/__pycache__/sam_hq.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b94f673efa325da825002ee85c59e3bdc7e73b51 Binary files /dev/null and b/3-bmab/sd_bmab/masking/__pycache__/sam_hq.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/masking/sam.py b/3-bmab/sd_bmab/masking/sam.py new file mode 100644 index 0000000000000000000000000000000000000000..20cea6c8b60c324ce49e02788f9790fa9ef3ed20 --- /dev/null +++ b/3-bmab/sd_bmab/masking/sam.py @@ -0,0 +1,122 @@ +from sd_bmab.base import MaskBase +import cv2 +import numpy as np + +import torch + +from PIL import Image +from modules.safe import unsafe_torch_load, load +from modules.devices import device, torch_gc + +from segment_anything import SamPredictor +from segment_anything import sam_model_registry + +from sd_bmab import util + + +sam_model = None + + +class SamPredict(MaskBase): + + def __init__(self) -> None: + super().__init__() + + @property + def name(self): + return f'sam_{self.type}' + + @property + def type(self): + pass + + @property + def file(self): + pass + + @classmethod + def init(cls, model_type, filename, *args, **kwargs): + checkpoint_file = util.lazy_loader(filename) + global sam_model + if sam_model is None: + torch.load = unsafe_torch_load + sam_model = sam_model_registry[model_type](checkpoint=checkpoint_file) + sam_model.to(device=device) + torch.load = load + return sam_model + + def load(self): + return SamPredict.init(self.type, self.file) + + def predict(self, image, boxes): + sam = self.load() + mask_predictor = SamPredictor(sam) + + numpy_image = np.array(image) + opencv_image = cv2.cvtColor(numpy_image, cv2.COLOR_RGB2BGR) + mask_predictor.set_image(opencv_image) + + result = Image.new('L', image.size, 0) + for box in boxes: + x1, y1, x2, y2 = box + + box = np.array([int(x1), int(y1), int(x2), int(y2)]) + masks, scores, logits = mask_predictor.predict( + box=box, + multimask_output=False + ) + + mask = Image.fromarray(masks[0]) + result.paste(mask, mask=mask) + return result + + def predict_multiple(self, image, points, labels, boxes=None): + sam = self.load() + mask_predictor = SamPredictor(sam) + + numpy_image = np.array(image) + opencv_image = cv2.cvtColor(numpy_image, cv2.COLOR_RGB2BGR) + mask_predictor.set_image(opencv_image) + + result = Image.new('L', image.size, 0) + kwargs = dict(multimask_output=True) + if len(points) > 0: + kwargs['point_coords'] = points + kwargs['point_labels'] = labels + if boxes is not None and boxes: + for box in boxes: + x1, y1, x2, y2 = box + box = np.array([int(x1), int(y1), int(x2), int(y2)]) + masks, scores, logits = mask_predictor.predict( + box=box, + **kwargs + ) + for mask in masks: + mask = Image.fromarray(mask) + result.paste(mask, mask=mask) + else: + masks, scores, logits = mask_predictor.predict( + **kwargs + ) + for mask in masks: + mask = Image.fromarray(mask) + result.paste(mask, mask=mask) + return result + + @classmethod + def release(cls): + global sam_model + if sam_model is not None: + sam_model = None + torch_gc() + + +class SamPredictVitB(SamPredict): + + @property + def type(self): + return 'vit_b' + + @property + def 
file(self): + return 'sam_vit_b_01ec64.pth' diff --git a/3-bmab/sd_bmab/masking/sam_hq.py b/3-bmab/sd_bmab/masking/sam_hq.py new file mode 100644 index 0000000000000000000000000000000000000000..b43c28a8e269edf02c9ab4edc95856edc8912129 --- /dev/null +++ b/3-bmab/sd_bmab/masking/sam_hq.py @@ -0,0 +1,122 @@ +from sd_bmab.base import MaskBase +import cv2 +import numpy as np + +import torch + +from PIL import Image +from modules.safe import unsafe_torch_load, load +from modules.devices import device, torch_gc + +from segment_anything_hq import SamPredictor +from segment_anything_hq import sam_model_registry + +from sd_bmab import util + + +sam_hq_model = None + + +class SamHqPredict(MaskBase): + + def __init__(self) -> None: + super().__init__() + + @property + def name(self): + return f'sam_hq_{self.type}' + + @property + def type(self): + pass + + @property + def file(self): + pass + + @classmethod + def init(cls, model_type, filename, *args, **kwargs): + checkpoint_file = util.lazy_loader(filename) + global sam_hq_model + if sam_hq_model is None: + torch.load = unsafe_torch_load + sam_hq_model = sam_model_registry[model_type](checkpoint=checkpoint_file) + sam_hq_model.to(device=device) + torch.load = load + return sam_hq_model + + def load(self): + return SamHqPredict.init(self.type, self.file) + + def predict(self, image, boxes): + sam = self.load() + mask_predictor = SamPredictor(sam) + + numpy_image = np.array(image) + opencv_image = cv2.cvtColor(numpy_image, cv2.COLOR_RGB2BGR) + mask_predictor.set_image(opencv_image) + + result = Image.new('L', image.size, 0) + for box in boxes: + x1, y1, x2, y2 = box + + box = np.array([int(x1), int(y1), int(x2), int(y2)]) + masks, scores, logits = mask_predictor.predict( + box=box, + multimask_output=False + ) + + mask = Image.fromarray(masks[0]) + result.paste(mask, mask=mask) + return result + + def predict_multiple(self, image, points, labels, boxes=None): + sam = self.load() + mask_predictor = SamPredictor(sam) + + numpy_image = np.array(image) + opencv_image = cv2.cvtColor(numpy_image, cv2.COLOR_RGB2BGR) + mask_predictor.set_image(opencv_image) + + result = Image.new('L', image.size, 0) + kwargs = dict(multimask_output=True) + if len(points) > 0: + kwargs['point_coords'] = points + kwargs['point_labels'] = labels + if boxes is not None: + for box in boxes: + x1, y1, x2, y2 = box + box = np.array([int(x1), int(y1), int(x2), int(y2)]) + masks, scores, logits = mask_predictor.predict( + box=box, + **kwargs + ) + for mask in masks: + mask = Image.fromarray(mask) + result.paste(mask, mask=mask) + else: + masks, scores, logits = mask_predictor.predict( + **kwargs + ) + for mask in masks: + mask = Image.fromarray(mask) + result.paste(mask, mask=mask) + return result + + @classmethod + def release(cls): + global sam_hq_model + if sam_hq_model is not None: + sam_hq_model = None + torch_gc() + + +class SamHqPredictVitB(SamHqPredict): + + @property + def type(self): + return 'vit_b' + + @property + def file(self): + return 'sam_hq_vit_b.pth' diff --git a/3-bmab/sd_bmab/parameters.py b/3-bmab/sd_bmab/parameters.py new file mode 100644 index 0000000000000000000000000000000000000000..e935694b1779f12541a244914ed1096572d4d101 --- /dev/null +++ b/3-bmab/sd_bmab/parameters.py @@ -0,0 +1,388 @@ +import os +import json +from sd_bmab import constants +from sd_bmab.util import debug_print + + +def parse_args(args): + config = Parameters().load_preset(args) + ar = Parameters().get_dict(args, config) + return config, ar + + +class Parameters(object): + def __init__(self) 
-> None: + super().__init__() + + self.params = [ + ('enabled', False), + ('preprocess_checkpoint', constants.checkpoint_default), + ('preprocess_vae', constants.vae_default), + ('txt2img_noise_multiplier', 1), + ('txt2img_extra_noise_multiplier', 0), + ('module_config.preprocess_filter', 'None'), + ('txt2img_filter_hresfix_before_upscale', 'None'), + ('txt2img_filter_hresfix_after_upscale', 'None'), + ('module_config.kohyahiresfix.enabled', False), + ('module_config.kohyahiresfix.stop1', 0.15), + ('module_config.kohyahiresfix.depth1', 3), + ('module_config.kohyahiresfix.stop2', 0.4), + ('module_config.kohyahiresfix.depth2', 4), + ('module_config.kohyahiresfix.scaler', 'bicubic'), + ('module_config.kohyahiresfix.downsampling', 0.5), + ('module_config.kohyahiresfix.upsampling', 2.0), + ('module_config.kohyahiresfix.smooth_scaling', True), + ('module_config.kohyahiresfix.early_upsampling', False), + ('module_config.kohyahiresfix.disable_additional_pass', True), + ('resample_enabled', False), + ('module_config.resample_opt.save_image', False), + ('module_config.resample_opt.hiresfix_enabled', False), + ('module_config.resample_opt.checkpoint', constants.checkpoint_default), + ('module_config.resample_opt.vae', constants.vae_default), + ('module_config.resample_opt.method', 'txt2img-1pass'), + ('module_config.resample_opt.filter', 'None'), + ('module_config.resample_opt.prompt', ''), + ('module_config.resample_opt.negative_prompt', ''), + ('module_config.resample_opt.sampler', constants.sampler_default), + ('module_config.resample_opt.scheduler', constants.scheduler_default), + ('module_config.resample_opt.upscaler', constants.fast_upscaler), + ('module_config.resample_opt.steps', 20), + ('module_config.resample_opt.cfg_scale', 7), + ('module_config.resample_opt.denoising_strength', 0.75), + ('module_config.resample_opt.strength', 0.5), + ('module_config.resample_opt.begin', 0.0), + ('module_config.resample_opt.end', 1.0), + ('pretraining_enabled', False), + ('module_config.pretraining_opt.hiresfix_enabled', False), + ('module_config.pretraining_opt.checkpoint', constants.checkpoint_default), + ('module_config.pretraining_opt.vae', constants.vae_default), + ('module_config.pretraining_opt.pretraining_model', None), + ('module_config.pretraining_opt.filter', 'None'), + ('module_config.pretraining_opt.prompt', ''), + ('module_config.pretraining_opt.negative_prompt', ''), + ('module_config.pretraining_opt.sampler', constants.sampler_default), + ('module_config.pretraining_opt.scheduler', constants.scheduler_default), + ('module_config.pretraining_opt.steps', 20), + ('module_config.pretraining_opt.cfg_scale', 7), + ('module_config.pretraining_opt.denoising_strength', 0.75), + ('module_config.pretraining_opt.dilation', 4), + ('module_config.pretraining_opt.box_threshold', 0.35), + ('edge_flavor_enabled', False), + ('edge_low_threadhold', 50), + ('edge_high_threadhold', 200), + ('edge_strength', 0.5), + ('resize_intermediate_enabled', False), + ('module_config.resize_intermediate_opt.resize_by_person', True), + ('module_config.resize_intermediate_opt.method', 'stretching'), + ('module_config.resize_intermediate_opt.alignment', 'bottom'), + ('module_config.resize_intermediate_opt.filter', 'None'), + ('module_config.resize_intermediate_opt.scale', 0.85), + ('module_config.resize_intermediate_opt.denoising_strength', 0.75), + ('refiner_enabled', False), + ('module_config.refiner_opt.checkpoint', constants.checkpoint_default), + ('module_config.refiner_opt.vae', constants.vae_default), + 
('module_config.refiner_opt.keep_checkpoint', True), + ('module_config.refiner_opt.prompt', ''), + ('module_config.refiner_opt.negative_prompt', ''), + ('module_config.refiner_opt.sampler', constants.sampler_default), + ('module_config.refiner_opt.scheduler', constants.scheduler_default), + ('module_config.refiner_opt.upscaler', constants.fast_upscaler), + ('module_config.refiner_opt.steps', 20), + ('module_config.refiner_opt.cfg_scale', 7), + ('module_config.refiner_opt.denoising_strength', 0.75), + ('module_config.refiner_opt.scale', 1), + ('module_config.refiner_opt.width', 0), + ('module_config.refiner_opt.height', 0), + ('contrast', 1), + ('brightness', 1), + ('sharpeness', 1), + ('color_saturation', 1), + ('color_temperature', 0), + ('noise_alpha', 0), + ('noise_alpha_final', 0), + ('input_image', None), + ('blend_enabled', False), + ('blend_alpha', 1), + ('detect_enabled', False), + ('masking_prompt', ''), + ('person_detailing_enabled', False), + ('module_config.person_detailing_opt.use_groudingdino', False), + ('module_config.person_detailing_opt.force_1:1', False), + ('module_config.person_detailing_opt.block_overscaled_image', True), + ('module_config.person_detailing_opt.auto_upscale', True), + ('module_config.person_detailing_opt.checkpoint', constants.checkpoint_default), + ('module_config.person_detailing_opt.vae', constants.vae_default), + ('module_config.person_detailing_opt.sampler', constants.sampler_default), + ('module_config.person_detailing_opt.scheduler', constants.scheduler_default), + ('module_config.person_detailing_opt.scale', 4), + ('module_config.person_detailing_opt.dilation', 2), + ('module_config.person_detailing_opt.area_ratio', 0.1), + ('module_config.person_detailing_opt.limit', 1), + ('module_config.person_detailing_opt.background_color', 1), + ('module_config.person_detailing_opt.background_blur', 0), + ('module_config.person_detailing.denoising_strength', 0.4), + ('module_config.person_detailing.cfg_scale', 7), + ('module_config.person_detailing.steps', 20), + ('face_detailing_enabled', False), + ('face_detailing_before_hiresfix_enabled', False), + ('module_config.face_detailing_opt.disable_extra_networks', False), + ('module_config.face_detailing_opt.sort_by', 'Score'), + ('module_config.face_detailing_opt.limit', 1), + ('module_config.face_detailing_opt.prompt0', ''), + ('module_config.face_detailing_opt.negative_prompt0', ''), + ('module_config.face_detailing_opt.prompt1', ''), + ('module_config.face_detailing_opt.negative_prompt1', ''), + ('module_config.face_detailing_opt.prompt2', ''), + ('module_config.face_detailing_opt.negative_prompt2', ''), + ('module_config.face_detailing_opt.prompt3', ''), + ('module_config.face_detailing_opt.negative_prompt3', ''), + ('module_config.face_detailing_opt.prompt4', ''), + ('module_config.face_detailing_opt.negative_prompt4', ''), + ('module_config.face_detailing_opt.override_parameter', False), + ('module_config.face_detailing.width', 512), + ('module_config.face_detailing.height', 512), + ('module_config.face_detailing.cfg_scale', 7), + ('module_config.face_detailing.steps', 20), + ('module_config.face_detailing.mask_blur', 4), + ('module_config.face_detailing_opt.checkpoint', constants.checkpoint_default), + ('module_config.face_detailing_opt.vae', constants.vae_default), + ('module_config.face_detailing_opt.sampler', constants.sampler_default), + ('module_config.face_detailing_opt.scheduler', constants.scheduler_default), + ('module_config.face_detailing.inpaint_full_res', 'Only masked'), + 
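+			# padding in pixels around the mask when the inpaint area is 'Only masked'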
('module_config.face_detailing.inpaint_full_res_padding', 32), + ('module_config.face_detailing_opt.detection_model', constants.face_detector_default), + ('module_config.face_detailing.denoising_strength', 0.4), + ('module_config.face_detailing_opt.dilation', 4), + ('module_config.face_detailing_opt.box_threshold', 0.3), + ('module_config.face_detailing_opt.skip_large_face', False), + ('module_config.face_detailing_opt.large_face_pixels', 0.26), + ('hand_detailing_enabled', False), + ('module_config.hand_detailing_opt.block_overscaled_image', True), + ('module_config.hand_detailing_opt.best_quality', False), + ('module_config.hand_detailing_opt.detailing_method', 'subframe'), + ('module_config.hand_detailing.prompt', ''), + ('module_config.hand_detailing.negative_prompt', ''), + ('module_config.hand_detailing.denoising_strength', 0.4), + ('module_config.hand_detailing.cfg_scale', 7), + ('module_config.hand_detailing_opt.auto_upscale', True), + ('module_config.hand_detailing_opt.scale', 2), + ('module_config.hand_detailing_opt.box_threshold', 0.3), + ('module_config.hand_detailing_opt.dilation', 0.1), + ('module_config.hand_detailing.inpaint_full_res', 'Whole picture'), + ('module_config.hand_detailing.inpaint_full_res_padding', 32), + ('module_config.hand_detailing_opt.additional_parameter', ''), + ('module_config.controlnet.enabled', False), + ('module_config.controlnet.noise', False), + ('module_config.controlnet.with_refiner', False), + ('module_config.controlnet.noise_strength', 0.4), + ('module_config.controlnet.noise_begin', 0.1), + ('module_config.controlnet.noise_end', 0.9), + ('module_config.controlnet.noise_hiresfix', 'Both'), + ('module_config.controlnet.pose', False), + ('module_config.controlnet.pose_strength', 0.3), + ('module_config.controlnet.pose_begin', 0.0), + ('module_config.controlnet.pose_end', 1.0), + ('module_config.controlnet.pose_face_only', False), + ('module_config.controlnet.pose_selected', 'Random'), + ('module_config.controlnet.ipadapter', False), + ('module_config.controlnet.ipadapter_strength', 0.3), + ('module_config.controlnet.ipadapter_begin', 0.0), + ('module_config.controlnet.ipadapter_end', 1.0), + ('module_config.controlnet.ipadapter_selected', 'Random'), + ('module_config.controlnet.ipadapter_weight_type', 'normal'), + ('module_config.iclight.enabled', False), + ('module_config.iclight.enable_before_upscale', False), + ('module_config.iclight.style', 'normal'), + ('module_config.iclight.prompt', ''), + ('module_config.iclight.preference', 'None'), + ('module_config.iclight.blending', 0.5), + ('module_config.iclight.use_background_image', False), + ('resize_by_person_enabled', False), + ('module_config.resize_by_person_opt.mode', constants.resize_mode_default), + ('module_config.resize_by_person_opt.scale', 0.85), + ('module_config.resize_by_person_opt.denoising_strength', 0.4), + ('module_config.resize_by_person_opt.dilation', 10), + ('upscale_enabled', False), + ('detailing_after_upscale', True), + ('upscaler_name', 'None'), + ('upscale_ratio', 1.5), + ('postprocess_final_filter', 'None'), + ('module_config.watermark.enabled', False), + ('module_config.watermark.font', ''), + ('module_config.watermark.alignment', 5), + ('module_config.watermark.text_alignment', 'left'), + ('module_config.watermark.rotate', '0'), + ('module_config.watermark.color', '#000000'), + ('module_config.watermark.background_color', '#000000'), + ('module_config.watermark.font_size', 12), + ('module_config.watermark.transparency', 100), + 
('module_config.watermark.background_transparency', 0),
+			('module_config.watermark.margin', 5),
+			('module_config.watermark.text', ''),
+			('config_file', ''),
+			('preset', 'None'),
+		]
+
+		self.ext_params = [
+			('hand_detailing_before_hiresfix_enabled', False),
+		]
+
+	@staticmethod
+	def get_dict_from_args(args, d):
+		# expands dotted keys such as 'module_config.refiner_opt.steps'
+		# into nested dictionaries
+		ar = {}
+		if d is not None:
+			ar = d
+		for p in args:
+			key = p[0]
+			value = p[1]
+			keys = key.split('.')
+			cur = ar
+			if len(keys) > 1:
+				key = keys[-1]
+				for k in keys[:-1]:
+					if k not in cur:
+						cur[k] = {}
+					cur = cur[k]
+			cur[key] = value
+		return ar
+
+	@staticmethod
+	def get_param_from_dict(prefix, d):
+		arr = []
+		for key, value in d.items():
+			if isinstance(value, dict):
+				prefixz = prefix + key + '.'
+				sub = Parameters.get_param_from_dict(prefixz, value)
+				arr.extend(sub)
+			else:
+				arr.append((prefix + key, value))
+		return arr
+
+	def get_dict(self, args, external_config):
+		if isinstance(args[0], dict):
+			default_args = Parameters.get_dict_from_args(self.params, None)
+			default_args.update(args[0])
+			return default_args
+		else:
+			if len(args) != len(self.params):
+				debug_print('Refresh webui first.')
+				raise Exception('Refresh webui first.')
+
+			if args[0]:
+				args_list = [(self.params[idx][0], v) for idx, v in enumerate(args)]
+				args_list.extend(self.ext_params)
+				ar = Parameters.get_dict_from_args(args_list, None)
+			else:
+				self.params.extend(self.ext_params)
+				ar = Parameters.get_dict_from_args(self.params, None)
+
+			if external_config:
+				cfgarg = Parameters.get_param_from_dict('', external_config)
+				ar = Parameters.get_dict_from_args(cfgarg, ar)
+				ar['enabled'] = True
+
+			return ar
+
+	def get_default(self):
+		return [x[1] for x in self.params]
+
+	def get_preset(self, prompt):
+		config_file = None
+		newprompt = []
+		for line in prompt.split('\n'):
+			if line.startswith('##'):
+				config_file = line[2:]
+				continue
+			newprompt.append(line)
+		if config_file is None:
+			return prompt, {}
+
+		cfg_dir = os.path.join(os.path.dirname(__file__), "../preset")
+		json_file = os.path.join(cfg_dir, f'{config_file}.json')
+		if not os.path.isfile(json_file):
+			debug_print(f'Configuration file {config_file}.json not found')
+			return '\n'.join(newprompt), {}
+		with open(json_file) as f:
+			config = json.load(f)
+		debug_print('Loading config', json.dumps(config, indent=2))
+		return '\n'.join(newprompt), config
+
+	def load_preset(self, args):
+		name = 'None'
+		for (key, value), a in zip(self.params, args):
+			if key == 'preset':
+				name = a
+		if name == 'None':
+			return {}
+
+		cfg_dir = os.path.join(os.path.dirname(__file__), "../resources/preset")
+		json_file = os.path.join(cfg_dir, f'{name}.json')
+		if not os.path.isfile(json_file):
+			debug_print(f'Configuration file {name}.json not found')
+			return {}
+		with open(json_file) as f:
+			config = json.load(f)
+		debug_print('Loading config', json.dumps(config, indent=2))
+		return config
+
+	def get_save_config_name(self, args):
+		name = None
+		for (key, value), a in zip(self.params, args):
+			if key == 'config_file':
+				name = a
+		if name is None:
+			return 'noname'
+		return name
+
+	def load_config(self, name):
+		save_dir = os.path.join(os.path.dirname(__file__), "../resources/saved")
+		with open(os.path.join(save_dir, f'{name}.json'), 'r') as f:
+			loaded_dict = json.load(f)
+
+		detailing_opt = loaded_dict.get('module_config', {}).get('face_detailing_opt', {})
+		detection_model = detailing_opt.get('detection_model', constants.face_detector_default)
+		if detection_model == 'GroundingDINO(face)':
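+			# saved configs may still reference 'GroundingDINO(face)';
+			# map it back to the current default face detector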
detailing_opt['detection_model'] = constants.face_detector_default + default_args = Parameters.get_dict_from_args(self.params, None) + loaded_args = Parameters.get_param_from_dict('', loaded_dict) + final_dict = Parameters.get_dict_from_args(loaded_args, default_args) + final_args = Parameters.get_param_from_dict('', final_dict) + sort_dict = {a[0]: a[1] for a in final_args} + ret = [sort_dict[key] for key, value in self.params] + return ret + + def save_config(self, args): + name = 'noname' + for (key, value), a in zip(self.params, args): + if key == 'config_file': + name = a + + save_dir = os.path.join(os.path.dirname(__file__), "../resources/saved") + args_list = [(self.params[idx][0], v) for idx, v in enumerate(args)] + conf = Parameters.get_dict_from_args(args_list, None) + with open(os.path.join(save_dir, f'{name}.json'), 'w') as f: + json.dump(conf, f, indent=2) + + def list_config(self): + save_dir = os.path.join(os.path.dirname(__file__), "../resources/saved") + if not os.path.isdir(save_dir): + os.mkdir(save_dir) + + configs = [x for x in os.listdir(save_dir) if x.endswith('.json')] + + return [x[:-5] for x in configs] + + def list_preset(self): + presets = ['None'] + preset_dir = os.path.join(os.path.dirname(__file__), "../resources/preset") + configs = [x for x in os.listdir(preset_dir) if x.endswith('.json')] + presets.extend([x[:-5] for x in configs]) + return presets + + def get_config_value_by_key(self, key, configs): + for x, y in zip(self.params, configs): + if x[0] == key: + return y + return None + diff --git a/3-bmab/sd_bmab/pipeline/__init__.py b/3-bmab/sd_bmab/pipeline/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/3-bmab/sd_bmab/pipeline/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/pipeline/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1e552ff1c60822e81e42d374230613d2f4f3753e Binary files /dev/null and b/3-bmab/sd_bmab/pipeline/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/pipeline/internal/__init__.py b/3-bmab/sd_bmab/pipeline/internal/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9edd49ae63a1c5b10b295d4ef6898f99f742acc1 --- /dev/null +++ b/3-bmab/sd_bmab/pipeline/internal/__init__.py @@ -0,0 +1,2 @@ +from sd_bmab.pipeline.internal.internalpipe import process_img2img, process_intermediate_before_upscale, process_intermediate_after_upscale, is_controlnet_required +from sd_bmab.pipeline.internal.intermediate import Preprocess diff --git a/3-bmab/sd_bmab/pipeline/internal/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/pipeline/internal/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8648717d4539da2ff13ceeb7fb48e7e20a308a46 Binary files /dev/null and b/3-bmab/sd_bmab/pipeline/internal/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/pipeline/internal/__pycache__/intermediate.cpython-310.pyc b/3-bmab/sd_bmab/pipeline/internal/__pycache__/intermediate.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3530c9fe29a55bdec478f0552495433ac55ba845 Binary files /dev/null and b/3-bmab/sd_bmab/pipeline/internal/__pycache__/intermediate.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/pipeline/internal/__pycache__/internalpipe.cpython-310.pyc b/3-bmab/sd_bmab/pipeline/internal/__pycache__/internalpipe.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..0a46bcd2904b7fa1b1e9427ff93a17233f9b8532 Binary files /dev/null and b/3-bmab/sd_bmab/pipeline/internal/__pycache__/internalpipe.cpython-310.pyc differ
diff --git a/3-bmab/sd_bmab/pipeline/internal/intermediate.py b/3-bmab/sd_bmab/pipeline/internal/intermediate.py
new file mode 100644
index 0000000000000000000000000000000000000000..93d051aaa6618aaee7f107a90ae3b93c54665833
--- /dev/null
+++ b/3-bmab/sd_bmab/pipeline/internal/intermediate.py
@@ -0,0 +1,20 @@
+from PIL import Image
+
+from sd_bmab.base.context import Context
+from sd_bmab.base.processorbase import ProcessorBase
+from sd_bmab.pipeline.internal import process_intermediate_before_upscale, process_intermediate_after_upscale
+
+
+class Preprocess(ProcessorBase):
+	def __init__(self) -> None:
+		super().__init__()
+
+	def preprocess(self, context: Context, image: Image):
+		return context.is_txtimg() and not context.is_hires_fix()
+
+	def process(self, context: Context, image: Image):
+		image = process_intermediate_before_upscale(context, image)
+		return process_intermediate_after_upscale(context, image)
+
+	def postprocess(self, context: Context, image: Image):
+		pass
diff --git a/3-bmab/sd_bmab/pipeline/internal/internalpipe.py b/3-bmab/sd_bmab/pipeline/internal/internalpipe.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe4e8aebeab3511aa6cc401f21289faf5313090a
--- /dev/null
+++ b/3-bmab/sd_bmab/pipeline/internal/internalpipe.py
@@ -0,0 +1,132 @@
+import traceback
+from functools import partial
+from modules import images
+from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img
+from modules import shared
+
+from sd_bmab.util import debug_print
+from sd_bmab.base import Context
+from sd_bmab.processors.detailer import FaceDetailer, FaceDetailerBeforeUpsacle
+
+from sd_bmab.processors.basic import EdgeEnhancement, NoiseAlpha, Img2imgMasking, ICLightBeforeUpsacle
+from sd_bmab.processors.preprocess import ResizeIntermidiateBeforeUpscale
+from sd_bmab.processors.preprocess import ResamplePreprocessorBeforeUpscale, ResizeIntermidiateAfterUpsacle
+from sd_bmab.processors.preprocess import PretrainingDetailerBeforeUpscale
+
+
+def is_controlnet_required(context):
+	pipeline_modules = [
+		ResamplePreprocessorBeforeUpscale(),
+		ResizeIntermidiateBeforeUpscale()
+	]
+	for mod in pipeline_modules:
+		if mod.use_controlnet(context):
+			return True
+	return False
+
+
+def process_intermediate_before_upscale(context, image):
+	pipeline_before_upscale = [
+		ResamplePreprocessorBeforeUpscale(),
+		PretrainingDetailerBeforeUpscale(),
+		FaceDetailerBeforeUpsacle(),
+		ICLightBeforeUpsacle(),
+		ResizeIntermidiateBeforeUpscale(),
+	]
+
+	processed = image.copy()
+	for proc in pipeline_before_upscale:
+		try:
+			result = proc.preprocess(context, processed)
+			if result is None or not result:
+				continue
+			if shared.state.interrupted or shared.state.skipped:
+				break
+			ret = proc.process(context, processed)
+			if shared.state.interrupted or shared.state.skipped:
+				break
+			proc.postprocess(context, processed)
+			processed = ret
+		except Exception:
+			traceback.print_exc()
+	return processed
+
+
+def process_intermediate_after_upscale(context, image):
+	pipeline_after_upscale = [
+		EdgeEnhancement(),
+		ResizeIntermidiateAfterUpsacle(),
+		Img2imgMasking(),
+		NoiseAlpha(),
+	]
+
+	processed = image.copy()
+	for proc in pipeline_after_upscale:
+		result = proc.preprocess(context, processed)
+		if result is None or not result:
+			continue
+		if 
shared.state.interrupted or shared.state.skipped: + break + ret = proc.process(context, processed) + proc.postprocess(context, processed) + processed = ret + return processed + + +def process_img2img(ctx): + if not ctx.is_img2img(): + return + + image = ctx.sdprocessing.init_images[0] + debug_print('process img2img ', image.size) + image = process_intermediate_before_upscale(ctx, image) + image = process_intermediate_after_upscale(ctx, image) + debug_print('process img2img ', image.size) + ctx.sdprocessing.init_images[0] = image + + +''' +def process_hiresfix(ctx): + if not isinstance(ctx.sdprocessing, StableDiffusionProcessingTxt2Img): + return + + if hasattr(ctx.sdprocessing, '__sample'): + return + + all_processors = [ + FaceDetailer(), + EdgeEnhancement(), + ResizeIntermidiateBeforeUpscale(), + ResizeIntermidiateAfterUpsacle(), + NoiseAlpha() + ] + + if True not in [proc.preprocess(ctx, None) for proc in all_processors]: + return + + ctx.sdprocessing.__sample = ctx.sdprocessing.sample + + def resize(ctx: Context, resize_mode, img, width, height, upscaler_name=None): + images.resize_image = ctx.sdprocessing.resize_hook + pidx = ctx.sdprocessing.iteration * ctx.sdprocessing.batch_size + ctx.index += 1 + ctx.args['current_prompt'] = ctx.sdprocessing.all_prompts[pidx] + img = process_intermediate_step1(ctx, img) + im = ctx.sdprocessing.resize_hook(resize_mode, img, width, height, upscaler_name) + im = process_intermediate_step2(ctx, im) + images.resize_image = partial(resize, ctx) + return im + + def _sample(ctx: Context, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts): + ctx.sdprocessing.resize_hook = images.resize_image + images.resize_image = partial(resize, ctx) + try: + ret = ctx.sdprocessing.__sample(conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts) + except Exception as e: + raise e + finally: + images.resize_image = ctx.sdprocessing.resize_hook + return ret + + ctx.sdprocessing.sample = partial(_sample, ctx) +''' diff --git a/3-bmab/sd_bmab/pipeline/post/__init__.py b/3-bmab/sd_bmab/pipeline/post/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..acf4fd7e56219b4073b387d4903083b2ec32ea2e --- /dev/null +++ b/3-bmab/sd_bmab/pipeline/post/__init__.py @@ -0,0 +1 @@ +from sd_bmab.pipeline.post.mainpipe import process, process_controlnet, release, is_controlnet_required diff --git a/3-bmab/sd_bmab/pipeline/post/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/pipeline/post/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..82f6e04ff9e6670ea56cb289e512f0b0a272e78b Binary files /dev/null and b/3-bmab/sd_bmab/pipeline/post/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/pipeline/post/__pycache__/mainpipe.cpython-310.pyc b/3-bmab/sd_bmab/pipeline/post/__pycache__/mainpipe.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..49890584ce94a87be20edf7536b836ccde9e9ea0 Binary files /dev/null and b/3-bmab/sd_bmab/pipeline/post/__pycache__/mainpipe.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/pipeline/post/mainpipe.py b/3-bmab/sd_bmab/pipeline/post/mainpipe.py new file mode 100644 index 0000000000000000000000000000000000000000..0e5f03330236b983d3314b830fadca3406feea69 --- /dev/null +++ b/3-bmab/sd_bmab/pipeline/post/mainpipe.py @@ -0,0 +1,126 @@ +import traceback + +from modules import shared + +from sd_bmab.processors.postprocess import AfterProcessUpscaler, 
BeforeProcessUpscaler +from sd_bmab.processors.postprocess import InpaintResize, InpaintLamaResize, FinalFilter +from sd_bmab.processors.detailer import FaceDetailer, PersonDetailer, HandDetailer, PreprocessFaceDetailer +from sd_bmab.processors.utils import BeforeProcessFileSaver, AfterProcessFileSaver +from sd_bmab.processors.utils import ApplyModel, RollbackModel, CheckPointChanger, CheckPointRestore +from sd_bmab.processors.basic import FinalProcessorBasic, ICLightAfterUpsacle +from sd_bmab.processors.controlnet import LineartNoise, Openpose, IpAdapter +from sd_bmab.processors.preprocess import RefinerPreprocessor, PretrainingDetailer +from sd_bmab.processors.preprocess import ResamplePreprocessor, PreprocessFilter +from sd_bmab.processors.postprocess import Watermark +from sd_bmab.pipeline.internal import Preprocess +from sd_bmab.util import debug_print + + +def is_controlnet_required(context): + pipeline_modules = [ + ResamplePreprocessor(), + InpaintLamaResize(), + ] + for mod in pipeline_modules: + if mod.use_controlnet(context): + return True + return False + + +def process(context, image): + pipeline_modules = [ + BeforeProcessFileSaver(), + PreprocessFaceDetailer(), + CheckPointChanger(), + PreprocessFilter(), + ResamplePreprocessor(), + PretrainingDetailer(), + Preprocess(), + RefinerPreprocessor(), + ICLightAfterUpsacle(), + InpaintResize(), + InpaintLamaResize(), + BeforeProcessUpscaler(), + ApplyModel(), + PersonDetailer(), + FaceDetailer(), + HandDetailer(), + RollbackModel(), + CheckPointRestore(), + AfterProcessUpscaler(), + FinalProcessorBasic(), + FinalFilter(), + Watermark(), + AfterProcessFileSaver() + ] + + processed = image.copy() + + try: + for proc in pipeline_modules: + try: + if shared.state.interrupted or shared.state.skipped: + return + + result = proc.preprocess(context, processed) + if result is None or not result: + continue + if shared.state.interrupted or shared.state.skipped: + break + ret = proc.process(context, processed) + if shared.state.interrupted or shared.state.skipped: + break + proc.postprocess(context, processed) + processed = ret + except: + traceback.print_exc() + finally: + debug_print('Restore Checkpoint at final') + RollbackModel().process(context, processed) + CheckPointRestore().process(context, processed) + for proc in pipeline_modules: + try: + proc.finalprocess(context, processed) + except: + traceback.print_exc() + return processed + +''' +def process_intermediate(context, image): + all_processors = [ + FaceDetailer(), + EdgeEnhancement(), + ResizeIntermidiate(), + NoiseAlpha() + ] + + processed = image.copy() + + for proc in all_processors: + result = proc.preprocess(context, processed) + if result is None or not result: + continue + ret = proc.process(context, processed) + proc.postprocess(context, processed) + processed = ret + + return processed +''' + +def process_controlnet(context): + all_processors = [ + LineartNoise(), + Openpose(), + IpAdapter() + ] + + for proc in all_processors: + result = proc.preprocess(context, None) + if result is None or not result: + continue + proc.process(context, None) + proc.postprocess(context, None) + + +def release(): + pass diff --git a/3-bmab/sd_bmab/processors/__init__.py b/3-bmab/sd_bmab/processors/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/3-bmab/sd_bmab/processors/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/processors/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..f1b0be4d543a3ff1b73cfb272ea849eff5995438 Binary files /dev/null and b/3-bmab/sd_bmab/processors/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/basic/__init__.py b/3-bmab/sd_bmab/processors/basic/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dbf5de10b14c04cac534af34430915d44b1d7f36 --- /dev/null +++ b/3-bmab/sd_bmab/processors/basic/__init__.py @@ -0,0 +1,7 @@ +from sd_bmab.processors.basic.edge import EdgeEnhancement +from sd_bmab.processors.basic.final import FinalProcessorBasic +from sd_bmab.processors.basic.intermediate import NoiseAlpha +from sd_bmab.processors.basic.blend import BlendImage +from sd_bmab.processors.basic.img2imgmasking import Img2imgMasking +from sd_bmab.processors.basic.iclight import ICLight, ICLightBeforeUpsacle, ICLightAfterUpsacle + diff --git a/3-bmab/sd_bmab/processors/basic/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/processors/basic/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1dc4bc18581f39c7cf42b928562f8f274ad679c2 Binary files /dev/null and b/3-bmab/sd_bmab/processors/basic/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/basic/__pycache__/blend.cpython-310.pyc b/3-bmab/sd_bmab/processors/basic/__pycache__/blend.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f452a976a0d6710f76fcee63311fa0ceab3f373c Binary files /dev/null and b/3-bmab/sd_bmab/processors/basic/__pycache__/blend.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/basic/__pycache__/edge.cpython-310.pyc b/3-bmab/sd_bmab/processors/basic/__pycache__/edge.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..99fc676f7726bfe8ddb7911dfa2e31eb4c4e7e28 Binary files /dev/null and b/3-bmab/sd_bmab/processors/basic/__pycache__/edge.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/basic/__pycache__/final.cpython-310.pyc b/3-bmab/sd_bmab/processors/basic/__pycache__/final.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c5c1298ea84258010711590f2fab29b2f9697259 Binary files /dev/null and b/3-bmab/sd_bmab/processors/basic/__pycache__/final.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/basic/__pycache__/iclight.cpython-310.pyc b/3-bmab/sd_bmab/processors/basic/__pycache__/iclight.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e549ef9b42de6195bfc00f19a9d2580bf976d01e Binary files /dev/null and b/3-bmab/sd_bmab/processors/basic/__pycache__/iclight.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/basic/__pycache__/img2imgmasking.cpython-310.pyc b/3-bmab/sd_bmab/processors/basic/__pycache__/img2imgmasking.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..949f2257f2766c2ae45ad0b3ebb5721b5b38d601 Binary files /dev/null and b/3-bmab/sd_bmab/processors/basic/__pycache__/img2imgmasking.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/basic/__pycache__/intermediate.cpython-310.pyc b/3-bmab/sd_bmab/processors/basic/__pycache__/intermediate.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..983f714fb38fdf04fceecd93750b1397da9b16ac Binary files /dev/null and b/3-bmab/sd_bmab/processors/basic/__pycache__/intermediate.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/basic/__pycache__/preprocessfilter.cpython-310.pyc 
b/3-bmab/sd_bmab/processors/basic/__pycache__/preprocessfilter.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4ae5b46bbeca3385edbc19e75097576cde6d4924 Binary files /dev/null and b/3-bmab/sd_bmab/processors/basic/__pycache__/preprocessfilter.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/basic/blend.py b/3-bmab/sd_bmab/processors/basic/blend.py new file mode 100644 index 0000000000000000000000000000000000000000..493bb3fe118b3021e99b92aa1f7bf622ed65a5e7 --- /dev/null +++ b/3-bmab/sd_bmab/processors/basic/blend.py @@ -0,0 +1,33 @@ +from PIL import Image + +from sd_bmab.base.context import Context +from sd_bmab.base.processorbase import ProcessorBase +from sd_bmab.util import debug_print + + +class BlendImage(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.enabled = False + self.input_image = None + self.alpha = 0 + + def preprocess(self, context: Context, image: Image): + self.enabled = context.args['blend_enabled'] + self.input_image = context.args['input_image'] + self.alpha = context.args['blend_alpha'] + #return self.enabled and self.input_image is not None and 0 <= self.alpha <= 1 + return False + + def process(self, context: Context, image: Image): + context.add_generation_param('BMAB blend alpha', self.alpha) + #blend = Image.fromarray(self.input_image, mode='RGB') + blend = self.input_image + img = Image.new(mode='RGB', size=image.size) + img.paste(image, (0, 0)) + img.paste(blend) + image = Image.blend(image, img, alpha=self.alpha) + return image + + def postprocess(self, context: Context, image: Image): + pass diff --git a/3-bmab/sd_bmab/processors/basic/edge.py b/3-bmab/sd_bmab/processors/basic/edge.py new file mode 100644 index 0000000000000000000000000000000000000000..e2b831316506b23d27dfc7453e35671b2968179b --- /dev/null +++ b/3-bmab/sd_bmab/processors/basic/edge.py @@ -0,0 +1,40 @@ +import cv2 +import numpy as np + +from PIL import Image +from PIL import ImageOps +from sd_bmab.base.context import Context +from sd_bmab.base.processorbase import ProcessorBase + + +class EdgeEnhancement(ProcessorBase): + def __init__(self) -> None: + super().__init__() + + self.edge_low_threadhold = 50 + self.edge_high_threadhold = 200 + self.edge_strength = 0.5 + + def preprocess(self, context: Context, image: Image): + if context.args['edge_flavor_enabled']: + self.edge_low_threadhold = context.args['edge_low_threadhold'] + self.edge_high_threadhold = context.args['edge_high_threadhold'] + self.edge_strength = context.args['edge_strength'] + return context.args['edge_flavor_enabled'] + + def process(self, context: Context, image: Image): + context.add_generation_param('BMAB edge flavor low threadhold', self.edge_low_threadhold) + context.add_generation_param('BMAB edge flavor high threadhold', self.edge_high_threadhold) + context.add_generation_param('BMAB edge flavor strength', self.edge_strength) + + numpy_image = np.array(image) + base = cv2.cvtColor(numpy_image, cv2.COLOR_RGB2BGR) + arcanny = cv2.Canny(base, self.edge_low_threadhold, self.edge_high_threadhold) + canny = Image.fromarray(arcanny) + canny = ImageOps.invert(canny) + + img = image.convert('RGB') + newdata = [(0, 0, 0) if mdata == 0 else ndata for mdata, ndata in zip(canny.getdata(), img.getdata())] + newbase = Image.new('RGB', img.size) + newbase.putdata(newdata) + return Image.blend(img, newbase, alpha=self.edge_strength).convert("RGB") diff --git a/3-bmab/sd_bmab/processors/basic/final.py b/3-bmab/sd_bmab/processors/basic/final.py new file mode 
100644 index 0000000000000000000000000000000000000000..b11081609b1fabd376dc5bb87dfe9620e2d3870f --- /dev/null +++ b/3-bmab/sd_bmab/processors/basic/final.py @@ -0,0 +1,111 @@ +import math + +from PIL import Image +from PIL import ImageEnhance + +from sd_bmab import util +from sd_bmab.base.context import Context +from sd_bmab.base.processorbase import ProcessorBase + + +def calc_color_temperature(temp): + white = (255.0, 254.11008387561782, 250.0419083427406) + + temperature = temp / 100 + + if temperature <= 66: + red = 255.0 + else: + red = float(temperature - 60) + red = 329.698727446 * math.pow(red, -0.1332047592) + if red < 0: + red = 0 + if red > 255: + red = 255 + + if temperature <= 66: + green = temperature + green = 99.4708025861 * math.log(green) - 161.1195681661 + else: + green = float(temperature - 60) + green = 288.1221695283 * math.pow(green, -0.0755148492) + if green < 0: + green = 0 + if green > 255: + green = 255 + + if temperature >= 66: + blue = 255.0 + else: + if temperature <= 19: + blue = 0.0 + else: + blue = float(temperature - 10) + blue = 138.5177312231 * math.log(blue) - 305.0447927307 + if blue < 0: + blue = 0 + if blue > 255: + blue = 255 + + return red / white[0], green / white[1], blue / white[2] + + +class FinalProcessorBasic(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.noise_alpha_final = 0 + self.contrast = 1 + self.brightness = 1 + self.sharpeness = 1 + self.color_saturation = 1 + self.color_temperature = 0 + + def preprocess(self, context: Context, image: Image): + self.contrast = context.args['contrast'] + self.brightness = context.args['brightness'] + self.sharpeness = context.args['sharpeness'] + self.color_saturation = context.args['color_saturation'] + self.color_temperature = context.args['color_temperature'] + self.noise_alpha_final = context.args['noise_alpha_final'] + return True + + def process(self, context: Context, image: Image): + + if self.noise_alpha_final != 0: + context.add_generation_param('BMAB noise alpha final', self.noise_alpha_final) + img_noise = util.generate_noise(context.sdprocessing.seed, image.size[0], image.size[1]) + image = Image.blend(image, img_noise, alpha=self.noise_alpha_final) + + if self.contrast != 1: + context.add_generation_param('BMAB contrast', self.contrast) + enhancer = ImageEnhance.Contrast(image) + image = enhancer.enhance(self.contrast) + + if self.brightness != 1: + context.add_generation_param('BMAB brightness', self.brightness) + enhancer = ImageEnhance.Brightness(image) + image = enhancer.enhance(self.brightness) + + if self.sharpeness != 1: + context.add_generation_param('BMAB sharpeness', self.sharpeness) + enhancer = ImageEnhance.Sharpness(image) + image = enhancer.enhance(self.sharpeness) + + if self.color_saturation != 1: + context.add_generation_param('BMAB color', self.color_saturation) + enhancer = ImageEnhance.Color(image) + image = enhancer.enhance(self.color_saturation) + + if self.color_temperature != 0: + context.add_generation_param('BMAB color temperature', self.color_temperature) + temp = calc_color_temperature(6500 + self.color_temperature) + az = [] + for d in image.getdata(): + az.append((int(d[0] * temp[0]), int(d[1] * temp[1]), int(d[2] * temp[2]))) + image = Image.new('RGB', image.size) + image.putdata(az) + + return image + + def postprocess(self, context: Context, image: Image): + pass diff --git a/3-bmab/sd_bmab/processors/basic/iclight.py b/3-bmab/sd_bmab/processors/basic/iclight.py new file mode 100644 index 
0000000000000000000000000000000000000000..7af482d3f3cecd81a639284182cac9fea464f111 --- /dev/null +++ b/3-bmab/sd_bmab/processors/basic/iclight.py @@ -0,0 +1,68 @@ +from PIL import Image + +from sd_bmab.base import cache +from sd_bmab.base import ProcessorBase +from sd_bmab.base.context import Context +from sd_bmab.external import load_external_module + + +class ICLight(ProcessorBase): + + def __init__(self) -> None: + super().__init__() + self.iclight_opt = {} + self.enabled = False + self.enable_before_upscale = False + self.style = 'normal' + self.prompt = '' + self.preference = 'None' + self.use_background_image = False + self.blending = 0.5 + + def preprocess(self, context: Context, image: Image): + self.iclight_opt = context.args.get('module_config', {}).get('iclight', {}) + self.enabled = self.iclight_opt.get('enabled', self.enabled) + self.enable_before_upscale = self.iclight_opt.get('enable_before_upscale', self.enable_before_upscale) + self.style = self.iclight_opt.get('style', self.style) + self.prompt = self.iclight_opt.get('prompt', self.prompt) + self.preference = self.iclight_opt.get('preference', self.preference) + self.use_background_image = self.iclight_opt.get('use_background_image', self.use_background_image) + self.blending = self.iclight_opt.get('blending', self.blending) + return self.enabled + + def process(self, context: Context, image: Image): + mod = load_external_module('iclight', 'bmabiclight') + bg_image = self.get_background_image() if self.use_background_image else None + return mod.bmab_relight(context, self.style, image, bg_image, self.prompt, self.blending, self.preference) + + def postprocess(self, context: Context, image: Image): + pass + + @staticmethod + def get_background_image(): + img = cache.get_image_from_cache('iclight_background.png') + if img is not None: + return img + return Image.new('RGB', (512, 768), (0, 0, 0)) + + @staticmethod + def put_backgound_image(img): + cache.put_image_to_cache('iclight_background.png', img) + + @staticmethod + def get_styles(): + return ['intensive', 'less intensive', 'normal', 'soft'] + + +class ICLightBeforeUpsacle(ICLight): + + def preprocess(self, context: Context, image: Image): + super().preprocess(context, image) + return self.enabled and self.enable_before_upscale + + +class ICLightAfterUpsacle(ICLight): + + def preprocess(self, context: Context, image: Image): + super().preprocess(context, image) + return self.enabled and not self.enable_before_upscale diff --git a/3-bmab/sd_bmab/processors/basic/img2imgmasking.py b/3-bmab/sd_bmab/processors/basic/img2imgmasking.py new file mode 100644 index 0000000000000000000000000000000000000000..b8f71c731395ce52e670816f2dea5590094b5f14 --- /dev/null +++ b/3-bmab/sd_bmab/processors/basic/img2imgmasking.py @@ -0,0 +1,47 @@ +from PIL import Image + +from modules import devices +from modules.processing import StableDiffusionProcessingImg2Img + +from sd_bmab import masking +from sd_bmab import external +from sd_bmab.base.context import Context +from sd_bmab.base.processorbase import ProcessorBase + + +class Img2imgMasking(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.enabled = False + self.prompt = '' + self.input_image = None + + def preprocess(self, context: Context, image: Image): + self.enabled = context.args['detect_enabled'] + self.prompt = context.args['masking_prompt'] + self.input_image = context.args['input_image'] + return not context.is_txtimg() and self.enabled + + def sam(self, context, prompt, input_image): + with 
external.ModuleAutoLoader('groundingdino', 'grdino') as dino:
+			boxes, logits, phrases = dino.dino_predict(input_image, prompt, 0.35, 0.25)
+		sam = masking.get_mask_generator()
+		mask = sam.predict(input_image, boxes)
+		return mask
+
+	def process(self, context: Context, image: Image):
+		if context.sdprocessing.image_mask is not None:
+			# replace the user-drawn mask with one predicted from the masking prompt
+			context.sdprocessing.image_mask = self.sam(context, self.prompt, context.sdprocessing.init_images[0])
+			context.script.extra_image.append(context.sdprocessing.image_mask)
+		if context.sdprocessing.image_mask is None and self.input_image is not None:
+			mask = self.sam(context, self.prompt, context.sdprocessing.init_images[0])
+			newpil = Image.new('RGB', context.sdprocessing.init_images[0].size)
+			# outside the mask keep the supplied background image, inside keep the init image
+			newdata = [bdata if mdata == 0 else ndata for mdata, ndata, bdata in
+					   zip(mask.getdata(), context.sdprocessing.init_images[0].getdata(), self.input_image.getdata())]
+			newpil.putdata(newdata)
+			context.script.extra_image.append(newpil)
+			return newpil
+		return image
+
+	def postprocess(self, context: Context, image: Image):
+		devices.torch_gc()
diff --git a/3-bmab/sd_bmab/processors/basic/intermediate.py b/3-bmab/sd_bmab/processors/basic/intermediate.py
new file mode 100644
index 0000000000000000000000000000000000000000..015263563f69c47eec475172c4f888b35577173c
--- /dev/null
+++ b/3-bmab/sd_bmab/processors/basic/intermediate.py
@@ -0,0 +1,22 @@
+from PIL import Image
+from sd_bmab import util
+from sd_bmab.base.context import Context
+from sd_bmab.base.processorbase import ProcessorBase
+
+
+class NoiseAlpha(ProcessorBase):
+	def __init__(self) -> None:
+		super().__init__()
+		self.noise_alpha = 0
+
+	def preprocess(self, context: Context, image: Image):
+		self.noise_alpha = context.args['noise_alpha']
+		return self.noise_alpha != 0
+
+	def process(self, context: Context, image: Image):
+		context.add_generation_param('BMAB noise alpha', self.noise_alpha)
+		img_noise = util.generate_noise(context.sdprocessing.seed, image.size[0], image.size[1])
+		return Image.blend(image, img_noise, alpha=self.noise_alpha)
+
+	def postprocess(self, context: Context, image: Image):
+		pass
diff --git a/3-bmab/sd_bmab/processors/basic/preprocessfilter.py b/3-bmab/sd_bmab/processors/basic/preprocessfilter.py
new file mode 100644
index 0000000000000000000000000000000000000000..23fb496bf53d8cda791d6f7aafb32e08fade753e
--- /dev/null
+++ b/3-bmab/sd_bmab/processors/basic/preprocessfilter.py
@@ -0,0 +1,13 @@
+from sd_bmab.base import filter
+
+
+def run_preprocess_filter(context):
+	module_config = context.args.get('module_config', {})
+	filter_name = module_config.get('preprocess_filter', None)
+	if filter_name is None or filter_name == 'None':
+		return
+
+	bmab_filter = filter.get_filter(filter_name)
+	filter.preprocess_filter(bmab_filter, context, None)
+	filter.process_filter(bmab_filter, context, None, None)
+	filter.postprocess_filter(bmab_filter, context)
diff --git a/3-bmab/sd_bmab/processors/controlnet/__init__.py b/3-bmab/sd_bmab/processors/controlnet/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b38db3b983836e8e7f9fbf7948aadf5693fefa40
--- /dev/null
+++ b/3-bmab/sd_bmab/processors/controlnet/__init__.py
@@ -0,0 +1,3 @@
+from sd_bmab.processors.controlnet.noise import LineartNoise
+from sd_bmab.processors.controlnet.pose import Openpose
+from sd_bmab.processors.controlnet.ipadapter import IpAdapter
diff --git a/3-bmab/sd_bmab/processors/controlnet/__pycache__/__init__.cpython-310.pyc 
b/3-bmab/sd_bmab/processors/controlnet/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5a898262d21c86b827b684b817cf854b62aafd7 Binary files /dev/null and b/3-bmab/sd_bmab/processors/controlnet/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/controlnet/__pycache__/ipadapter.cpython-310.pyc b/3-bmab/sd_bmab/processors/controlnet/__pycache__/ipadapter.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c07de1be67d6f8564c4e728412e78fa33b3650af Binary files /dev/null and b/3-bmab/sd_bmab/processors/controlnet/__pycache__/ipadapter.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/controlnet/__pycache__/noise.cpython-310.pyc b/3-bmab/sd_bmab/processors/controlnet/__pycache__/noise.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..69eaa358f8cd48e36f5d980f0697717a8b6ce1f6 Binary files /dev/null and b/3-bmab/sd_bmab/processors/controlnet/__pycache__/noise.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/controlnet/__pycache__/pose.cpython-310.pyc b/3-bmab/sd_bmab/processors/controlnet/__pycache__/pose.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..452c0431dce4cda3c5839d666b91ed6fd7c71d78 Binary files /dev/null and b/3-bmab/sd_bmab/processors/controlnet/__pycache__/pose.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/controlnet/ipadapter.py b/3-bmab/sd_bmab/processors/controlnet/ipadapter.py new file mode 100644 index 0000000000000000000000000000000000000000..eff0cf1bed906b3d18ed69df599df892e2cfb6f2 --- /dev/null +++ b/3-bmab/sd_bmab/processors/controlnet/ipadapter.py @@ -0,0 +1,168 @@ +import os +import glob +import random + +from PIL import Image + +from modules import shared + +import sd_bmab +from sd_bmab import util, controlnet +from sd_bmab.util import debug_print +from sd_bmab.base.context import Context +from sd_bmab.base.processorbase import ProcessorBase + +weight_type = [ + ('normal', [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), + ('ease in', [1.0, 0.94, 0.88, 0.82, 0.76, 0.7, 0.64, 0.58, 0.53, 0.47, 0.41, 0.35, 0.29, 0.23, 0.17, 0.11]), + ('ease out', [0.05, 0.11, 0.17, 0.23, 0.29, 0.35, 0.41, 0.47, 0.53, 0.58, 0.64, 0.7, 0.76, 0.82, 0.88, 0.94]), + ('ease in-out', [0.05, 0.17, 0.29, 0.41, 0.53, 0.64, 0.76, 0.88, 1.0, 0.88, 0.76, 0.64, 0.53, 0.41, 0.29, 0.17]), + ('reverse in-out', [1.0, 0.88, 0.76, 0.64, 0.53, 0.41, 0.29, 0.17, 0.05, 0.17, 0.29, 0.41, 0.53, 0.64, 0.76, 0.88]), + ('weak input', [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), + ('weak output', [1, 1, 1, 1, 1, 1, 1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2]), + ('weak middle', [1, 1, 1, 1, 1, 1, 0.2, 1, 1, 1, 1, 1, 1, 1, 1, 1]), + ('strong middle', [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2]), + ('style transfer', [1, 1, 1, 1, 0.0, 0.0, 0.0, 0.0, 0.0, 1, 1, 1, 1, 1, 1, 1]), + ('composition', [0.0, 0.0, 0.0, 0.0, 0.25, 1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]), + ('strong style transfer', [1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) +] + + +class IpAdapter(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.controlnet_opt = {} + self.enabled = False + self.ipadapter_enabled = False + self.ipadapter_strength = 0.3 + self.ipadapter_begin = 0.0 + self.ipadapter_end = 1.0 + self.ipadapter_face_only = False + self.ipadapter_selected = 'Random' + self.ipadapter_weight_type = 'normal' + + def 
preprocess(self, context: Context, image: Image): + self.controlnet_opt = context.args.get('module_config', {}).get('controlnet', {}) + self.enabled = self.controlnet_opt.get('enabled', False) + self.ipadapter_enabled = self.controlnet_opt.get('ipadapter', False) + self.ipadapter_strength = self.controlnet_opt.get('ipadapter_strength', self.ipadapter_strength) + self.ipadapter_begin = self.controlnet_opt.get('ipadapter_begin', self.ipadapter_begin) + self.ipadapter_end = self.controlnet_opt.get('ipadapter_end', self.ipadapter_end) + self.ipadapter_face_only = self.controlnet_opt.get('ipadapter_face_only', self.ipadapter_face_only) + self.ipadapter_selected = self.controlnet_opt.get('ipadapter_selected', self.ipadapter_selected) + self.ipadapter_weight_type = self.controlnet_opt.get('ipadapter_weight_type', self.ipadapter_weight_type) + return self.enabled and self.ipadapter_enabled + + def get_openipadapter_args(self, image): + cn_args = { + 'enabled': True, + 'image': image if isinstance(image, str) and os.path.exists(image) else util.b64_encoding(image.convert('RGB')), + 'module': 'ip-adapter-auto', + 'model': shared.opts.bmab_cn_ipadapter, + 'weight': self.ipadapter_strength, + "guidance_start": self.ipadapter_begin, + "guidance_end": self.ipadapter_end, + 'resize_mode': 'Just Resize', + 'pixel_perfect': False, + 'control_mode': 'My prompt is more important', + 'processor_res': 1024, + 'threshold_a': 0.5, + 'threshold_b': 0.5, + 'hr_option': 'Low res only', + 'advanced_weighting': self.get_weight_type(self.ipadapter_weight_type, self.ipadapter_strength) + } + return cn_args + + def process(self, context: Context, image: Image): + context.add_generation_param('BMAB controlnet ipadapter mode', 'ip-adapter-auto') + context.add_generation_param('BMAB ipadapter strength', self.ipadapter_strength) + context.add_generation_param('BMAB ipadapter begin', self.ipadapter_begin) + context.add_generation_param('BMAB ipadapter end', self.ipadapter_end) + context.add_generation_param('BMAB ipadapter image', self.ipadapter_selected) + context.add_generation_param('BMAB ipadapter weight type', self.ipadapter_weight_type) + + img = self.load_image(context) + if img is None: + return + + index = controlnet.get_controlnet_index(context.sdprocessing) + cn_op_arg = self.get_openipadapter_args(img) + debug_print(f'IpAdapter Enabled {index}') + sc_args = list(context.sdprocessing.script_args) + sc_args[index] = cn_op_arg + context.sdprocessing.script_args = tuple(sc_args) + + def postprocess(self, context: Context, image: Image): + pass + + def load_image(self, context): + if self.ipadapter_selected == 'Random': + images = IpAdapter.list_images() + img = random.choice(images) + return self.get_image(img) + else: + return self.get_image(self.ipadapter_selected) + + @staticmethod + def list_images(): + root_path = os.path.dirname(sd_bmab.__file__) + root_path = os.path.normpath(os.path.join(root_path, '../resources/ipadapter')) + if not os.path.exists(root_path) or not os.path.isdir(root_path): + return [] + return [os.path.relpath(f, root_path) for f in IpAdapter.list_images_in_dir(root_path)] + + @staticmethod + def list_images_in_dir(path): + files = [] + dirs = [] + for file in glob.glob(f'{path}/*'): + if os.path.isdir(file): + dirs.append(file) + continue + if not file.endswith('.txt'): + files.append(file) + + files = sorted(files) + for dir in dirs: + files.append(dir) + files.extend(IpAdapter.list_images_in_dir(dir)) + + return files + + @staticmethod + def get_image(f, displayed=False): + if displayed 
and (f is None or f == 'Random'): + return Image.new('RGB', (512, 512)) + root_path = os.path.dirname(sd_bmab.__file__) + root_path = os.path.normpath(os.path.join(root_path, '../resources/ipadapter')) + image_path = os.path.join(root_path, f) + if os.path.isdir(image_path): + if displayed: + return Image.new('RGB', (512, 512)) + files = [os.path.relpath(f, root_path) for f in IpAdapter.list_images_in_dir(image_path) if not f.endswith('.txt')] + if not files: + debug_print(f'No ipadapter files found in {image_path}') + return Image.new('RGB', (512, 512)) if displayed else None + file = random.choice(files) + return IpAdapter.get_image(file) + else: + if os.path.exists(image_path): + return Image.open(image_path) + else: + return Image.new('RGB', (512, 512)) if displayed else None + + @staticmethod + def get_weight_type_list(): + return [wt[0] for wt in weight_type] + + @staticmethod + def get_weight_type(weight_type_name, weight): + for wt in weight_type: + if wt[0] == weight_type_name: + return [x * weight for x in wt[1]] + return [x * weight for x in weight_type[0][1]] + + @staticmethod + def ipadapter_selected(*args): + return IpAdapter.get_image(args[0], displayed=True) +
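As a worked example of the table above: get_weight_type scales the chosen 16-entry curve (one weight per ControlNet block) by the user strength, and falls back to 'normal' for unknown names. A quick sketch using only code shown in this file:

from sd_bmab.processors.controlnet.ipadapter import IpAdapter

# 'ease in' begins at 1.0 and decays, so strength 0.5 yields
# [0.5, 0.47, 0.44, ...] across the 16 blocks.
weights = IpAdapter.get_weight_type('ease in', 0.5)
assert len(weights) == 16
assert abs(weights[0] - 0.50) < 1e-9   # 1.00 * 0.5
assert abs(weights[1] - 0.47) < 1e-9   # 0.94 * 0.5
# Unknown curve names fall back to the first table entry ('normal'):
assert IpAdapter.get_weight_type('no-such-curve', 0.3) == [0.3] * 16

diff --git a/3-bmab/sd_bmab/processors/controlnet/noise.py b/3-bmab/sd_bmab/processors/controlnet/noise.py new file mode 100644 index 0000000000000000000000000000000000000000..d351a5c59e6069bc73659ea7ae58c6f1a457e4bc --- /dev/null +++ b/3-bmab/sd_bmab/processors/controlnet/noise.py @@ -0,0 +1,92 @@ +import os +from PIL import Image + +from modules import shared + +import sd_bmab +from sd_bmab import util, controlnet +from sd_bmab.util import debug_print +from sd_bmab.base.context import Context +from sd_bmab.base.processorbase import ProcessorBase +from sd_bmab.base import cache + + +class LineartNoise(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.controlnet_opt = {} + self.enabled = False + self.with_refiner = False + self.noise_enabled = False + self.noise_strength = 0.4 + self.noise_begin = 0.1 + self.noise_end = 0.9 + self.noise_hiresfix = 'Both' + + @staticmethod + def with_refiner(context: Context): + controlnet_opt = context.args.get('module_config', {}).get('controlnet', {}) + enabled = controlnet_opt.get('enabled', False) + with_refiner = controlnet_opt.get('with_refiner', False) + debug_print('with refiner', enabled, with_refiner) + return enabled and with_refiner + + def preprocess(self, context: Context, image: Image): + self.controlnet_opt = context.args.get('module_config', {}).get('controlnet', {}) + self.enabled = self.controlnet_opt.get('enabled', False) + self.with_refiner = self.controlnet_opt.get('with_refiner', False) + self.noise_enabled = self.controlnet_opt.get('noise', False) + self.noise_strength = self.controlnet_opt.get('noise_strength', 0.4) + self.noise_begin = self.controlnet_opt.get('noise_begin', 0.1) + self.noise_end = self.controlnet_opt.get('noise_end', 0.9) + self.noise_hiresfix = self.controlnet_opt.get('noise_hiresfix', self.noise_hiresfix) + + debug_print('Noise', context.is_refiner_context(), context.with_refiner(), self.with_refiner) + if context.is_refiner_context(): + return self.enabled and self.noise_enabled and self.with_refiner + elif context.with_refiner() and self.with_refiner: + return False + return self.enabled and self.noise_enabled + + @staticmethod + def get_noise_args(image, weight, begin, end, hr_option): + cn_args = { + 'enabled': True, + 'image': image if isinstance(image, str) and os.path.exists(image) else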
util.b64_encoding(image.convert('RGB')), + 'model': shared.opts.bmab_cn_lineart, + 'weight': weight, + "guidance_start": begin, + "guidance_end": end, + 'resize_mode': 'Just Resize', + 'pixel_perfect': False, + 'control_mode': 'ControlNet is more important', + 'processor_res': 512, + 'threshold_a': 0.5, + 'threshold_b': 0.5, + 'hr_option': hr_option + } + return cn_args + + def get_controlnet_args(self, context): + img = util.generate_noise(context.sdprocessing.seed, context.sdprocessing.width, context.sdprocessing.height) + noise = img.convert('L').convert('RGB') + return self.get_noise_args(noise, self.noise_strength, self.noise_begin, self.noise_end, self.noise_hiresfix) + + def process(self, context: Context, image: Image): + context.add_generation_param('BMAB controlnet mode', 'lineart') + context.add_generation_param('BMAB noise strength', self.noise_strength) + context.add_generation_param('BMAB noise begin', self.noise_begin) + context.add_generation_param('BMAB noise end', self.noise_end) + + index = controlnet.get_controlnet_index(context.sdprocessing) + img = cache.get_noise_from_cache(context.sdprocessing.seed, context.sdprocessing.width, context.sdprocessing.height) + cn_op_arg = self.get_noise_args(img, self.noise_strength, self.noise_begin, self.noise_end, self.noise_hiresfix) + debug_print(f'Noise Enabled {index}') + sc_args = list(context.sdprocessing.script_args) + sc_args[index] = cn_op_arg + context.sdprocessing.script_args = tuple(sc_args) + + def postprocess(self, context: Context, image: Image): + pass + + diff --git a/3-bmab/sd_bmab/processors/controlnet/pose.py b/3-bmab/sd_bmab/processors/controlnet/pose.py new file mode 100644 index 0000000000000000000000000000000000000000..ccbfeb52e5346f0dc93e1280917d010aa0a55a73 --- /dev/null +++ b/3-bmab/sd_bmab/processors/controlnet/pose.py @@ -0,0 +1,136 @@ +import os +import glob +import random + +from PIL import Image + +from modules import shared + +import sd_bmab +from sd_bmab import util, controlnet +from sd_bmab.util import debug_print +from sd_bmab.base.context import Context +from sd_bmab.base.processorbase import ProcessorBase + + +class Openpose(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.controlnet_opt = {} + self.enabled = False + self.pose_enabled = False + self.pose_strength = 0.3 + self.pose_begin = 0.0 + self.pose_end = 1.0 + self.pose_face_only = False + self.pose_selected = 'Random' + + def preprocess(self, context: Context, image: Image): + self.controlnet_opt = context.args.get('module_config', {}).get('controlnet', {}) + self.enabled = self.controlnet_opt.get('enabled', False) + self.pose_enabled = self.controlnet_opt.get('pose', False) + self.pose_strength = self.controlnet_opt.get('pose_strength', self.pose_strength) + self.pose_begin = self.controlnet_opt.get('pose_begin', self.pose_begin) + self.pose_end = self.controlnet_opt.get('pose_end', self.pose_end) + self.pose_face_only = self.controlnet_opt.get('pose_face_only', self.pose_face_only) + self.pose_selected = self.controlnet_opt.get('pose_selected', self.pose_selected) + return self.enabled and self.pose_enabled + + def get_openpose_args(self, image): + cn_args = { + 'enabled': True, + 'image': image if isinstance(image, str) and os.path.exists(image) else util.b64_encoding(image.convert('RGB')), + 'module': 'openpose_faceonly' if self.pose_face_only else 'openpose', + 'model': shared.opts.bmab_cn_openpose, + 'weight': self.pose_strength, + "guidance_start": self.pose_begin, + "guidance_end": self.pose_end, + 
'resize_mode': 'Just Resize', + 'pixel_perfect': False, + 'control_mode': 'My prompt is more important', + 'processor_res': 512, + 'threshold_a': 0.5, + 'threshold_b': 0.5, + 'hr_option': 'Low res only' + } + return cn_args + + def process(self, context: Context, image: Image): + context.add_generation_param('BMAB controlnet pose mode', 'openpose') + context.add_generation_param('BMAB pose strength', self.pose_strength) + context.add_generation_param('BMAB pose begin', self.pose_begin) + context.add_generation_param('BMAB pose end', self.pose_end) + + img = self.load_random_image(context) + if img is None: + return + + index = controlnet.get_controlnet_index(context.sdprocessing) + cn_op_arg = self.get_openpose_args(img) + debug_print(f'Pose Enabled {index}') + sc_args = list(context.sdprocessing.script_args) + sc_args[index] = cn_op_arg + context.sdprocessing.script_args = tuple(sc_args) + + def postprocess(self, context: Context, image: Image): + pass + + def load_random_image(self, context): + path = os.path.dirname(sd_bmab.__file__) + path = os.path.normpath(os.path.join(path, '../resources/pose')) + if os.path.exists(path) and os.path.isdir(path): + file_mask = f'{path}/*.*' + files = glob.glob(file_mask) + if not files: + debug_print(f'No pose files found in {path}') + return None + img = context.load('preprocess_image') + if img is not None: + return img + if self.pose_selected == 'Random': + file = random.choice(files) + debug_print(f'Random pose {file}') + return self.get_cache(context, file) + else: + img_name = f'{path}/{self.pose_selected}' + return self.get_cache(context, img_name) + debug_print(f'Directory not found: {path}') + return None + + def get_cache(self, context, file): + if self.pose_face_only: + path = os.path.dirname(sd_bmab.__file__) + path = os.path.normpath(os.path.join(path, '../resources/cache')) + if os.path.exists(path) and os.path.isdir(path): + b = os.path.basename(file) + file_mask = f'{path}/pose_face_{b}' + if os.path.exists(file_mask): + return Image.open(file_mask) + return Image.open(file) + + + @staticmethod + def list_pose(): + path = os.path.dirname(sd_bmab.__file__) + path = os.path.normpath(os.path.join(path, '../resources/pose')) + if os.path.exists(path) and os.path.isdir(path): + file_mask = f'{path}/*.*' + files = glob.glob(file_mask) + return [os.path.basename(f) for f in files] + debug_print(f'Directory not found: {path}') + return [] + + @staticmethod + def get_pose(f): + if f == 'Random' or f == 'Preprocess': + return Image.new('RGB', (512, 512), 0) + path = os.path.dirname(sd_bmab.__file__) + path = os.path.normpath(os.path.join(path, '../resources/pose')) + if os.path.exists(path) and os.path.isdir(path): + img_name = f'{path}/{f}' + return Image.open(img_name) + return Image.new('RGB', (512, 512), 0) + + @staticmethod + def pose_selected(*args): + return Openpose.get_pose(args[0])
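LineartNoise, Openpose, and IpAdapter all hand their unit to the ControlNet extension the same way: build the argument dict, find the extension's slot in script_args, and overwrite it. A condensed sketch of that shared pattern (set_controlnet_unit is a hypothetical name; controlnet.get_controlnet_index is the project helper used above):

from sd_bmab import controlnet


def set_controlnet_unit(sdprocessing, cn_unit: dict):
	# script_args is one flat tuple shared by every script; the ControlNet
	# extension owns the slice that starts at get_controlnet_index().
	index = controlnet.get_controlnet_index(sdprocessing)
	sc_args = list(sdprocessing.script_args)
	sc_args[index] = cn_unit  # occupy the first ControlNet unit slot
	sdprocessing.script_args = tuple(sc_args)

diff --git a/3-bmab/sd_bmab/processors/detailer/__init__.py b/3-bmab/sd_bmab/processors/detailer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6843eea59ec5c6d1a1f0395d611a25061444c458 --- /dev/null +++ b/3-bmab/sd_bmab/processors/detailer/__init__.py @@ -0,0 +1,3 @@ +from sd_bmab.processors.detailer.face import FaceDetailer, PreprocessFaceDetailer, FaceDetailerBeforeUpsacle +from sd_bmab.processors.detailer.hand import HandDetailer +from sd_bmab.processors.detailer.person import PersonDetailer diff --git a/3-bmab/sd_bmab/processors/detailer/__pycache__/__init__.cpython-310.pyc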
b/3-bmab/sd_bmab/processors/detailer/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6f19aeb0caa6b163dc0969f411fddac5d517c9a0 Binary files /dev/null and b/3-bmab/sd_bmab/processors/detailer/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/detailer/__pycache__/face.cpython-310.pyc b/3-bmab/sd_bmab/processors/detailer/__pycache__/face.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8cdb10d96a4d2f77f7fc8c975981b334f1f7377f Binary files /dev/null and b/3-bmab/sd_bmab/processors/detailer/__pycache__/face.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/detailer/__pycache__/hand.cpython-310.pyc b/3-bmab/sd_bmab/processors/detailer/__pycache__/hand.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0829481a5731d3169baf77266a159e0131112dc9 Binary files /dev/null and b/3-bmab/sd_bmab/processors/detailer/__pycache__/hand.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/detailer/__pycache__/person.cpython-310.pyc b/3-bmab/sd_bmab/processors/detailer/__pycache__/person.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..98413dd8fa037d7255af1454f06a4f7b4eb8ac6a Binary files /dev/null and b/3-bmab/sd_bmab/processors/detailer/__pycache__/person.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/detailer/face.py b/3-bmab/sd_bmab/processors/detailer/face.py new file mode 100644 index 0000000000000000000000000000000000000000..6cda7400ee49d673d20cbb567eaa8e5b304ce053 --- /dev/null +++ b/3-bmab/sd_bmab/processors/detailer/face.py @@ -0,0 +1,233 @@ +from math import sqrt + +from PIL import Image +from PIL import ImageDraw +from PIL import ImageFilter + +from modules import shared +from modules import devices +from modules import extra_networks +from modules.processing import StableDiffusionProcessingImg2Img + +from sd_bmab import constants, util +from sd_bmab.base import process_img2img, Context, ProcessorBase, VAEMethodOverride + +from sd_bmab.util import debug_print +from sd_bmab.detectors.detector import get_detector + + +class FaceDetailer(ProcessorBase): + def __init__(self) -> None: + super().__init__() + + self.enabled = False + self.hiresfix_enabled = False + self.detailing_opt = None + self.parameters = None + self.override_parameter = False + self.dilation = 4 + self.box_threshold = 0.35 + self.order = 'Score' + self.limit = 1 + self.checkpoint = constants.checkpoint_default + self.vae = constants.vae_default + self.sampler = constants.sampler_default + self.scheduler = constants.scheduler_default + self.disable_extra_networks = False + self.detection_model = 'Ultralytics(face_yolov8n.pt)' + self.max_element = shared.opts.bmab_max_detailing_element + self.skip_large_face = False + self.large_face_pixels = 0.26 + + def preprocess(self, context: Context, image: Image): + self.enabled = context.args['face_detailing_enabled'] + self.hiresfix_enabled = context.args['face_detailing_before_hiresfix_enabled'] + self.detailing_opt = context.args.get('module_config', {}).get('face_detailing_opt', {}) + self.parameters = dict(context.args.get('module_config', {}).get('face_detailing', {})) + self.override_parameter = self.detailing_opt.get('override_parameter', self.override_parameter) + self.dilation = self.detailing_opt.get('dilation', self.dilation) + self.box_threshold = self.detailing_opt.get('box_threshold', self.box_threshold) + self.order = self.detailing_opt.get('sort_by', 
self.order) + self.limit = self.detailing_opt.get('limit', self.limit) + self.checkpoint = self.detailing_opt.get('checkpoint', self.checkpoint) + self.vae = self.detailing_opt.get('vae', self.vae) + self.sampler = self.detailing_opt.get('sampler', self.sampler) + self.scheduler = self.detailing_opt.get('scheduler', self.scheduler) + self.disable_extra_networks = self.detailing_opt.get('disable_extra_networks', self.disable_extra_networks) + self.detection_model = self.detailing_opt.get('detection_model', self.detection_model) + self.skip_large_face = self.detailing_opt.get('skip_large_face', self.skip_large_face) + self.large_face_pixels = self.detailing_opt.get('large_face_pixels', self.large_face_pixels) + + return self.enabled + + def process(self, context: Context, image: Image): + + detector = get_detector(context, self.detection_model, box_threshold=self.box_threshold) + boxes, logits = detector.predict(context, image) + + org_size = image.size + debug_print('size', org_size, len(boxes), len(logits)) + + face_config = { + 'denoising_strength': self.parameters['denoising_strength'], + 'inpaint_full_res': self.parameters['inpaint_full_res'], + 'inpaint_full_res_padding': self.parameters['inpaint_full_res_padding'], + 'mask_blur': self.parameters['mask_blur'], + 'cfg_scale': context.sdprocessing.cfg_scale, + 'steps': context.sdprocessing.steps, + } + + if self.override_parameter: + face_config = dict(self.parameters) + else: + if shared.opts.bmab_keep_original_setting: + face_config['width'] = image.width + face_config['height'] = image.height + else: + face_config['width'] = 512 + face_config['height'] = 512 + if isinstance(context.sdprocessing, StableDiffusionProcessingImg2Img): + face_config['mask_blur'] = context.sdprocessing.mask_blur + + if self.sampler != constants.sampler_default: + face_config['sampler_name'] = self.sampler + if self.scheduler != constants.scheduler_default: + face_config['scheduler'] = self.scheduler + + context.add_generation_param('BMAB_face_option', util.dict_to_str(self.detailing_opt)) + context.add_generation_param('BMAB_face_parameter', util.dict_to_str(face_config)) + + candidate = [] + if self.order == 'Left': + for box, logit in zip(boxes, logits): + x1, y1, x2, y2 = box + value = x1 + (x2 - x1) // 2 + debug_print('detected(from left)', float(logit), value) + candidate.append((value, box, logit)) + candidate = sorted(candidate, key=lambda c: c[0]) + elif self.order == 'Right': + for box, logit in zip(boxes, logits): + x1, y1, x2, y2 = box + value = x1 + (x2 - x1) // 2 + debug_print('detected(from right)', float(logit), value) + candidate.append((value, box, logit)) + candidate = sorted(candidate, key=lambda c: c[0], reverse=True) + elif self.order == 'Center': + for box, logit in zip(boxes, logits): + x1, y1, x2, y2 = box + cx = image.width / 2 + cy = image.height / 2 + ix = x1 + (x2 - x1) // 2 + iy = y1 + (y2 - y1) // 2 + value = sqrt(abs(cx - ix) ** 2 + abs(cy - iy) ** 2) + debug_print('detected(from center)', float(logit), value) + candidate.append((value, box, logit)) + candidate = sorted(candidate, key=lambda c: c[0]) + elif self.order == 'Size': + for box, logit in zip(boxes, logits): + x1, y1, x2, y2 = box + value = (x2 - x1) * (y2 - y1) + debug_print('detected(size)', float(logit), value) + candidate.append((value, box, logit)) + candidate = sorted(candidate, key=lambda c: c[0], reverse=True) + else: + for box, logit in zip(boxes, logits): + value = float(logit) + debug_print(f'detected({self.order})', float(logit), value) + 
candidate.append((value, box, logit)) + candidate = sorted(candidate, key=lambda c: c[0], reverse=True) + + for idx, (size, box, logit) in enumerate(candidate): + if self.limit != 0 and idx >= self.limit: + debug_print(f'Over limit {self.limit}') + break + + if self.max_element != 0 and idx >= self.max_element: + debug_print(f'Over limit MAX Element {self.max_element}') + break + + if self.skip_large_face: + x1, y1, x2, y2 = box + mega_pixels = ((x2 - x1) * (y2 - y1)) / 1000000 + debug_print('skip large face', mega_pixels, self.large_face_pixels) + if mega_pixels > self.large_face_pixels: + debug_print(f'Skip face detailing {mega_pixels}') + break + + context.add_job(1) + + prompt = self.detailing_opt.get(f'prompt{idx}') + if prompt is not None: + if prompt.find('#!org!#') >= 0: + current_prompt = context.get_hires_prompt_by_index() if context.is_hires_fix() else context.get_prompt_by_index() + face_config['prompt'] = prompt.replace('#!org!#', current_prompt) + debug_print('prompt for face', face_config['prompt']) + elif prompt != '': + face_config['prompt'] = prompt + else: + face_config['prompt'] = context.get_hires_prompt_by_index() if context.is_hires_fix() else context.get_prompt_by_index() + + ne_prompt = self.detailing_opt.get(f'negative_prompt{idx}') + if ne_prompt is not None and ne_prompt != '': + face_config['negative_prompt'] = ne_prompt + else: + face_config['negative_prompt'] = context.get_hires_negative_prompt_by_index() if context.is_hires_fix() else context.get_negative_prompt_by_index() + + debug_print('render', float(logit)) + debug_print('dilation', self.dilation) + + face_mask = Image.new('L', image.size, color=0) + dr = ImageDraw.Draw(face_mask, 'L') + dr.rectangle(box, fill=255) + face_mask = util.dilate_mask(face_mask, self.dilation) + + seed, subseed = context.get_seeds() + options = dict(mask=face_mask, seed=seed, subseed=subseed, **face_config) + + if self.checkpoint is not None and self.checkpoint != constants.checkpoint_default: + override_settings = options.get('override_settings', {}) + override_settings['sd_model_checkpoint'] = self.checkpoint + options['override_settings'] = override_settings + if self.vae != constants.vae_default: + override_settings = options.get('override_settings', {}) + override_settings['sd_vae'] = self.vae + options['override_settings'] = override_settings + + if self.disable_extra_networks: + prompts, extra_network_data = extra_networks.parse_prompts([options['prompt']]) + options['prompt'] = prompts[0] + with VAEMethodOverride(): + img2img_image = process_img2img(context, image, options=options) + + x1, y1, x2, y2 = util.fix_box_size(box) + face_mask = Image.new('L', image.size, color=0) + dr = ImageDraw.Draw(face_mask, 'L') + dr.rectangle((x1, y1, x2, y2), fill=255) + blur = ImageFilter.GaussianBlur(3) + mask = face_mask.filter(blur) + image.paste(img2img_image, mask=mask) + return image + + def postprocess(self, context: Context, image: Image): + devices.torch_gc() + + +class PreprocessFaceDetailer(FaceDetailer): + + def __init__(self) -> None: + super().__init__() + + def preprocess(self, context: Context, image: Image): + super().preprocess(context, image) + return not context.is_hires_fix() and self.hiresfix_enabled + + +class FaceDetailerBeforeUpsacle(FaceDetailer): + + def __init__(self) -> None: + super().__init__() + + def preprocess(self, context: Context, image: Image): + super().preprocess(context, image) + return self.enabled and self.hiresfix_enabled and (context.is_hires_fix() or context.is_img2img()) +
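Each sort mode above boils down to scoring every detected box with one number and sorting once. A standalone sketch of the same selection logic (sort_candidates is a hypothetical condensation, not a helper from this diff):

from math import sqrt


def sort_candidates(boxes, logits, order, image_size):
	# One scalar per detection, then a single sort; mirrors the branches above.
	w, h = image_size

	def score(item):
		(x1, y1, x2, y2), logit = item
		if order in ('Left', 'Right'):
			return x1 + (x2 - x1) // 2  # horizontal center of the box
		if order == 'Center':
			ix, iy = x1 + (x2 - x1) // 2, y1 + (y2 - y1) // 2
			return sqrt((w / 2 - ix) ** 2 + (h / 2 - iy) ** 2)  # distance from image center
		if order == 'Size':
			return (x2 - x1) * (y2 - y1)  # box area
		return float(logit)  # detection score ('Score' and any other value)

	# 'Left' and 'Center' take the smallest value first; the rest take the largest.
	return sorted(zip(boxes, logits), key=score, reverse=order not in ('Left', 'Center'))

diff --git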
a/3-bmab/sd_bmab/processors/detailer/hand.py b/3-bmab/sd_bmab/processors/detailer/hand.py new file mode 100644 index 0000000000000000000000000000000000000000..c95649339b96deb59533c9fea81e24e54ef69bf0 --- /dev/null +++ b/3-bmab/sd_bmab/processors/detailer/hand.py @@ -0,0 +1,427 @@ +import math +from PIL import Image +from PIL import ImageDraw +from PIL import ImageFilter + +from modules import shared +from modules import devices + +from sd_bmab import detectors +from sd_bmab import external +from sd_bmab.base import process_img2img, Context, ProcessorBase, VAEMethodOverride, process_img2img_with_controlnet + +from sd_bmab.util import debug_print + +from sd_bmab import util + + +class Obj(object): + name = None + + def __init__(self, xyxy) -> None: + super().__init__() + self.parent = None + self.xyxy = xyxy + self.objects = [] + self.inbox = xyxy + + def is_in(self, obj) -> bool: + x1, y1, x2, y2 = self.inbox + mx1, my1, mx2, my2 = obj.xyxy + + x = int(x1 + (x2 - x1) / 2) + y = int(y1 + (y2 - y1) / 2) + + return mx1 <= x <= mx2 and my1 <= y <= my2 + + def append(self, obj): + obj.parent = self + for ch in self.objects: + if obj.is_in(ch): + obj.parent = ch + break + self.objects.append(obj) + + def is_valid(self): + return True + + def size(self): + x1, y1, x2, y2 = self.xyxy + return (x2 - x1) * (y2 - y1) + + def put(self, mask): + for xg in self.objects: + if not xg.is_valid(): + continue + if xg.name == 'hand': + dr = ImageDraw.Draw(mask, 'L') + dr.rectangle(xg.xyxy, fill=255) + + def get_box(self): + if not self.objects: + return self.xyxy + + x1, y1, x2, y2 = self.xyxy + ret = [x2, y2, x1, y1] + for xg in self.objects: + if not xg.is_valid(): + continue + x = xg.xyxy + ret[0] = x[0] if x[0] < ret[0] else ret[0] + ret[1] = x[1] if x[1] < ret[1] else ret[1] + ret[2] = x[2] if x[2] > ret[2] else ret[2] + ret[3] = x[3] if x[3] > ret[3] else ret[3] + + return x1, y1, x2, ret[3] + + def log(self): + debug_print(self.name, self.xyxy) + for x in self.objects: + x.log() + + +class Person(Obj): + name = 'person' + + def __init__(self, xyxy, dilation) -> None: + super().__init__(xyxy) + self.inbox = util.fix_box_by_scale(xyxy, dilation) + + def is_valid(self): + face = False + hand = False + for xg in self.objects: + if xg.name == 'face': + face = True + if xg.name == 'hand': + hand = True + return face and hand + + def cleanup(self): + debug_print([xg.name for xg in self.objects]) + nw = [] + for xg in self.objects: + if xg.name == 'person': + if len(self.objects) == 1 and xg.is_valid(): + self.xyxy = xg.xyxy + self.objects = xg.objects + return + else: + self.objects.extend(xg.objects) + else: + nw.append(xg) + self.objects = nw + + +class Head(Obj): + name = 'head' + + +class Face(Obj): + name = 'face' + + +class Hand(Obj): + name = 'hand' + + +def ultralytics_predict(context, image, boxth, txtth): + + ret_boxes = [] + ret_logits = [] + ret_phrases = [] + + detector = detectors.UltralyticsPersonDetector8n(box_threshold=boxth) + boxes, logits = detector.predict(context, image) + ret_boxes.extend(boxes) + ret_logits.extend(logits) + ret_phrases.extend(['person'] * len(boxes)) + + detector = detectors.UltralyticsFaceDetector8n(box_threshold=boxth) + boxes, logits = detector.predict(context, image) + ret_boxes.extend(boxes) + ret_logits.extend(logits) + ret_phrases.extend(['face'] * len(boxes)) + + detector = detectors.BmabHandDetector(box_threshold=boxth) + boxes, logits = detector.predict(context, image) + ret_boxes.extend(boxes) + ret_logits.extend(logits) + ret_phrases.extend(['hand'] * 
len(boxes)) + + return ret_boxes, ret_logits, ret_phrases + + +def get_subframe(context, pilimg, dilation, box_threshold=0.30, text_threshold=0.20): + text_prompt = "person . head . face . hand ." + debug_print('threshold', box_threshold) + + if shared.opts.bmab_use_dino_predict: + with external.ModuleAutoLoader('groundingdino', 'grdino') as dino: + boxes, logits, phrases = dino.dino_predict(pilimg, text_prompt, box_threshold, text_threshold) + else: + boxes, logits, phrases = ultralytics_predict(context, pilimg, box_threshold, text_threshold) + + people = [] + + def find_person(o): + for person in people: + if o.is_in(person): + return person + return None + + for idx, (box, logit, phrase) in enumerate(zip(boxes, logits, phrases)): + if phrase == 'person': + p = Person(tuple(int(x) for x in box), dilation) + parent = find_person(p) + if parent: + parent.append(p) + else: + people.append(p) + people = sorted(people, key=lambda c: c.size(), reverse=True) + + for idx, (box, logit, phrase) in enumerate(zip(boxes, logits, phrases)): + debug_print(float(logit), phrase) + bb = tuple(int(x) for x in box) + + if phrase == 'head': + o = Head(bb) + parent = find_person(o) + if parent: + parent.append(o) + elif phrase == 'face' or phrase == 'head face': + o = Face(bb) + parent = find_person(o) + if parent: + parent.append(o) + elif phrase == 'hand': + o = Hand(bb) + parent = find_person(o) + if parent: + parent.append(o) + + for person in people: + person.cleanup() + + boxes = [] + masks = [] + for person in people: + if person.is_valid(): + mask = Image.new('L', pilimg.size, color=0) + person.log() + person.put(mask) + boxes.append(person.get_box()) + masks.append(mask) + return boxes, masks + + +class HandDetailer(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.hand_detailing = None + self.detailing_opt = None + self.dilation = 0.1 + self.box_threshold = 0.3 + self.detailing_method = '' + self.best_quality = False + self.block_overscaled_image = True + self.auto_upscale = True + self.scale = 2 + + def preprocess(self, context: Context, image: Image): + if context.args['hand_detailing_enabled']: + self.hand_detailing = dict(context.args.get('module_config', {}).get('hand_detailing', {})) + self.detailing_opt = context.args.get('module_config', {}).get('hand_detailing_opt', {}) + self.dilation = self.hand_detailing.get('dilation', self.dilation) + self.box_threshold = self.hand_detailing.get('box_threshold', self.box_threshold) + self.detailing_method = self.detailing_opt.get('detailing_method', self.detailing_method) + self.best_quality = self.detailing_opt.get('best_quality', self.best_quality) + self.block_overscaled_image = self.detailing_opt.get('block_overscaled_image', self.block_overscaled_image) + self.auto_upscale = self.detailing_opt.get('auto_upscale', self.auto_upscale) + self.scale = self.detailing_opt.get('scale', self.scale) + + return context.args['hand_detailing_enabled'] + + @staticmethod + def get_depth_hand_refiner(weight, begin, end): + cn_args = { + 'enabled': True, + 'module': 'depth_hand_refiner', + 'model': shared.opts.bmab_cn_inpaint_depth_hand, + 'weight': weight, + "guidance_start": begin, + "guidance_end": end, + 'resize_mode': 'Just Resize', + 'pixel_perfect': False, + 'control_mode': 'ControlNet is more important', + 'processor_res': 512, + 'threshold_a': 0.5, + 'threshold_b': 0.5, + } + return cn_args + + def process(self, context: Context, image: Image): + + context.add_generation_param('BMAB_hand_option', 
util.dict_to_str(self.detailing_opt)) + context.add_generation_param('BMAB_hand_parameter', util.dict_to_str(self.hand_detailing)) + + if self.detailing_method == 'subframe': + return self.process_hand_detailing_subframe(context, image) + elif self.detailing_method == 'depth hand refiner': + mask = Image.new('L', image.size, 0) + dr = ImageDraw.Draw(mask, 'L') + if shared.opts.bmab_use_dino_predict: + with external.ModuleAutoLoader('groundingdino', 'grdino') as dino: + boxes, logits, phrases = dino.dino_predict(image, 'person . hand', self.box_threshold, 0.3) + else: + boxes, logits, phrases = ultralytics_predict(context, image, self.box_threshold, 0.3) + for idx, (box, logit, phrase) in enumerate(zip(boxes, logits, phrases)): + if phrase == 'hand': + b = util.fix_box_size(box) + dr.rectangle(b, fill=255) + options = dict(mask=mask) + options.update(self.hand_detailing) + context.add_job() + with VAEMethodOverride(): + controlnet = self.get_depth_hand_refiner(1, 0, 1) + image = process_img2img_with_controlnet(context, image, options, controlnet=[controlnet]) + elif self.detailing_method == 'at once': + mask = Image.new('L', image.size, 0) + dr = ImageDraw.Draw(mask, 'L') + if shared.opts.bmab_use_dino_predict: + with external.ModuleAutoLoader('groundingdino', 'grdino') as dino: + boxes, logits, phrases = dino.dino_predict(image, 'person . hand', self.box_threshold, 0.3) + else: + boxes, logits, phrases = ultralytics_predict(context, image, self.box_threshold, 0.3) + for idx, (box, logit, phrase) in enumerate(zip(boxes, logits, phrases)): + if phrase=='hand': + b = util.fix_box_size(box) + dr.rectangle(b, fill=255) + options = dict(mask=mask) + options.update(self.hand_detailing) + context.add_job() + with VAEMethodOverride(): + image = process_img2img(context, image, options=options) + elif self.detailing_method == 'each hand' or self.detailing_method == 'inpaint each hand': + if shared.opts.bmab_use_dino_predict: + with external.ModuleAutoLoader('groundingdino', 'grdino') as dino: + boxes, logits, phrases = dino.dino_predict(image, 'person . 
hand', self.box_threshold, 0.3) + else: + boxes, logits, phrases = ultralytics_predict(context, image, self.box_threshold, 0.3) + for idx, (box, logit, phrase) in enumerate(zip(boxes, logits, phrases)): + if phrase != 'hand': + continue + debug_print(float(logit)) + + x1, y1, x2, y2 = tuple(int(x) for x in box) + + width = x2 - x1 + height = y2 - y1 + + mbox = (int(x1 - width), int(y1 - height), int(x2 + width), int(y2 + height)) + mbox = util.fix_box_size(mbox) + debug_print(mbox) + + hbox = (width, height, width * 2, height * 2) + cropped_hand = image.crop(box=mbox) + cropped_hand_mask = Image.new('L', cropped_hand.size, 0) + dr = ImageDraw.Draw(cropped_hand_mask, 'L') + dr.rectangle(hbox, fill=255) + + options = dict(seed=-1) + scale = self.detailing_opt.get('scale', -1) + if scale < 1: + normalize = self.detailing_opt.get('normalize', 768) + if width > height: + scale = normalize / cropped_hand.width + else: + scale = normalize / cropped_hand.height + if self.detailing_method == 'inpaint each hand': + options['mask'] = cropped_hand_mask + + options.update(self.hand_detailing) + w, h = util.fix_size_by_scale(cropped_hand.width, cropped_hand.height, scale) + options['width'] = w + options['height'] = h + debug_print(f'scale {scale} width {w} height {h}') + shared.state.job_count += 1 + with VAEMethodOverride(hiresfix=self.best_quality): + img2img_result = process_img2img(context, cropped_hand, options=options) + img2img_result = img2img_result.resize(cropped_hand.size, resample=util.LANCZOS) + + debug_print('resize to', img2img_result.size, cropped_hand_mask.size) + blur = ImageFilter.GaussianBlur(3) + cropped_hand_mask = cropped_hand_mask.filter(blur) + image.paste(img2img_result, (mbox[0], mbox[1]), mask=cropped_hand_mask) + else: + debug_print('no such method') + return image + + return image + + def process_hand_detailing_subframe(self, context, image): + + boxes, masks = get_subframe(context, image, self.dilation, box_threshold=self.box_threshold) + if not boxes: + return image + + if not hasattr(context, 'hand_mask_image'): + c1 = image.copy() + for box, mask in zip(boxes, masks): + box = util.fix_box_by_scale(box, self.dilation) + draw = ImageDraw.Draw(c1, 'RGBA') + draw.rectangle(box, outline=(0, 255, 0, 255), fill=(0, 255, 0, 50), width=3) + c2 = image.copy() + draw = ImageDraw.Draw(c2, 'RGBA') + draw.rectangle(box, outline=(255, 0, 0, 255), fill=(255, 0, 0, 50), width=3) + c1.paste(c2, mask=mask) + context.script.extra_image.append(c1) + context.hand_mask_image = c1 + + for box, mask in zip(boxes, masks): + box = util.fix_box_by_scale(box, self.dilation) + box = util.fix_box_size(box) + box = util.fix_box_limit(box, image.size) + x1, y1, x2, y2 = box + + cropped = image.crop(box=box) + cropped_mask = mask.crop(box=box) + + options = dict(mask=cropped_mask, seed=-1) + options.update(self.hand_detailing) + w, h = util.fix_size_by_scale(cropped.width, cropped.height, self.scale) + options['width'] = w + options['height'] = h + debug_print(f'Scale x{self.scale} ({cropped.width},{cropped.height}) -> ({w},{h})') + + if self.block_overscaled_image: + area_org = context.get_max_area() + area_scaled = w * h + if area_scaled > area_org: + debug_print(f'It is too large to process.') + if not self.auto_upscale: + context.add_generation_param( + 'BMAB_hand_SKIP', f'Image too large to process {cropped.width}x{cropped.height} {w}x{h}') + return image + new_scale = math.sqrt(area_org / (cropped.width * cropped.height)) + w, h = util.fix_size_by_scale(cropped.width, cropped.height, new_scale) + 
options['width'] = w + options['height'] = h + debug_print(f'Auto Scale x{new_scale} ({cropped.width},{cropped.height}) -> ({w},{h})') + if new_scale < 1.05: + debug_print(f'Scale {new_scale} has no effect. skip!!!!!') + context.add_generation_param('BMAB_hand_SKIP', f'{new_scale} < 1.05') + return image + shared.state.job_count += 1 + with VAEMethodOverride(): + img2img_result = process_img2img(context, cropped, options=options) + img2img_result = img2img_result.resize((cropped.width, cropped.height), resample=util.LANCZOS) + blur = ImageFilter.GaussianBlur(3) + cropped_mask = cropped_mask.filter(blur) + image.paste(img2img_result, (x1, y1), mask=cropped_mask) + devices.torch_gc() + + return image + + def postprocess(self, context: Context, image: Image): + pass diff --git a/3-bmab/sd_bmab/processors/detailer/person.py b/3-bmab/sd_bmab/processors/detailer/person.py new file mode 100644 index 0000000000000000000000000000000000000000..970b6614902a80b3dae6c4d7901cab13d484efc2 --- /dev/null +++ b/3-bmab/sd_bmab/processors/detailer/person.py @@ -0,0 +1,199 @@ +import math + +from PIL import Image +from PIL import ImageFilter +from PIL import ImageEnhance + +from modules import shared +from modules import devices + +from sd_bmab import util, masking +from sd_bmab import external +from sd_bmab.base import process_img2img, Context, ProcessorBase, VAEMethodOverride + +from sd_bmab.util import debug_print +from sd_bmab.detectors import UltralyticsPersonDetector8n +from sd_bmab import constants + + +class PersonDetailer(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.detailing_opt = None + self.parameters = None + + self.dilation = 3 + self.area_ratio = 0.1 + self.limit = 1 + self.force_one_on_one = False + self.background_color = 1 + self.background_blur = 0 + self.use_groudingdino = False + self.detection_model = 'Ultralytics(person_yolov8n-seg.pt)' + self.max_element = shared.opts.bmab_max_detailing_element + self.checkpoint = constants.checkpoint_default + self.vae = constants.vae_default + self.sampler = constants.sampler_default + self.scheduler = constants.scheduler_default + + def preprocess(self, context: Context, image: Image): + if context.args['person_detailing_enabled']: + self.detailing_opt = context.args.get('module_config', {}).get('person_detailing_opt', {}) + self.parameters = dict(context.args.get('module_config', {}).get('person_detailing', {})) + self.dilation = self.detailing_opt.get('dilation', self.dilation) + self.area_ratio = self.detailing_opt.get('area_ratio', self.area_ratio) + self.limit = self.detailing_opt.get('limit', self.limit) + self.force_one_on_one = self.detailing_opt.get('force_1:1', self.force_one_on_one) + self.background_color = self.detailing_opt.get('background_color', self.background_color) + self.background_blur = self.detailing_opt.get('background_blur', self.background_blur) + self.use_groudingdino = self.detailing_opt.get('use_groudingdino', self.use_groudingdino) + self.detection_model = self.detailing_opt.get('detection_model', self.detection_model) + self.checkpoint = self.detailing_opt.get('checkpoint', self.checkpoint) + self.vae = self.detailing_opt.get('vae', self.vae) + self.sampler = self.detailing_opt.get('sampler', self.sampler) + self.scheduler = self.detailing_opt.get('scheduler', self.scheduler) + + return context.args['person_detailing_enabled'] + + def get_cropped_mask(self, image, boxes, box): + sam = masking.get_mask_generator() + mask = sam.predict(image, boxes) + mask = util.dilate_mask(mask,
self.dilation) + cropped_mask = mask.crop(box=box).convert('L') + return cropped_mask + + def process(self, context: Context, image: Image): + context.add_generation_param('BMAB_person_option', util.dict_to_str(self.detailing_opt)) + if self.use_groudingdino: + text_prompt = "person . head . face . hand ." + debug_print('prepare detector groundingdino') + with external.ModuleAutoLoader('groundingdino', 'grdino') as dino: + boxes, logits, phrases = dino.dino_predict(image, text_prompt, 0.30, 0.20) + else: + debug_print('prepare detector Ultralytics') + detector = UltralyticsPersonDetector8n() + boxes, logits = detector.predict(context, image) + phrases = ['person'] * len(boxes) + + org_size = image.size + debug_print('size', org_size) + + i2i_config = self.parameters + debug_print(f'Max element {self.max_element}') + + context.add_job(min(self.limit, len(boxes))) + + if self.sampler != constants.sampler_default: + i2i_config['sampler_name'] = self.sampler + if self.scheduler != constants.scheduler_default: + i2i_config['scheduler'] = self.scheduler + + processed = [] + for idx, (box, logit, phrase) in enumerate(zip(boxes, logits, phrases)): + if phrase != 'person': + continue + + if self.limit != 0 and idx >= self.limit: + debug_print(f'Over limit {self.limit}') + break + + if self.max_element != 0 and idx >= self.max_element: + debug_print(f'Over limit MAX Element {self.max_element}') + break + + debug_print('render', float(logit)) + box2 = util.fix_box_size(box) + x1, y1, x2, y2 = box2 + + cropped = image.crop(box=box) + + scale = self.detailing_opt.get('scale', 4) + if self.force_one_on_one: + scale = 1.0 + + area_person = cropped.width * cropped.height + area_image = image.width * image.height + ratio = area_person / area_image + debug_print(f'Ratio {ratio}') + if scale > 1 and ratio >= self.area_ratio: + debug_print(f'Person is too big to process. {ratio} >= {self.area_ratio}.') + if self.background_color != 1: + cropped_mask = self.get_cropped_mask(image, boxes, box) + processed.append((cropped, (x1, y1), cropped_mask)) + continue + context.add_generation_param( + 'BMAB_person_SKIP', f'Person is too big to process. {ratio} >= {self.area_ratio}.') + return image + + context.add_generation_param('BMAB person ratio', '%.3f' % ratio) + + w = int(cropped.width * scale) + h = int(cropped.height * scale) + debug_print(f'Trying x{scale} ({cropped.width},{cropped.height}) -> ({w},{h})') + + if scale > 1 and self.detailing_opt.get('block_overscaled_image', True): + area_org = context.get_max_area() + area_scaled = w * h + if area_scaled > area_org: + debug_print(f'It is too large to process.') + auto_upscale = self.detailing_opt.get('auto_upscale', True) + if not auto_upscale: + if self.background_color != 1: + cropped_mask = self.get_cropped_mask(image, boxes, box) + processed.append((cropped, (x1, y1), cropped_mask)) + continue + context.add_generation_param('BMAB_person_SKIP', f'It is too large to process.') + return image + scale = math.sqrt(area_org / (cropped.width * cropped.height)) + w, h = util.fix_size_by_scale(cropped.width, cropped.height, scale) + debug_print(f'Auto Scale x{scale} ({cropped.width},{cropped.height}) -> ({w},{h})') + if scale < 1.2: + debug_print(f'Scale {scale} has no effect. skip!!!!!') + if self.background_color != 1: + cropped_mask = self.get_cropped_mask(image, boxes, box) + processed.append((cropped, (x1, y1), cropped_mask)) + continue + context.add_generation_param('BMAB_person_SKIP', f'Scale {scale} has no effect. 
skip!!!!!') + return image + + cropped_mask = self.get_cropped_mask(image, boxes, box) + options = dict(mask=cropped_mask, **i2i_config) + options['width'] = w + options['height'] = h + options['inpaint_full_res'] = 1 + options['inpaint_full_res_padding'] = 32 + + if context.is_hires_fix(): + options['prompt'] = context.get_hires_prompt_by_index() + debug_print(options['prompt']) + + if self.checkpoint is not None and self.checkpoint != constants.checkpoint_default: + override_settings = options.get('override_settings', {}) + override_settings['sd_model_checkpoint'] = self.checkpoint + options['override_settings'] = override_settings + if self.vae != constants.vae_default: + override_settings = options.get('override_settings', {}) + override_settings['sd_vae'] = self.vae + options['override_settings'] = override_settings + + with VAEMethodOverride(): + img2img_result = process_img2img(context, cropped, options=options) + img2img_result = img2img_result.resize(cropped.size, resample=util.LANCZOS) + blur = ImageFilter.GaussianBlur(3) + cropped_mask = cropped_mask.filter(blur) + processed.append((img2img_result, (x1, y1), cropped_mask)) + + if self.background_color != 1: + enhancer = ImageEnhance.Color(image) + image = enhancer.enhance(self.background_color) + if self.background_blur > 3: + blur = ImageFilter.GaussianBlur(self.background_blur) + image = image.filter(blur) + + for img2img_result, pos, cropped_mask in processed: + image.paste(img2img_result, pos, mask=cropped_mask) + + return image + + def postprocess(self, context: Context, image: Image): + devices.torch_gc() diff --git a/3-bmab/sd_bmab/processors/postprocess/__init__.py b/3-bmab/sd_bmab/processors/postprocess/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0c109ce6449adc418ad62d349d06e885d22e7bf3 --- /dev/null +++ b/3-bmab/sd_bmab/processors/postprocess/__init__.py @@ -0,0 +1,6 @@ +from sd_bmab.processors.postprocess.upscaleafterprocess import AfterProcessUpscaler +from sd_bmab.processors.postprocess.upscalebeforeprocess import BeforeProcessUpscaler +from sd_bmab.processors.postprocess.inpaint import InpaintResize +from sd_bmab.processors.postprocess.inpaintlama import InpaintLamaResize +from sd_bmab.processors.postprocess.finalfilter import FinalFilter +from sd_bmab.processors.postprocess.watermark import Watermark diff --git a/3-bmab/sd_bmab/processors/postprocess/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/processors/postprocess/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..03eed7bdbe81974c8a09fe69f548ff2d38c69e0f Binary files /dev/null and b/3-bmab/sd_bmab/processors/postprocess/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/postprocess/__pycache__/finalfilter.cpython-310.pyc b/3-bmab/sd_bmab/processors/postprocess/__pycache__/finalfilter.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c5d4483d202fb3f23c9fb5c05a82f9a2131d5857 Binary files /dev/null and b/3-bmab/sd_bmab/processors/postprocess/__pycache__/finalfilter.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/postprocess/__pycache__/inpaint.cpython-310.pyc b/3-bmab/sd_bmab/processors/postprocess/__pycache__/inpaint.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5220749c2baf975e428c5f0db139ec6e62970165 Binary files /dev/null and b/3-bmab/sd_bmab/processors/postprocess/__pycache__/inpaint.cpython-310.pyc differ diff --git 
a/3-bmab/sd_bmab/processors/postprocess/__pycache__/inpaintlama.cpython-310.pyc b/3-bmab/sd_bmab/processors/postprocess/__pycache__/inpaintlama.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5e9fc71ee074d8ca242e05eae16b951ad4318480 Binary files /dev/null and b/3-bmab/sd_bmab/processors/postprocess/__pycache__/inpaintlama.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/postprocess/__pycache__/upscaleafterprocess.cpython-310.pyc b/3-bmab/sd_bmab/processors/postprocess/__pycache__/upscaleafterprocess.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8245a1ee6a459032974c246aac26d97b22c0326d Binary files /dev/null and b/3-bmab/sd_bmab/processors/postprocess/__pycache__/upscaleafterprocess.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/postprocess/__pycache__/upscalebeforeprocess.cpython-310.pyc b/3-bmab/sd_bmab/processors/postprocess/__pycache__/upscalebeforeprocess.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3cdd57146e48b573060854bc60d82b78f1517056 Binary files /dev/null and b/3-bmab/sd_bmab/processors/postprocess/__pycache__/upscalebeforeprocess.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/postprocess/__pycache__/watermark.cpython-310.pyc b/3-bmab/sd_bmab/processors/postprocess/__pycache__/watermark.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e9dc8c613e8d33b980137dc666c6a08bd55236f3 Binary files /dev/null and b/3-bmab/sd_bmab/processors/postprocess/__pycache__/watermark.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/postprocess/finalfilter.py b/3-bmab/sd_bmab/processors/postprocess/finalfilter.py new file mode 100644 index 0000000000000000000000000000000000000000..8fec123dbe567e759842868dab4c925fe7f4ef9d --- /dev/null +++ b/3-bmab/sd_bmab/processors/postprocess/finalfilter.py @@ -0,0 +1,27 @@ +from PIL import Image +from sd_bmab.base import Context, ProcessorBase +from sd_bmab.base import filter + + +class FinalFilter(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.filter_name = 'None' + self.filter = None + + def preprocess(self, context: Context, image: Image): + self.filter_name = context.args.get('postprocess_final_filter', self.filter_name) + return self.filter_name != 'None' + + def process(self, context: Context, image: Image): + self.filter = filter.get_filter(self.filter_name) + if self.filter is not None: + filter.preprocess_filter(self.filter, context, image) + image = filter.process_filter(self.filter, context, None, image) + filter.postprocess_filter(self.filter, context) + return image + + def finalprocess(self, context: Context, image: Image): + if self.filter is not None: + filter.finalprocess_filter(self.filter, context) + self.filter = None diff --git a/3-bmab/sd_bmab/processors/postprocess/inpaint.py b/3-bmab/sd_bmab/processors/postprocess/inpaint.py new file mode 100644 index 0000000000000000000000000000000000000000..df2f51689634f3523dd8e1de8255eb1b0b8ea821 --- /dev/null +++ b/3-bmab/sd_bmab/processors/postprocess/inpaint.py @@ -0,0 +1,100 @@ +from PIL import Image +from PIL import ImageDraw + +from modules import devices +from modules.processing import process_images, StableDiffusionProcessingImg2Img + +from sd_bmab import util +from sd_bmab.base import apply_extensions, build_img2img, Context, ProcessorBase + +from sd_bmab.util import debug_print +from sd_bmab.detectors import UltralyticsPersonDetector8n + + +class 
InpaintResize(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.resize_by_person_opt = None + self.value = 0 + self.denoising_strength = 0.4 + self.dilation = 4 + self.mode = 'Inpaint' + + def preprocess(self, context: Context, image: Image): + enabled = context.args.get('resize_by_person_enabled', False) + self.resize_by_person_opt = context.args.get('module_config', {}).get('resize_by_person_opt', {}) + self.value = self.resize_by_person_opt.get('scale', 0) + self.denoising_strength = self.resize_by_person_opt.get('denoising_strength', 0.4) + self.dilation = self.resize_by_person_opt.get('dilation', self.dilation) + self.mode = self.resize_by_person_opt.get('mode', self.mode) + + return enabled and self.mode == 'Inpaint' + + def process(self, context: Context, image: Image): + debug_print('prepare detector') + detector = UltralyticsPersonDetector8n() + boxes, logits = detector.predict(context, image) + + org_size = image.size + debug_print('size', org_size) + + largest = (0, None) + for box in boxes: + x1, y1, x2, y2 = box + size = (x2 - x1) * (y2 - y1) + if size > largest[0]: + largest = (size, box) + + if largest[0] == 0: + return image + + x1, y1, x2, y2 = largest[1] + ratio = (y2 - y1) / image.height + debug_print('ratio', ratio) + debug_print('org_size', org_size) + if ratio <= self.value: + return image + image_ratio = ratio / self.value + if image_ratio < 1.0: + return image + debug_print('scale', image_ratio) + ratio = image_ratio + + org_size = image.size + dw, dh = org_size + + context.add_generation_param('BMAB controlnet mode', 'inpaint') + context.add_generation_param('BMAB resize by person ratio', '%.3f' % ratio) + + resized_width = int(dw / ratio) + resized_height = int(dh / ratio) + resized = image.resize((resized_width, resized_height), resample=util.LANCZOS) + context.sdprocessing.resize_mode = 2 + input_image = util.resize_image(2, resized, dw, dh) + + offset_x = int((dw - resized_width) / 2) + offset_y = dh - resized_height + + mask = Image.new('L', (dw, dh), 255) + dr = ImageDraw.Draw(mask, 'L') + dr.rectangle((offset_x, offset_y, offset_x + resized_width, offset_y + resized_height), fill=0) + mask = mask.resize(org_size, resample=util.LANCZOS) + mask = util.dilate_mask(mask, self.dilation) + + opt = dict(mask=mask, denoising_strength=self.denoising_strength) + i2i_param = build_img2img(context.sdprocessing, input_image, opt) + + img2img = StableDiffusionProcessingImg2Img(**i2i_param) + img2img.cached_c = [None, None] + img2img.cached_uc = [None, None] + img2img.scripts, img2img.script_args = apply_extensions(context.sdprocessing, cn_enabled=False) + + processed = process_images(img2img) + img = processed.images[0] + + img2img.close() + + return img + + def postprocess(self, context: Context, image: Image): + devices.torch_gc()
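InpaintResize shrinks the whole frame and repaints the exposed border: with scale 0.5 and a detected person filling 0.8 of the image height, image_ratio is 0.8 / 0.5 = 1.6, so the frame is downscaled by 1.6, anchored at the bottom center, and the band around it is inpainted. A minimal sketch of just the mask geometry, mirroring the code above (build_border_mask is a hypothetical name):

from PIL import Image, ImageDraw


def build_border_mask(size, ratio):
	# White (255) marks the border to repaint; black (0) keeps the
	# shrunken original, centered horizontally and anchored to the bottom.
	dw, dh = size
	rw, rh = int(dw / ratio), int(dh / ratio)
	ox, oy = (dw - rw) // 2, dh - rh
	mask = Image.new('L', size, 255)
	ImageDraw.Draw(mask, 'L').rectangle((ox, oy, ox + rw, oy + rh), fill=0)
	return mask

diff --git a/3-bmab/sd_bmab/processors/postprocess/inpaintlama.py b/3-bmab/sd_bmab/processors/postprocess/inpaintlama.py new file mode 100644 index 0000000000000000000000000000000000000000..df34e091a80da12f8f880247d22783fb67821cc9 --- /dev/null +++ b/3-bmab/sd_bmab/processors/postprocess/inpaintlama.py @@ -0,0 +1,147 @@ +from PIL import Image +from PIL import ImageDraw + +from modules import shared +from modules import devices +from modules.processing import process_images, StableDiffusionProcessingImg2Img + +from sd_bmab import util +from sd_bmab.base import apply_extensions, build_img2img, Context, ProcessorBase +from sd_bmab.util import debug_print +from sd_bmab.detectors import UltralyticsPersonDetector8n + + +class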
InpaintLamaResize(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.resize_by_person_opt = None + self.value = 0 + self.denoising_strength = 0.4 + self.dilation = 4 + self.mode = 'ControlNet inpaint+lama' + self.enabled = False + + def use_controlnet(self, context: Context): + self.preprocess(context, None) + return self.enabled and self.mode == 'ControlNet inpaint+lama' + + def preprocess(self, context: Context, image: Image): + self.enabled = context.args.get('resize_by_person_enabled', self.enabled) + self.resize_by_person_opt = context.args.get('module_config', {}).get('resize_by_person_opt', {}) + self.value = self.resize_by_person_opt.get('scale', 0) + self.denoising_strength = self.resize_by_person_opt.get('denoising_strength', 0.4) + self.dilation = self.resize_by_person_opt.get('dilation', self.dilation) + self.mode = self.resize_by_person_opt.get('mode', self.mode) + + return self.enabled and self.mode == 'ControlNet inpaint+lama' + + @staticmethod + def get_inpaint_lama_args(image, mask): + cn_args = { + 'input_image': util.b64_encoding(image), + 'mask': util.b64_encoding(mask), + 'module': 'inpaint_only+lama', + 'model': shared.opts.bmab_cn_inpaint, + 'weight': 1, + "guidance_start": 0, + "guidance_end": 1, + 'resize_mode': 'Resize and Fill', + 'pixel_perfect': False, + 'control_mode': 'ControlNet is more important', + 'processor_res': 512, + 'threshold_a': 64, + 'threshold_b': 64, + } + return cn_args + + def get_ratio(self, context, img, p): + p.extra_generation_params['BMAB process_resize_by_person'] = self.value + + final_ratio = 1 + debug_print('prepare detector') + detector = UltralyticsPersonDetector8n() + boxes, logits = detector.predict(context, img) + + largest = (0, None) + for box in boxes: + x1, y1, x2, y2 = box + size = (x2 - x1) * (y2 - y1) + if size > largest[0]: + largest = (size, box) + + if largest[0] == 0: + return final_ratio + + x1, y1, x2, y2 = largest[1] + ratio = (y2 - y1) / img.height + debug_print('ratio', ratio) + + if ratio > self.value: + image_ratio = ratio / self.value + if image_ratio < 1.0: + return final_ratio + final_ratio = image_ratio + return final_ratio + + def resize_by_person_using_controlnet(self, context, p): + if not isinstance(p, StableDiffusionProcessingImg2Img): + return False + + cn_args = util.get_cn_args(p) + + debug_print('resize_by_person_enabled_inpaint', self.value) + img = p.init_images[0] + context.script.extra_image.append(img) + + ratio = self.get_ratio(context, img, p) + debug_print('image resize ratio', ratio) + org_size = img.size + dw, dh = org_size + + if ratio == 1: + return False + + p.extra_generation_params['BMAB controlnet mode'] = 'inpaint+lama' + p.extra_generation_params['BMAB resize by person ratio'] = '%.3f' % ratio + + resized_width = int(dw / ratio) + resized_height = int(dh / ratio) + resized = img.resize((resized_width, resized_height), resample=util.LANCZOS) + p.resize_mode = 2 + input_image = util.resize_image(2, resized, dw, dh) + p.init_images[0] = input_image + + offset_x = int((dw - resized_width) / 2) + offset_y = dh - resized_height + + mask = Image.new('L', (dw, dh), 255) + dr = ImageDraw.Draw(mask, 'L') + dr.rectangle((offset_x, offset_y, offset_x + resized_width, offset_y + resized_height), fill=0) + mask = mask.resize(org_size, resample=util.LANCZOS) + mask = util.dilate_mask(mask, self.dilation) + + cn_op_arg = self.get_inpaint_lama_args(input_image, mask) + idx = cn_args[0] + sc_args = list(p.script_args) + sc_args[idx] = cn_op_arg + p.script_args = tuple(sc_args) + return True + + def process(self, context:
Context, image: Image): + opt = dict(denoising_strength=self.denoising_strength) + i2i_param = build_img2img(context.sdprocessing, image, opt) + + img2img = StableDiffusionProcessingImg2Img(**i2i_param) + img2img.cached_c = [None, None] + img2img.cached_uc = [None, None] + img2img.scripts, img2img.script_args = apply_extensions(context.sdprocessing, cn_enabled=True) + + if self.resize_by_person_using_controlnet(context, img2img): + processed = process_images(img2img) + image = processed.images[0] + img2img.close() + devices.torch_gc() + return image + + def postprocess(self, context: Context, image: Image): + pass diff --git a/3-bmab/sd_bmab/processors/postprocess/upscaleafterprocess.py b/3-bmab/sd_bmab/processors/postprocess/upscaleafterprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..8d1804b52168f65b45fe3571a02098578b9af8e7 --- /dev/null +++ b/3-bmab/sd_bmab/processors/postprocess/upscaleafterprocess.py @@ -0,0 +1,39 @@ +from PIL import Image + +from modules import images + +from sd_bmab.base.context import Context +from sd_bmab.base.processorbase import ProcessorBase +from sd_bmab.util import debug_print + + +class AfterProcessUpscaler(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.ratio = 1.5 + self.upscaler = 'None' + + def preprocess(self, context: Context, image: Image): + self.ratio = context.args['upscale_ratio'] + self.upscaler = context.args['upscaler_name'] + return context.args['upscale_enabled'] and context.args['detailing_after_upscale'] + + def process(self, context: Context, image: Image): + debug_print(f'Upscale ratio {self.ratio} Upscaler {self.upscaler}') + context.add_generation_param('BMAB_upscale_option', f'Upscale ratio {self.ratio} Upscaler {self.upscaler}') + + if self.ratio < 1.0 or self.ratio > 4.0: + debug_print('upscale out of range') + return image + image = image.convert('RGB') + context.add_generation_param('BMAB process upscale', self.ratio) + context.args['max_area'] = image.width * image.height + context.args['upscale_limit'] = True + + w = image.width + h = image.height + img = images.resize_image(0, image, int(w * self.ratio), int(h * self.ratio), self.upscaler) + return img.convert('RGB') + + def postprocess(self, context: Context, image: Image): + pass diff --git a/3-bmab/sd_bmab/processors/postprocess/upscalebeforeprocess.py b/3-bmab/sd_bmab/processors/postprocess/upscalebeforeprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..1454ca5082bc7abe9de6663f87a0fe157dfc1e9b --- /dev/null +++ b/3-bmab/sd_bmab/processors/postprocess/upscalebeforeprocess.py @@ -0,0 +1,38 @@ +from PIL import Image + +from modules import images + +from sd_bmab.base.context import Context +from sd_bmab.base.processorbase import ProcessorBase +from sd_bmab.util import debug_print + + +class BeforeProcessUpscaler(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.ratio = 1.5 + self.upscaler = 'None' + + def preprocess(self, context: Context, image: Image): + self.ratio = context.args['upscale_ratio'] + self.upscaler = context.args['upscaler_name'] + return context.args['upscale_enabled'] and not context.args['detailing_after_upscale'] + + def process(self, context: Context, image: Image): + debug_print(f'Upscale ratio {self.ratio} Upscaler {self.upscaler}') + context.add_generation_param('BMAB_upscale_option', f'Upscale ratio {self.ratio} Upscaler {self.upscaler}') + + if self.ratio < 1.0 or self.ratio > 4.0: + debug_print('upscale out of range') + return image + image = 
image.convert('RGB') + context.add_generation_param('BMAB process upscale', self.ratio) + context.args['upscale_limit'] = True + + w = image.width + h = image.height + img = images.resize_image(0, image, int(w * self.ratio), int(h * self.ratio), self.upscaler) + return img.convert('RGB') + + def postprocess(self, context: Context, image: Image): + pass diff --git a/3-bmab/sd_bmab/processors/postprocess/watermark.py b/3-bmab/sd_bmab/processors/postprocess/watermark.py new file mode 100644 index 0000000000000000000000000000000000000000..956e4ce4af384645b30c3e190285d56903274e69 --- /dev/null +++ b/3-bmab/sd_bmab/processors/postprocess/watermark.py @@ -0,0 +1,131 @@ +import os +import sys +import glob + +from PIL import Image +from PIL import ImageDraw +from PIL import ImageFont +from sd_bmab.base import Context, ProcessorBase + + +class Watermark(ProcessorBase): + alignment = { + 'top': lambda w, h, cx, cy: (w / 2 - cx / 2, 0), + 'top-right': lambda w, h, cx, cy: (w - cx, 0), + 'right': lambda w, h, cx, cy: (w - cx, h / 2 - cy / 2), + 'bottom-right': lambda w, h, cx, cy: (w - cx, h - cy), + 'bottom': lambda w, h, cx, cy: (w / 2 - cx / 2, h - cy), + 'bottom-left': lambda w, h, cx, cy: (0, h - cy), + 'left': lambda w, h, cx, cy: (0, h / 2 - cy / 2), + 'top-left': lambda w, h, cx, cy: (0, 0), + 'center': lambda w, h, cx, cy: (w / 2 - cx / 2, h / 2 - cy / 2), + } + + def __init__(self) -> None: + super().__init__() + self.enabled = False + self.font = None + self.alignment = 'bottom-left' + self.text_alignment = 'left' + self.rotate = 0 + self.color = '#000000' + self.background_color = '#00000000' + self.font_size = 12 + self.transparency = 100 + self.background_transparency = 0 + self.margin = 5 + self.text = '' + + def preprocess(self, context: Context, image: Image): + watermark_opt = context.args.get('module_config', {}).get('watermark', {}) + self.enabled = watermark_opt.get('enabled', self.enabled) + self.font = watermark_opt.get('font', self.font) + self.alignment = watermark_opt.get('alignment', self.alignment) + self.text_alignment = watermark_opt.get('text_alignment', self.text_alignment) + _rotate = watermark_opt.get('rotate', self.rotate) + self.rotate = int(_rotate) + self.color = watermark_opt.get('color', self.color) + self.background_color = watermark_opt.get('background_color', self.background_color) + self.font_size = watermark_opt.get('font_size', self.font_size) + self.transparency = watermark_opt.get('transparency', self.transparency) + self.background_transparency = watermark_opt.get('background_transparency', self.background_transparency) + self.margin = watermark_opt.get('margin', self.margin) + self.text = watermark_opt.get('text', self.text) + + return self.enabled + + def process(self, context: Context, image: Image): + + background_color = self.color_hex_to_rgb(self.background_color, int(255 * (self.background_transparency / 100))) + + if os.path.isfile(self.text): + cropped = Image.open(self.text) + else: + font = self.get_font(self.font, self.font_size) + color = self.color_hex_to_rgb(self.color, int(255 * (self.transparency / 100))) + + # 1st: render the text and crop to its bounding box + base = Image.new('RGBA', image.size, background_color) + draw = ImageDraw.Draw(base) + bbox = draw.textbbox((0, 0), self.text, font=font) + draw.text((0, 0), self.text, font=font, fill=color, align=self.text_alignment) + cropped = base.crop(bbox) + + # 2nd: add a margin around the text + base = Image.new('RGBA', (cropped.width + self.margin * 2, cropped.height + self.margin * 2), background_color) + base.paste(cropped, (self.margin, self.margin))
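+ # the padded canvas is self.margin pixels larger on every side of the cropped text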
+ + # 3rd: rotate the stamp + base = base.rotate(self.rotate, expand=True) + + # 4th: composite the stamp onto the image at the requested alignment + image = image.convert('RGBA') + image2 = image.copy() + x, y = Watermark.alignment[self.alignment](image.width, image.height, base.width, base.height) + image2.paste(base, (int(x), int(y))) + image = Image.alpha_composite(image, image2) + + return image.convert('RGB') + + @staticmethod + def color_hex_to_rgb(value, transparency): + # keep only the RGB part so 8-digit values such as '#00000000' do not break the 3-tuple unpack + value = value.lstrip('#')[:6] + r, g, b = tuple(int(value[i:i + 2], 16) for i in range(0, 6, 2)) + return r, g, b, transparency + + @staticmethod + def list_fonts(): + if sys.platform == 'win32': + path = 'C:\\Windows\\Fonts\\*.ttf' + files = glob.glob(path) + return [os.path.basename(f) for f in files] + if sys.platform == 'darwin': + path = '/System/Library/Fonts/*' + files = glob.glob(path) + return [os.path.basename(f) for f in files] + if sys.platform == 'linux': + path = '/usr/share/fonts/*' + files = glob.glob(path) + fonts = [os.path.basename(f) for f in files] + if 'SAGEMAKER_INTERNAL_IMAGE_URI' in os.environ: + path = '/opt/conda/envs/sagemaker-distribution/fonts/*' + files = glob.glob(path) + fonts.extend([os.path.basename(f) for f in files]) + return fonts + return [''] + + @staticmethod + def get_font(font, size): + if sys.platform == 'win32': + path = f'C:\\Windows\\Fonts\\{font}' + return ImageFont.truetype(path, size, encoding="unic") + if sys.platform == 'darwin': + path = f'/System/Library/Fonts/{font}' + return ImageFont.truetype(path, size, encoding="unic") + if sys.platform == 'linux': + if 'SAGEMAKER_INTERNAL_IMAGE_URI' in os.environ: + path = f'/opt/conda/envs/sagemaker-distribution/fonts/{font}' + else: + path = f'/usr/share/fonts/{font}' + return ImageFont.truetype(path, size, encoding="unic") diff --git a/3-bmab/sd_bmab/processors/preprocess/__init__.py b/3-bmab/sd_bmab/processors/preprocess/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7048f5d1f902295f0547e82cb390642c69f635c3 --- /dev/null +++ b/3-bmab/sd_bmab/processors/preprocess/__init__.py @@ -0,0 +1,6 @@ +from sd_bmab.processors.preprocess.resize import ResizeIntermidiateBeforeUpscale, ResizeIntermidiateAfterUpsacle +from sd_bmab.processors.preprocess.refiner import RefinerPreprocessor +from sd_bmab.processors.preprocess.pretraining import PretrainingDetailer, PretrainingDetailerBeforeUpscale +from sd_bmab.processors.preprocess.resample import ResamplePreprocessor, ResamplePreprocessorBeforeUpscale +from sd_bmab.processors.preprocess.preprocessfilter import PreprocessFilter + diff --git a/3-bmab/sd_bmab/processors/preprocess/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/processors/preprocess/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fcb90d6829da77b66283e6abf403c4395c45c366 Binary files /dev/null and b/3-bmab/sd_bmab/processors/preprocess/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/preprocess/__pycache__/preprocessfilter.cpython-310.pyc b/3-bmab/sd_bmab/processors/preprocess/__pycache__/preprocessfilter.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..351b9ef8c2cd5a274b62a2e881ec9fcdec8b0a24 Binary files /dev/null and b/3-bmab/sd_bmab/processors/preprocess/__pycache__/preprocessfilter.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/preprocess/__pycache__/pretraining.cpython-310.pyc b/3-bmab/sd_bmab/processors/preprocess/__pycache__/pretraining.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..eb925d5a6912082dc3eec11e756b83530e6df115 Binary files /dev/null and b/3-bmab/sd_bmab/processors/preprocess/__pycache__/pretraining.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/preprocess/__pycache__/refiner.cpython-310.pyc b/3-bmab/sd_bmab/processors/preprocess/__pycache__/refiner.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ffbcbc4f4298efbc7e3cc4417edceadd1a6c2944 Binary files /dev/null and b/3-bmab/sd_bmab/processors/preprocess/__pycache__/refiner.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/preprocess/__pycache__/resample.cpython-310.pyc b/3-bmab/sd_bmab/processors/preprocess/__pycache__/resample.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..56927638f9df6915a51574cd61d7fdd46e5a7b94 Binary files /dev/null and b/3-bmab/sd_bmab/processors/preprocess/__pycache__/resample.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/preprocess/__pycache__/resize.cpython-310.pyc b/3-bmab/sd_bmab/processors/preprocess/__pycache__/resize.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..950ed8488333d54132c9c0684fd3938bfe8f3acc Binary files /dev/null and b/3-bmab/sd_bmab/processors/preprocess/__pycache__/resize.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/preprocess/preprocessfilter.py b/3-bmab/sd_bmab/processors/preprocess/preprocessfilter.py new file mode 100644 index 0000000000000000000000000000000000000000..b27c35c66d87037b40d4ff26bc6d990903afd8d0 --- /dev/null +++ b/3-bmab/sd_bmab/processors/preprocess/preprocessfilter.py @@ -0,0 +1,27 @@ +from PIL import Image +from sd_bmab.base import Context, ProcessorBase +from sd_bmab.base import filter + + +class PreprocessFilter(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.filter_name = 'None' + self.filter = None + + def preprocess(self, context: Context, image: Image): + self.filter_name = context.args.get('txt2img_filter_hresfix_before_upscale', self.filter_name) + return not context.is_hires_fix() and self.filter_name != 'None' + + def process(self, context: Context, image: Image): + self.filter = filter.get_filter(self.filter_name) + if self.filter is not None: + filter.preprocess_filter(self.filter, context, image) + image = filter.process_filter(self.filter, context, None, image) + filter.postprocess_filter(self.filter, context) + return image + + def finalprocess(self, context: Context, image: Image): + if self.filter is not None: + filter.finalprocess_filter(self.filter, context) + self.filter = None diff --git a/3-bmab/sd_bmab/processors/preprocess/pretraining.py b/3-bmab/sd_bmab/processors/preprocess/pretraining.py new file mode 100644 index 0000000000000000000000000000000000000000..45168a08fa837ac47d396a217040d7a6cdffca10 --- /dev/null +++ b/3-bmab/sd_bmab/processors/preprocess/pretraining.py @@ -0,0 +1,179 @@ +import torch +import modules +from ultralytics import YOLO + +from PIL import Image +from PIL import ImageDraw +from PIL import ImageFilter + +from modules import devices + +from sd_bmab import constants, util +from sd_bmab.base import filter +from sd_bmab.base import process_img2img, Context, ProcessorBase, VAEMethodOverride +from sd_bmab.util import debug_print + + +class PretrainingDetailer(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.pretraining_opt = {} + + self.enabled = False + self.hiresfix_enabled = False + self.checkpoint = constants.checkpoint_default + 
self.vae = constants.vae_default + self.pretraining_model = None + self.filter = 'None' + self.prompt = '' + self.negative_prompt = '' + self.sampler = constants.sampler_default + self.scheduler = constants.scheduler_default + self.steps = 20 + self.cfg_scale = 7 + self.denoising_strength = 0.75 + self.confidence = 0.35 + self.dilation = 4 + + def predict(self, context: Context, image: Image, ptmodel, confidence): + yolo = util.load_pretraining_model(ptmodel) + boxes = [] + confs = [] + load = torch.load + torch.load = modules.safe.unsafe_torch_load + try: + model = YOLO(yolo) + pred = model(image, conf=confidence, device='') + boxes = pred[0].boxes.xyxy.cpu().numpy() + boxes = boxes.tolist() + confs = pred[0].boxes.conf.tolist() + except Exception as e: + debug_print('pretraining detection failed', e) + finally: + # always restore the patched torch.load, even if detection raised + torch.load = load + devices.torch_gc() + + return boxes, confs + + def preprocess(self, context: Context, image: Image): + self.enabled = context.args['pretraining_enabled'] + self.pretraining_opt = context.args.get('module_config', {}).get('pretraining_opt', {}) + self.hiresfix_enabled = self.pretraining_opt.get('hiresfix_enabled', self.hiresfix_enabled) + self.checkpoint = self.pretraining_opt.get('checkpoint', self.checkpoint) + self.vae = self.pretraining_opt.get('vae', self.vae) + self.pretraining_model = self.pretraining_opt.get('pretraining_model', self.pretraining_model) + self.filter = self.pretraining_opt.get('filter', self.filter) + self.prompt = self.pretraining_opt.get('prompt', self.prompt) + self.negative_prompt = self.pretraining_opt.get('negative_prompt', self.negative_prompt) + self.sampler = self.pretraining_opt.get('sampler', self.sampler) + self.scheduler = self.pretraining_opt.get('scheduler', self.scheduler) + self.steps = self.pretraining_opt.get('steps', self.steps) + self.cfg_scale = self.pretraining_opt.get('cfg_scale', self.cfg_scale) + self.denoising_strength = self.pretraining_opt.get('denoising_strength', self.denoising_strength) + self.confidence = self.pretraining_opt.get('box_threshold', self.confidence) + self.dilation = self.pretraining_opt.get('dilation', self.dilation) + + if self.enabled and self.hiresfix_enabled: + return False + return self.enabled + + def process(self, context: Context, image: Image): + bmab_filter = filter.get_filter(self.filter) + filter.preprocess_filter(bmab_filter, context, image) + image = self.process_pretraining(context, image) + image = filter.process_filter(bmab_filter, context, image, image) + filter.postprocess_filter(bmab_filter, context) + return image + + def process_pretraining(self, context: Context, image: Image): + boxes, logits = self.predict(context, image, self.pretraining_model, self.confidence) + + org_size = image.size + debug_print('size', org_size, len(boxes), len(logits)) + debug_print('sampler', context.sdprocessing.sampler_name if self.sampler == constants.sampler_default else self.sampler) + + pretraining_config = { + 'steps': self.steps, + 'cfg_scale': self.cfg_scale, + 'sampler_name': context.sdprocessing.sampler_name if self.sampler == constants.sampler_default else self.sampler, + 'scheduler': util.get_scheduler(context.sdprocessing) if self.scheduler == constants.scheduler_default else self.scheduler, + 'denoising_strength': self.denoising_strength, + 'width': image.width, + 'height': image.height, + } + + candidate = [] + for box, logit in zip(boxes, logits): + value = float(logit) + candidate.append((value, box, logit)) + candidate = sorted(candidate, key=lambda c: c[0], reverse=True) + + for idx, (size, box, logit) in enumerate(candidate): + context.add_job()
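+ # candidates are sorted by detection confidence, so the most confident box is inpainted first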
+ ''' + prompt = self.detailing_opt.get(f'prompt{idx}') + if prompt is not None: + + ''' + prompt = self.prompt + if prompt.find('#!org!#') >= 0: + current_prompt = context.get_prompt_by_index() + pretraining_config['prompt'] = prompt.replace('#!org!#', current_prompt) + debug_print('prompt for detection', pretraining_config['prompt']) + elif prompt != '': + pretraining_config['prompt'] = prompt + else: + pretraining_config['prompt'] = context.get_prompt_by_index() + + pretraining_config['negative_prompt'] = context.get_negative_prompt_by_index() + + debug_print('prompt', pretraining_config['prompt']) + debug_print('negative_prompt', pretraining_config['negative_prompt']) + + debug_print('render', float(logit)) + debug_print('delation', self.dilation) + + debug_print('box', box) + + detected_mask = Image.new('L', image.size, color=0) + dr = ImageDraw.Draw(detected_mask, 'L') + dr.rectangle(box, fill=255) + detected_mask = util.dilate_mask(detected_mask, self.dilation) + + seed, subseed = context.get_seeds() + options = dict(mask=detected_mask, seed=seed, subseed=subseed, **pretraining_config) + + if self.checkpoint != constants.checkpoint_default: + override_settings = options.get('override_settings', {}) + override_settings['sd_model_checkpoint'] = self.checkpoint + options['override_settings'] = override_settings + if self.vae != constants.vae_default: + override_settings = options.get('override_settings', {}) + override_settings['sd_vae'] = self.vae + options['override_settings'] = override_settings + + with VAEMethodOverride(): + img2img_imgage = process_img2img(context, image, options=options) + + x1, y1, x2, y2 = util.fix_box_size(box) + x1 -= int(detected_mask.width / 2) + x2 += int(detected_mask.width / 2) + y1 -= int(detected_mask.height / 2) + y2 += int(detected_mask.height / 2) + + detected_mask = Image.new('L', image.size, color=0) + dr = ImageDraw.Draw(detected_mask, 'L') + dr.rectangle((x1, y1, x2, y2), fill=255) + blur = ImageFilter.GaussianBlur(3) + mask = detected_mask.filter(blur) + image.paste(img2img_imgage, mask=mask) + return image + + def postprocess(self, context: Context, image: Image): + devices.torch_gc() + + +class PretrainingDetailerBeforeUpscale(PretrainingDetailer): + + def preprocess(self, context: Context, image: Image): + super().preprocess(context, image) + return self.enabled and self.hiresfix_enabled and (context.is_hires_fix() or context.is_img2img()) diff --git a/3-bmab/sd_bmab/processors/preprocess/refiner.py b/3-bmab/sd_bmab/processors/preprocess/refiner.py new file mode 100644 index 0000000000000000000000000000000000000000..9fc4c7e2f3952507bc871294209fb8e399995b7b --- /dev/null +++ b/3-bmab/sd_bmab/processors/preprocess/refiner.py @@ -0,0 +1,154 @@ +from PIL import Image + +from modules import devices +from modules import images + +from sd_bmab import util +from sd_bmab import constants +from sd_bmab.util import debug_print +from sd_bmab.base import process_img2img, Context, ProcessorBase, process_img2img_with_controlnet +from sd_bmab.processors.controlnet import LineartNoise + + +class RefinerPreprocessor(ProcessorBase): + def __init__(self) -> None: + super().__init__() + + self.refiner_opt = {} + self.enabled = False + self.checkpoint = constants.checkpoint_default + self.vae = constants.vae_default + self.keep_checkpoint = True + self.prompt = None + self.negative_prompt = None + self.sampler = None + self.scheduler = None + self.upscaler = None + self.steps = 20 + self.cfg_scale = 0.7 + self.denoising_strength = 0.75 + self.scale = 1 + 
self.width = 0 + self.height = 0 + + def preprocess(self, context: Context, image: Image): + self.enabled = context.args['refiner_enabled'] + self.refiner_opt = context.args.get('module_config', {}).get('refiner_opt', {}) + + self.checkpoint = self.refiner_opt.get('checkpoint', self.checkpoint) + self.vae = self.refiner_opt.get('vae', self.vae) + self.keep_checkpoint = self.refiner_opt.get('keep_checkpoint', True) + self.prompt = self.refiner_opt.get('prompt', '') + self.negative_prompt = self.refiner_opt.get('negative_prompt', '') + self.sampler = self.refiner_opt.get('sampler', None) + self.scheduler = self.refiner_opt.get('scheduler', None) + self.upscaler = self.refiner_opt.get('upscaler', None) + self.steps = self.refiner_opt.get('steps', None) + self.cfg_scale = self.refiner_opt.get('cfg_scale', None) + self.denoising_strength = self.refiner_opt.get('denoising_strength', None) + self.scale = self.refiner_opt.get('scale', None) + self.width = self.refiner_opt.get('width', None) + self.height = self.refiner_opt.get('height', None) + + if self.enabled: + context.refiner = self + + return self.enabled + + def process(self, context: Context, image: Image): + output_width = image.width + output_height = image.height + + if not (self.width == 0 and self.height == 0 and self.scale == 1): + if (self.width == 0 or self.height == 0) and self.scale != 1: + output_width = int(image.width * self.scale) + output_height = int(image.height * self.scale) + elif self.width != 0 and self.height != 0: + output_width = self.width + output_height = self.height + + if image.width != output_width or image.height != output_height: + LANCZOS = (Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS) + if self.upscaler == constants.fast_upscaler: + image = image.resize((output_width, output_height), resample=LANCZOS) + else: + image = images.resize_image(0, image, output_width, output_height, self.upscaler) + + if self.prompt == '': + self.prompt = context.get_prompt_by_index() + debug_print('prompt', self.prompt) + elif self.prompt.find('#!org!#') >= 0: + current_prompt = context.get_prompt_by_index() + self.prompt = self.prompt.replace('#!org!#', current_prompt) + debug_print('Prompt', self.prompt) + if self.negative_prompt == '': + self.negative_prompt = context.sdprocessing.negative_prompt + if self.sampler == constants.sampler_default: + self.sampler = context.sdprocessing.sampler_name + if self.scheduler == constants.scheduler_default: + self.scheduler = util.get_scheduler(context.sdprocessing) + + seed, subseed = context.get_seeds() + options = dict( + seed=seed, subseed=subseed, + denoising_strength=self.denoising_strength, + resize_mode=0, + mask=None, + mask_blur=4, + inpainting_fill=1, + inpaint_full_res=True, + inpaint_full_res_padding=32, + inpainting_mask_invert=0, + initial_noise_multiplier=1.0, + prompt=self.prompt, + negative_prompt=self.negative_prompt, + sampler_name=self.sampler, + scheduler=self.scheduler, + batch_size=1, + n_iter=1, + steps=self.steps, + cfg_scale=self.cfg_scale, + width=output_width, + height=output_height, + restore_faces=False, + do_not_save_samples=True, + do_not_save_grid=True, + ) + context.add_job() + + if self.checkpoint is not None and self.checkpoint != constants.checkpoint_default: + override_settings = options.get('override_settings', {}) + override_settings['sd_model_checkpoint'] = self.checkpoint + options['override_settings'] = override_settings + if self.vae is not None and self.vae != constants.vae_default: + override_settings = 
options.get('override_settings', {}) + override_settings['sd_vae'] = self.vae + options['override_settings'] = override_settings + + if LineartNoise.with_refiner(context): + ln = LineartNoise() + if ln.preprocess(context, None): + controlnet = ln.get_controlnet_args(context) + image = process_img2img_with_controlnet(context, image, options, controlnet=[controlnet]) + else: + image = process_img2img(context, image, options=options) + else: + image = process_img2img(context, image, options=options) + + if not self.keep_checkpoint: + debug_print('Rollback model') + context.restore_checkpoint() + + return image + + @staticmethod + def process_callback(self, context, img2img): + ctx = Context.newContext(self, img2img, context.args, 0) + ctx.refiner = self + ln = LineartNoise() + if ln.preprocess(ctx, None): + ln.process(ctx, None) + ln.postprocess(ctx, None) + + def postprocess(self, context: Context, image: Image): + devices.torch_gc() diff --git a/3-bmab/sd_bmab/processors/preprocess/resample.py b/3-bmab/sd_bmab/processors/preprocess/resample.py new file mode 100644 index 0000000000000000000000000000000000000000..04e3fcfd11f0f84c15cbe539a17f56cce1741bbd --- /dev/null +++ b/3-bmab/sd_bmab/processors/preprocess/resample.py @@ -0,0 +1,188 @@ +from PIL import Image + +from modules import shared +from modules import devices +from modules import images + +from sd_bmab import util +from sd_bmab import constants +from sd_bmab.base import filter +from sd_bmab.util import debug_print +from sd_bmab.base import process_txt2img, process_img2img_with_controlnet, Context, ProcessorBase + + +class ResamplePreprocessor(ProcessorBase): + def __init__(self) -> None: + super().__init__() + + self.resample_opt = {} + self.enabled = False + self.hiresfix_enabled = False + self.save_image = False + self.method = 'txt2img-1pass' + self.checkpoint = constants.checkpoint_default + self.vae = constants.vae_default + self.filter = 'None' + self.prompt = None + self.negative_prompt = None + self.sampler = None + self.scheduler = None + self.upscaler = None + self.steps = 20 + self.cfg_scale = 0.7 + self.denoising_strength = 0.75 + self.strength = 0.5 + self.begin = 0.0 + self.end = 1.0 + + self.base_sd_model = None + + def use_controlnet(self, context: Context): + return self.preprocess(context, None) + + def preprocess(self, context: Context, image: Image): + self.enabled = context.args['resample_enabled'] + self.resample_opt = context.args.get('module_config', {}).get('resample_opt', {}) + + self.hiresfix_enabled = self.resample_opt.get('hiresfix_enabled', self.hiresfix_enabled) + self.save_image = self.resample_opt.get('save_image', self.save_image) + self.method = self.resample_opt.get('method', self.method) + self.checkpoint = self.resample_opt.get('checkpoint', self.checkpoint) + self.vae = self.resample_opt.get('vae', self.vae) + self.filter = self.resample_opt.get('filter', self.filter) + self.prompt = self.resample_opt.get('prompt', self.prompt) + self.negative_prompt = self.resample_opt.get('negative_prompt', self.negative_prompt) + self.sampler = self.resample_opt.get('sampler', self.sampler) + self.scheduler = self.resample_opt.get('scheduler', self.scheduler) + self.upscaler = self.resample_opt.get('upscaler', self.upscaler) + self.steps = self.resample_opt.get('steps', self.steps) + self.cfg_scale = self.resample_opt.get('cfg_scale', self.cfg_scale) + self.denoising_strength = self.resample_opt.get('denoising_strength', self.denoising_strength) + self.strength = self.resample_opt.get('scale', self.strength) 
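+ # 'strength' becomes the ControlNet weight; 'begin'/'end' bound its guidance window (see get_resample_args)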
+ + self.begin = self.resample_opt.get('begin', self.begin) + self.end = self.resample_opt.get('end', self.end) + + if self.enabled and self.hiresfix_enabled: + return False + return self.enabled + + @staticmethod + def get_resample_args(image, weight, begin, end): + cn_args = { + 'enabled': True, + 'image': util.b64_encoding(image.convert('RGB')), + 'module': 'tile_resample', + 'model': shared.opts.bmab_cn_tile_resample, + 'weight': weight, + "guidance_start": begin, + "guidance_end": end, + 'resize_mode': 'Just Resize', + 'pixel_perfect': False, + 'control_mode': 'ControlNet is more important', + 'processor_res': 512, + 'threshold_a': 1, + 'threshold_b': 1, + } + return cn_args + + def process(self, context: Context, image: Image): + if self.prompt == '': + self.prompt = context.get_prompt_by_index() + debug_print('prompt', self.prompt) + elif self.prompt.find('#!org!#') >= 0: + current_prompt = context.get_prompt_by_index() + self.prompt = self.prompt.replace('#!org!#', current_prompt) + debug_print('Prompt', self.prompt) + if self.negative_prompt == '': + self.negative_prompt = context.sdprocessing.negative_prompt + if self.checkpoint == constants.checkpoint_default: + self.checkpoint = context.sdprocessing.sd_model + if self.sampler == constants.sampler_default: + self.sampler = context.sdprocessing.sampler_name + if self.scheduler == constants.scheduler_default: + self.scheduler = util.get_scheduler(context.sdprocessing) + + bmab_filter = filter.get_filter(self.filter) + + seed, subseed = context.get_seeds() + options = dict( + seed=seed, subseed=subseed, + denoising_strength=self.denoising_strength, + prompt=self.prompt, + negative_prompt=self.negative_prompt, + sampler_name=self.sampler, + scheduler=self.scheduler, + steps=self.steps, + cfg_scale=self.cfg_scale, + ) + + if self.checkpoint != constants.checkpoint_default: + override_settings = options.get('override_settings', {}) + override_settings['sd_model_checkpoint'] = self.checkpoint + options['override_settings'] = override_settings + if self.vae != constants.vae_default: + override_settings = options.get('override_settings', {}) + override_settings['sd_vae'] = self.vae + options['override_settings'] = override_settings + + filter.preprocess_filter(bmab_filter, context, image, options) + + context.add_job() + if self.save_image: + saved = image.copy() + images.save_image( + saved, context.sdprocessing.outpath_samples, '', + context.sdprocessing.all_seeds[context.index], context.sdprocessing.all_prompts[context.index], + shared.opts.samples_format, p=context.sdprocessing, suffix='-before-resample') + context.add_extra_image(saved) + cn_op_arg = self.get_resample_args(image, self.strength, self.begin, self.end) + + processed = image.copy() + if self.hiresfix_enabled: + if self.method == 'txt2img-1pass' or self.method == 'txt2img-2pass': + options['width'] = processed.width + options['height'] = processed.height + processed = process_txt2img(context, options=options, controlnet=[cn_op_arg]) + elif self.method == 'img2img-1pass': + # drop the reference image; img2img supplies the init image directly + del cn_op_arg['image'] + options['width'] = processed.width + options['height'] = processed.height + processed = process_img2img_with_controlnet(context, image, options=options, controlnet=[cn_op_arg]) + else: + if self.method == 'txt2img-1pass': + if context.is_hires_fix(): + if context.sdprocessing.hr_resize_x != 0 or context.sdprocessing.hr_resize_y != 0: + options['width'] = context.sdprocessing.hr_resize_x + options['height'] = context.sdprocessing.hr_resize_y + else: + options['width'] = 
int(context.sdprocessing.width * context.sdprocessing.hr_scale) + options['height'] = int(context.sdprocessing.height * context.sdprocessing.hr_scale) + processed = process_txt2img(context, options=options, controlnet=[cn_op_arg]) + elif self.method == 'txt2img-2pass': + if context.is_txtimg() and context.is_hires_fix(): + options.update(dict( + enable_hr=context.sdprocessing.enable_hr, + hr_scale=context.sdprocessing.hr_scale, + hr_resize_x=context.sdprocessing.hr_resize_x, + hr_resize_y=context.sdprocessing.hr_resize_y, + )) + processed = process_txt2img(context, options=options, controlnet=[cn_op_arg]) + elif self.method == 'img2img-1pass': + del cn_op_arg['image'] + processed = process_img2img_with_controlnet(context, image, options=options, controlnet=[cn_op_arg]) + + image = filter.process_filter(bmab_filter, context, image, processed) + filter.postprocess_filter(bmab_filter, context) + + return image + + def postprocess(self, context: Context, image: Image): + devices.torch_gc() + + +class ResamplePreprocessorBeforeUpscale(ResamplePreprocessor): + + def preprocess(self, context: Context, image: Image): + super().preprocess(context, image) + return self.enabled and self.hiresfix_enabled and (context.is_hires_fix() or context.is_img2img()) + diff --git a/3-bmab/sd_bmab/processors/preprocess/resize.py b/3-bmab/sd_bmab/processors/preprocess/resize.py new file mode 100644 index 0000000000000000000000000000000000000000..5f7a99d7347b1ed5ed87d1ed2d655fe5a84d34d0 --- /dev/null +++ b/3-bmab/sd_bmab/processors/preprocess/resize.py @@ -0,0 +1,222 @@ +from PIL import Image + +from modules import shared + +from sd_bmab.base.context import Context +from sd_bmab.base.processorbase import ProcessorBase +from sd_bmab import util +from sd_bmab.base import filter +from sd_bmab.util import debug_print +from sd_bmab.detectors import UltralyticsPersonDetector8n +from sd_bmab.base import process_img2img, process_img2img_with_controlnet +from sd_bmab.external.lama import LamaInpainting + + +class ResizeIntermidiate(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.enabled = False + self.filter = 'None' + self.resize_by_person_opt = None + self.resize_by_person = True + self.method = 'stretching' + self.alignment = 'bottom' + self.value = 0 + self.denoising_strength = 0.75 + + def use_controlnet(self, context: Context): + self.preprocess(context, None) + if self.enabled and self.method in ['inpaint_only+lama', 'inpaint_only']: + return True + return False + + def preprocess(self, context: Context, image: Image): + self.enabled = context.args.get('resize_intermediate_enabled', False) + self.resize_by_person_opt = context.args.get('module_config', {}).get('resize_intermediate_opt', {}) + + self.filter = self.resize_by_person_opt.get('filter', self.filter) + self.resize_by_person = self.resize_by_person_opt.get('resize_by_person', self.resize_by_person) + self.method = self.resize_by_person_opt.get('method', self.method) + self.alignment = self.resize_by_person_opt.get('alignment', self.alignment) + self.value = self.resize_by_person_opt.get('scale', self.value) + self.denoising_strength = self.resize_by_person_opt.get('denoising_strength', self.denoising_strength) + + @staticmethod + def get_inpaint_lama_args(image, mask, module): + cn_args = { + 'enabled': True, + 'image': util.b64_encoding(image), + 'mask': util.b64_encoding(mask), + 'module': module, + 'model': shared.opts.bmab_cn_inpaint, + 'weight': 1, + "guidance_start": 0, + "guidance_end": 1, + 'resize_mode': 'Resize and 
Fill', + 'pixel_perfect': False, + 'control_mode': 'ControlNet is more important', + 'processor_res': 512, + 'threshold_a': 64, + 'threshold_b': 64, + } + return cn_args + + def process(self, context: Context, image: Image): + bmab_filter = filter.get_filter(self.filter) + filter.preprocess_filter(bmab_filter, context, image) + image = self.process_resize(context, image) + image = filter.process_filter(bmab_filter, context, image, image) + filter.postprocess_filter(bmab_filter, context) + return image + + def process_resize(self, context: Context, image: Image): + context.add_generation_param('BMAB process_resize_by_person', self.value) + org_size = image.size + + if self.resize_by_person: + debug_print('prepare detector') + detector = UltralyticsPersonDetector8n() + boxes, logits = detector.predict(context, image) + + debug_print('boxes', len(boxes)) + debug_print('logits', len(logits)) + debug_print('alignment', self.alignment) + + debug_print('size', org_size) + + if len(boxes) == 0: + return image + + largest = [] + for idx, box in enumerate(boxes): + x1, y1, x2, y2 = box + largest.append(((y2 - y1), box)) + debug_print(f'ratio {idx}', (y2 - y1) / image.height) + largest = sorted(largest, key=lambda c: c[0], reverse=True) + + x1, y1, x2, y2 = largest[0][1] + ratio = (y2 - y1) / image.height + debug_print('ratio', ratio) + debug_print('org_size', org_size) + if ratio > self.value: + image_ratio = ratio / self.value + if image_ratio < 1.0: + return image + else: + return image + else: + image_ratio = 1 / self.value + + context.add_generation_param('BMAB process_resize_by_person_ratio', '%.3s' % image_ratio) + + debug_print('image resize ratio', image_ratio) + stretching_image = util.resize_image_with_alignment(image, self.alignment, int(image.width * image_ratio), int(image.height * image_ratio)) + + if self.method == 'stretching': + # image = util.resize_image(2, image, int(image.width * image_ratio), int(image.height * image_ratio)) + debug_print('Stretching') + return image + elif self.method == 'inpaint': + mask = util.get_mask_with_alignment(image, self.alignment, int(image.width * image_ratio), int(image.height * image_ratio)) + debug_print('mask size', mask.size) + seed, subseed = context.get_seeds() + options = dict( + seed=seed, subseed=subseed, + denoising_strength=self.denoising_strength, + resize_mode=0, + mask=mask, + mask_blur=4, + inpainting_fill=1, + inpaint_full_res=True, + inpaint_full_res_padding=32, + inpainting_mask_invert=0, + initial_noise_multiplier=1.0, + prompt=context.get_prompt_by_index(), + negative_prompt=context.get_negative_prompt_by_index(), + batch_size=1, + n_iter=1, + restore_faces=False, + do_not_save_samples=True, + do_not_save_grid=True, + ) + context.add_job() + image = process_img2img(context, stretching_image, options=options) + return image + elif self.method == 'inpaint+lama': + mask = util.get_mask_with_alignment(image, self.alignment, int(image.width * image_ratio), int(image.height * image_ratio)) + lama = LamaInpainting() + stretching_image = lama(stretching_image, mask) + debug_print('mask size', mask.size) + seed, subseed = context.get_seeds() + options = dict( + seed=seed, subseed=subseed, + denoising_strength=self.denoising_strength, + resize_mode=0, + mask=mask, + mask_blur=4, + inpainting_fill=1, + inpaint_full_res=True, + inpaint_full_res_padding=32, + inpainting_mask_invert=0, + initial_noise_multiplier=1.0, + prompt=context.get_prompt_by_index(), + negative_prompt=context.get_negative_prompt_by_index(), + batch_size=1, + n_iter=1, 
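+ # sample saving is suppressed for this intermediate pass; BMAB manages its own outputs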
+ + restore_faces=False, + do_not_save_samples=True, + do_not_save_grid=True, + ) + context.add_job() + image = process_img2img(context, stretching_image, options=options) + return image + elif self.method == 'inpaint_only+lama': + mask = util.get_mask_with_alignment(image, self.alignment, int(image.width * image_ratio), int(image.height * image_ratio)) + opt = dict(denoising_strength=self.denoising_strength) + debug_print('Stretching image size', stretching_image.size) + debug_print('Mask image size', mask.size) + cnarg = self.get_inpaint_lama_args(stretching_image, mask, 'inpaint_only+lama') + context.add_job() + image = process_img2img_with_controlnet(context, image, opt, controlnet=[cnarg]) + elif self.method == 'inpaint_only': + mask = util.get_mask_with_alignment(image, self.alignment, int(image.width * image_ratio), int(image.height * image_ratio)) + opt = dict(denoising_strength=self.denoising_strength) + debug_print('Stretching image size', stretching_image.size) + debug_print('Mask image size', mask.size) + cnarg = self.get_inpaint_lama_args(stretching_image, mask, 'inpaint_only') + context.add_job() + image = process_img2img_with_controlnet(context, image, opt, controlnet=[cnarg]) + return image + + def postprocess(self, context: Context, image: Image): + pass + + +class ResizeIntermidiateBeforeUpscale(ResizeIntermidiate): + + def preprocess(self, context: Context, image: Image): + super().preprocess(context, image) + + if self.value < 0.5 or self.value >= 1.0: + # the scale must stay within [0.5, 1.0) + return False + if context.is_txtimg(): + if self.method == 'stretching': + return False + return self.enabled + else: + return self.enabled + + +class ResizeIntermidiateAfterUpsacle(ResizeIntermidiate): + + def preprocess(self, context: Context, image: Image): + super().preprocess(context, image) + + if self.value < 0.5 or self.value >= 1.0: + return False + if context.is_txtimg(): + if self.method != 'stretching': + return False + return self.enabled + else: + return self.enabled diff --git a/3-bmab/sd_bmab/processors/utils/__init__.py b/3-bmab/sd_bmab/processors/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ecd3d1083a890b34034f662f0d41cec20adca4a4 --- /dev/null +++ b/3-bmab/sd_bmab/processors/utils/__init__.py @@ -0,0 +1,3 @@ +from sd_bmab.processors.utils.filesave import AfterProcessFileSaver, BeforeProcessFileSaver +from sd_bmab.processors.utils.modelswitch import ApplyModel, RollbackModel +from sd_bmab.processors.utils.checkpoint import CheckPointChanger, CheckPointRestore diff --git a/3-bmab/sd_bmab/processors/utils/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/processors/utils/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6e4fcccf9e676b46689526f6c9be7eae8da2fb64 Binary files /dev/null and b/3-bmab/sd_bmab/processors/utils/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/utils/__pycache__/checkpoint.cpython-310.pyc b/3-bmab/sd_bmab/processors/utils/__pycache__/checkpoint.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..508f530a1daa0846e71c594c54a5d2fd0325f475 Binary files /dev/null and b/3-bmab/sd_bmab/processors/utils/__pycache__/checkpoint.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/utils/__pycache__/filesave.cpython-310.pyc b/3-bmab/sd_bmab/processors/utils/__pycache__/filesave.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a2b84eb2b1e229b4370a015fbbc34f39c5b9b359 Binary files /dev/null and 
b/3-bmab/sd_bmab/processors/utils/__pycache__/filesave.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/utils/__pycache__/modelswitch.cpython-310.pyc b/3-bmab/sd_bmab/processors/utils/__pycache__/modelswitch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..140bc80ed49cff1410b6ae921f873cc7a4945ab5 Binary files /dev/null and b/3-bmab/sd_bmab/processors/utils/__pycache__/modelswitch.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/processors/utils/checkpoint.py b/3-bmab/sd_bmab/processors/utils/checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..6ff0b4d44e7567e41f48d1a1aba3edc7c388eb2c --- /dev/null +++ b/3-bmab/sd_bmab/processors/utils/checkpoint.py @@ -0,0 +1,35 @@ +from PIL import Image + +from sd_bmab import constants +from sd_bmab.base.context import Context +from sd_bmab.base.processorbase import ProcessorBase +from sd_bmab.util import debug_print + + +class CheckPointChanger(ProcessorBase): + def __init__(self) -> None: + super().__init__() + self.checkpoint = constants.checkpoint_default + self.vae = constants.vae_default + + def preprocess(self, context: Context, image: Image): + self.checkpoint = context.args['preprocess_checkpoint'] + self.vae = context.args['preprocess_vae'] + return not (self.checkpoint == constants.checkpoint_default and self.vae == constants.vae_default) + + def process(self, context: Context, image: Image): + debug_print('Change checkpoint', self.checkpoint, self.vae) + context.save_and_apply_checkpoint(self.checkpoint, self.vae) + return image + + +class CheckPointRestore(ProcessorBase): + def __init__(self) -> None: + super().__init__() + + def preprocess(self, context: Context, image: Image): + return True + + def process(self, context: Context, image: Image): + context.restore_checkpoint() + return image diff --git a/3-bmab/sd_bmab/processors/utils/filesave.py b/3-bmab/sd_bmab/processors/utils/filesave.py new file mode 100644 index 0000000000000000000000000000000000000000..38e5b8bf4657fa130d0cee14f684f4fda1cce660 --- /dev/null +++ b/3-bmab/sd_bmab/processors/utils/filesave.py @@ -0,0 +1,43 @@ +from PIL import Image + +from modules import shared +from modules import images + +from sd_bmab.base.context import Context +from sd_bmab.base.processorbase import ProcessorBase + + +class BeforeProcessFileSaver(ProcessorBase): + def __init__(self) -> None: + super().__init__() + + def preprocess(self, context: Context, image: Image): + return shared.opts.bmab_save_image_before_process + + def process(self, context: Context, image: Image): + images.save_image( + image, context.sdprocessing.outpath_samples, '', + context.sdprocessing.all_seeds[context.index], context.sdprocessing.all_prompts[context.index], + shared.opts.samples_format, p=context.sdprocessing, suffix='-before-bmab') + return image + + def postprocess(self, context: Context, image: Image): + pass + + +class AfterProcessFileSaver(ProcessorBase): + def __init__(self) -> None: + super().__init__() + + def preprocess(self, context: Context, image: Image): + return shared.opts.bmab_save_image_after_process + + def process(self, context: Context, image: Image): + images.save_image( + image, context.sdprocessing.outpath_samples, '', + context.sdprocessing.all_seeds[context.index], context.sdprocessing.all_prompts[context.index], + shared.opts.samples_format, p=context.sdprocessing, suffix="-after-bmab") + return image + + def postprocess(self, context: Context, image: Image): + pass diff --git 
a/3-bmab/sd_bmab/processors/utils/modelswitch.py b/3-bmab/sd_bmab/processors/utils/modelswitch.py new file mode 100644 index 0000000000000000000000000000000000000000..d3584fda49480b3022d3bc143a1dd33f38b00079 --- /dev/null +++ b/3-bmab/sd_bmab/processors/utils/modelswitch.py @@ -0,0 +1,55 @@ + +from PIL import Image + +from modules import shared, sd_models + +from sd_bmab.base.context import Context +from sd_bmab.base.processorbase import ProcessorBase + + +base_sd_model = None + + +def change_model(name): + if name is None: + return + info = sd_models.get_closet_checkpoint_match(name) + if info is None: + print(f'Unknown model: {name}') + return + sd_models.reload_model_weights(shared.sd_model, info) + + +class ApplyModel(ProcessorBase): + def __init__(self) -> None: + super().__init__() + + def preprocess(self, context: Context, image: Image): + return shared.opts.bmab_use_specific_model + + def process(self, context: Context, image: Image): + global base_sd_model + base_sd_model = shared.opts.data['sd_model_checkpoint'] + change_model(shared.opts.bmab_model) + return image + + def postprocess(self, context: Context, image: Image): + pass + + +class RollbackModel(ProcessorBase): + def __init__(self) -> None: + super().__init__() + + def preprocess(self, context: Context, image: Image): + return shared.opts.bmab_use_specific_model + + def process(self, context: Context, image: Image): + global base_sd_model + if base_sd_model is not None: + change_model(base_sd_model) + base_sd_model = None + return image + + def postprocess(self, context: Context, image: Image): + pass diff --git a/3-bmab/sd_bmab/sd_override/__init__.py b/3-bmab/sd_bmab/sd_override/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..33bc4cf74877e313d0b39b185357cf831eae17a8 --- /dev/null +++ b/3-bmab/sd_bmab/sd_override/__init__.py @@ -0,0 +1,13 @@ +from sd_bmab.sd_override.samper import override_samplers +from sd_bmab.sd_override.img2img import StableDiffusionProcessingImg2ImgOv +from sd_bmab.sd_override.txt2img import StableDiffusionProcessingTxt2ImgOv + +from modules import processing + +processing.StableDiffusionProcessingImg2Img = StableDiffusionProcessingImg2ImgOv +processing.StableDiffusionProcessingTxt2Img = StableDiffusionProcessingTxt2ImgOv + + +def override_sd_webui(): + # override_samplers() + pass diff --git a/3-bmab/sd_bmab/sd_override/__pycache__/__init__.cpython-310.pyc b/3-bmab/sd_bmab/sd_override/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d0a3a798bcdff7f03c40c0f5f70757e71a946d0d Binary files /dev/null and b/3-bmab/sd_bmab/sd_override/__pycache__/__init__.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/sd_override/__pycache__/img2img.cpython-310.pyc b/3-bmab/sd_bmab/sd_override/__pycache__/img2img.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb80037b5cc7045b128c9dac6cdcd6d771085975 Binary files /dev/null and b/3-bmab/sd_bmab/sd_override/__pycache__/img2img.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/sd_override/__pycache__/samper.cpython-310.pyc b/3-bmab/sd_bmab/sd_override/__pycache__/samper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3025004c898fccda8df50085b26810f21d058256 Binary files /dev/null and b/3-bmab/sd_bmab/sd_override/__pycache__/samper.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/sd_override/__pycache__/sd_models.cpython-310.pyc b/3-bmab/sd_bmab/sd_override/__pycache__/sd_models.cpython-310.pyc new file 
mode 100644 index 0000000000000000000000000000000000000000..5851dc67dd72a5b6b1796ebc634338b669e9c982 Binary files /dev/null and b/3-bmab/sd_bmab/sd_override/__pycache__/sd_models.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/sd_override/__pycache__/txt2img.cpython-310.pyc b/3-bmab/sd_bmab/sd_override/__pycache__/txt2img.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ece001086c6b34877d83eed7867a62933dcfa42e Binary files /dev/null and b/3-bmab/sd_bmab/sd_override/__pycache__/txt2img.cpython-310.pyc differ diff --git a/3-bmab/sd_bmab/sd_override/img2img.py b/3-bmab/sd_bmab/sd_override/img2img.py new file mode 100644 index 0000000000000000000000000000000000000000..b1c9211f51aa74cc2673ec74e681b16542e3de4b --- /dev/null +++ b/3-bmab/sd_bmab/sd_override/img2img.py @@ -0,0 +1,21 @@ +from dataclasses import dataclass + +from modules.processing import StableDiffusionProcessingImg2Img + + +@dataclass(repr=False) +class StableDiffusionProcessingImg2ImgOv(StableDiffusionProcessingImg2Img): + extra_noise: int = 0 + + def __post_init__(self): + super().__post_init__() + + def init(self, all_prompts, all_seeds, all_subseeds): + ret = super().init(all_prompts, all_seeds, all_subseeds) + self.extra_generation_params['Hires prompt'] = '' + self.extra_generation_params['Hires negative prompt'] = '' + return ret + + def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts): + return super().sample(conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts) + diff --git a/3-bmab/sd_bmab/sd_override/samper.py b/3-bmab/sd_bmab/sd_override/samper.py new file mode 100644 index 0000000000000000000000000000000000000000..b3c4b40980c7347e08497a6d16c3c488329fb8d9 --- /dev/null +++ b/3-bmab/sd_bmab/sd_override/samper.py @@ -0,0 +1,93 @@ +import modules +import k_diffusion.sampling +from modules.sd_samplers_kdiffusion import KDiffusionSampler +from modules.sd_samplers import set_samplers +from modules.shared import opts, state +import inspect +from modules import sd_samplers_common +from modules.script_callbacks import ExtraNoiseParams, extra_noise_callback + + +class KDiffusionSamplerBMAB(KDiffusionSampler): + + def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None): + steps, t_enc = sd_samplers_common.setup_img2img_steps(p, steps) + + sigmas = self.get_sigmas(p, steps) + sigma_sched = sigmas[steps - t_enc - 1:] + + xi = x + noise * sigma_sched[0] + + if opts.img2img_extra_noise > 0: + p.extra_generation_params["Extra noise"] = opts.img2img_extra_noise + extra_noise_params = ExtraNoiseParams(noise, x, xi) + extra_noise_callback(extra_noise_params) + noise = extra_noise_params.noise + xi += noise * opts.img2img_extra_noise + + if hasattr(p, 'extra_noise') and p.extra_noise > 0: + p.extra_generation_params["Extra noise"] = p.extra_noise + extra_noise_params = ExtraNoiseParams(noise, x, xi) + extra_noise_callback(extra_noise_params) + noise = extra_noise_params.noise + xi += noise * p.extra_noise + + extra_params_kwargs = self.initialize(p) + parameters = inspect.signature(self.func).parameters + + if 'sigma_min' in parameters: + ## last sigma is zero which isn't allowed by DPM Fast & Adaptive so taking value before last + extra_params_kwargs['sigma_min'] = sigma_sched[-2] + if 'sigma_max' in parameters: + extra_params_kwargs['sigma_max'] = sigma_sched[0] + if 'n' in parameters: + extra_params_kwargs['n'] = len(sigma_sched) - 1 + if 'sigma_sched' in 
parameters: + extra_params_kwargs['sigma_sched'] = sigma_sched + if 'sigmas' in parameters: + extra_params_kwargs['sigmas'] = sigma_sched + + if self.config.options.get('brownian_noise', False): + noise_sampler = self.create_noise_sampler(x, sigmas, p) + extra_params_kwargs['noise_sampler'] = noise_sampler + + if self.config.options.get('solver_type', None) == 'heun': + extra_params_kwargs['solver_type'] = 'heun' + + self.model_wrap_cfg.init_latent = x + self.last_latent = x + self.sampler_extra_args = { + 'cond': conditioning, + 'image_cond': image_conditioning, + 'uncond': unconditional_conditioning, + 'cond_scale': p.cfg_scale, + 's_min_uncond': self.s_min_uncond + } + + samples = self.launch_sampling(t_enc + 1, lambda: self.func(self.model_wrap_cfg, xi, extra_args=self.sampler_extra_args, disable=False, callback=self.callback_state, **extra_params_kwargs)) + + if self.model_wrap_cfg.padded_cond_uncond: + p.extra_generation_params["Pad conds"] = True + + return samples + + +def override_samplers(): + modules.sd_samplers_kdiffusion.samplers_data_k_diffusion = [ + modules.sd_samplers_common.SamplerData(label, + lambda model, funcname=funcname: KDiffusionSamplerBMAB(funcname, model), + aliases, options) + for label, funcname, aliases, options in modules.sd_samplers_kdiffusion.samplers_k_diffusion + if callable(funcname) or hasattr(k_diffusion.sampling, funcname) + ] + if hasattr(modules, 'sd_samplers_timesteps'): + modules.sd_samplers.all_samplers = [ + *modules.sd_samplers_kdiffusion.samplers_data_k_diffusion, + *modules.sd_samplers_timesteps.samplers_data_timesteps, + ] + else: + modules.sd_samplers.all_samplers = [ + *modules.sd_samplers_kdiffusion.samplers_data_k_diffusion, + *modules.sd_samplers_compvis.samplers_data_compvis, + ] + modules.sd_samplers.all_samplers_map = {x.name: x for x in modules.sd_samplers.all_samplers} diff --git a/3-bmab/sd_bmab/sd_override/sd_models.py b/3-bmab/sd_bmab/sd_override/sd_models.py new file mode 100644 index 0000000000000000000000000000000000000000..51dae64175f3f20a0610019a6e6bad1832389116 --- /dev/null +++ b/3-bmab/sd_bmab/sd_override/sd_models.py @@ -0,0 +1,45 @@ +import os +import sys +from modules import sd_models +from modules import shared + + +def bmab_list_models(): + sd_models.checkpoints_list.clear() + sd_models.checkpoint_aliases.clear() + + cmd_ckpt = shared.cmd_opts.ckpt + if shared.cmd_opts.no_download_sd_model or cmd_ckpt != shared.sd_model_file or os.path.exists(cmd_ckpt): + model_url = None + else: + model_url = f"{shared.hf_endpoint}/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors" + + model_list = sd_models.modelloader.load_models(model_path=sd_models.model_path, model_url=model_url, command_path=shared.cmd_opts.ckpt_dir, ext_filter=[".ckpt", ".safetensors"], download_name="v1-5-pruned-emaonly.safetensors", ext_blacklist=[".vae.ckpt", ".vae.safetensors"]) + + second_path = shared.opts.data.get('bmab_additional_checkpoint_path', '') + print(f'second path {second_path}') + if os.path.exists(second_path): + print(f'load checkpoint from {second_path}') + model_list_seconds = sd_models.modelloader.load_models(model_path=second_path, model_url=model_url, command_path=shared.cmd_opts.ckpt_dir, ext_filter=[".ckpt", ".safetensors"], download_name="v1-5-pruned-emaonly.safetensors", ext_blacklist=[".vae.ckpt", ".vae.safetensors"]) + length = len(sd_models.model_path) + temp = [(x[length:], x) for x in model_list] + length = len(second_path) + temp.extend([(x[length:], x) for x in model_list_seconds]) + model_list = 
[x[1] for x in sorted(temp, key=lambda x: x[0])]
+
+	if os.path.exists(cmd_ckpt):
+		checkpoint_info = sd_models.CheckpointInfo(cmd_ckpt)
+		checkpoint_info.register()
+		shared.opts.data['sd_model_checkpoint'] = checkpoint_info.title
+	elif cmd_ckpt is not None and cmd_ckpt != shared.default_sd_model_file:
+		print(f"Checkpoint in --ckpt argument not found (possibly it was moved to {sd_models.model_path}): {cmd_ckpt}", file=sys.stderr)
+
+	for filename in model_list:
+		checkpoint_info = sd_models.CheckpointInfo(filename)
+		checkpoint_info.register()
+
+
+def override():
+	bmab_list_models()
+	sd_models.list_models = bmab_list_models
+
diff --git a/3-bmab/sd_bmab/sd_override/txt2img.py b/3-bmab/sd_bmab/sd_override/txt2img.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7da819956373ab64e1dbb1f7cc54e797ef35ff6
--- /dev/null
+++ b/3-bmab/sd_bmab/sd_override/txt2img.py
@@ -0,0 +1,319 @@
+import torch
+import numpy as np
+from PIL import Image
+from dataclasses import dataclass, field
+
+from modules import processing
+from modules import shared
+from modules import sd_samplers
+from modules import images
+from modules import devices
+from modules import extra_networks
+from modules import sd_models
+from modules import rng
+from modules.shared import opts
+from modules.processing import StableDiffusionProcessingTxt2Img, decode_latent_batch, opt_C, opt_f
+from modules.sd_samplers_common import images_tensor_to_samples, decode_first_stage, approximation_indexes
+
+from sd_bmab.base import filter
+from sd_bmab.external.kohyahiresfix import KohyaHiresFixPreprocessor
+
+
+def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, seed_resize_from_h=0, seed_resize_from_w=0, p=None):
+	g = rng.ImageRNG(shape, seeds, subseeds=subseeds, subseed_strength=subseed_strength, seed_resize_from_h=seed_resize_from_h, seed_resize_from_w=seed_resize_from_w)
+	return g.next()
+
+
+@dataclass(repr=False)
+class StableDiffusionProcessingTxt2ImgOv(StableDiffusionProcessingTxt2Img):
+
+	enable_progressive_growing: bool = field(default=False, init=False)
+	progressive_growing_min_scale: float = field(default=0.25, init=False)
+	progressive_growing_max_scale: float = field(default=1.0, init=False)
+	progressive_growing_steps: int = field(default=4, init=False)
+	progressive_growing_refinement: bool = field(default=True, init=False)
+
+	def __post_init__(self):
+		super().__post_init__()
+
+		self.enable_progressive_growing = getattr(self, 'enable_progressive_growing', False)
+		self.progressive_growing_min_scale = getattr(self, 'progressive_growing_min_scale', 0.25)
+		self.progressive_growing_max_scale = getattr(self, 'progressive_growing_max_scale', 1.0)
+		self.progressive_growing_steps = getattr(self, 'progressive_growing_steps', 4)
+		self.progressive_growing_refinement = getattr(self, 'progressive_growing_refinement', True)
+
+		self.context = None
+		self.extra_noise = 0
+		self.initial_noise_multiplier = opts.initial_noise_multiplier
+
+	def init(self, all_prompts, all_seeds, all_subseeds):
+		ret = super().init(all_prompts, all_seeds, all_subseeds)
+		self.extra_generation_params['Hires prompt'] = ''
+		self.extra_generation_params['Hires negative prompt'] = ''
+		return ret
+
+	def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
+		self.sampler = sd_samplers.create_sampler(self.sampler_name, self.sd_model)
+
+		if self.enable_progressive_growing:
+			return self.sample_progressive(conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts)
+
+		# standard generation path
+		x = self.rng.next()
+		samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
+		del x
+
+		if not self.enable_hr:
+			return samples
+
+		decoded_samples = torch.stack(processing.decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)).to(dtype=torch.float32)
+		devices.torch_gc()
+
+		return self.sample_hr_pass(samples, decoded_samples, seeds, subseeds, subseed_strength, prompts)
+
+	def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
+		with KohyaHiresFixPreprocessor(self):
+			self.sampler = sd_samplers.create_sampler(self.sampler_name, self.sd_model)
+
+			x = self.rng.next()
+			samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
+			del x
+
+			if not self.enable_hr:
+				return samples
+
+			if self.latent_scale_mode is None:
+				decoded_samples = torch.stack(processing.decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)).to(dtype=torch.float32)
+			else:
+				decoded_samples = None
+
+			devices.torch_gc()
+
+			return self.sample_hr_pass(samples, decoded_samples, seeds, subseeds, subseed_strength, prompts)
+
+	def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
+		self.sampler = sd_samplers.create_sampler(self.sampler_name, self.sd_model)
+
+		if self.enable_progressive_growing:
+			return self.sample_progressive(conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts)
+
+		if self.firstpass_image is not None and self.enable_hr:
+			# here we don't need to generate an image; we just take self.firstpass_image and prepare it for the hires fix
+
+			if self.latent_scale_mode is None:
+				image = np.array(self.firstpass_image).astype(np.float32) / 255.0 * 2.0 - 1.0
+				image = np.moveaxis(image, 2, 0)
+
+				samples = None
+				decoded_samples = torch.asarray(np.expand_dims(image, 0))
+
+			else:
+				image = np.array(self.firstpass_image).astype(np.float32) / 255.0
+				image = np.moveaxis(image, 2, 0)
+				image = torch.from_numpy(np.expand_dims(image, axis=0))
+				image = image.to(shared.device, dtype=devices.dtype_vae)
+
+				if opts.sd_vae_encode_method != 'Full':
+					self.extra_generation_params['VAE Encoder'] = opts.sd_vae_encode_method
+
+				samples = images_tensor_to_samples(image, approximation_indexes.get(opts.sd_vae_encode_method), self.sd_model)
+				decoded_samples = None
+				devices.torch_gc()
+
+		else:
+			# here we generate an image normally
+
+			x = self.rng.next()
+			samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
+			del x
+
+			if not self.enable_hr:
+				return samples
+
+			devices.torch_gc()
+
+			if self.latent_scale_mode is None:
+				decoded_samples = torch.stack(decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)).to(dtype=torch.float32)
+			else:
+				decoded_samples = None
+
+		with sd_models.SkipWritingToConfig():
+			sd_models.reload_model_weights(info=self.hr_checkpoint_info)
+
+		return self.sample_hr_pass(samples, decoded_samples, seeds, subseeds, subseed_strength, prompts)
+
+	def sample_progressive(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
+		is_sdxl = getattr(self.sd_model, 'is_sdxl', False)
+
+		if is_sdxl:
+			min_scale = max(0.5, 
self.progressive_growing_min_scale) + else: + min_scale = self.progressive_growing_min_scale + + resolution_steps = np.linspace(min_scale, self.progressive_growing_max_scale, self.progressive_growing_steps) + + initial_width = max(512 if is_sdxl else 64, int(self.width * resolution_steps[0])) + initial_height = max(512 if is_sdxl else 64, int(self.height * resolution_steps[0])) + + x = create_random_tensors((opt_C, initial_height // opt_f, initial_width // opt_f), seeds, subseeds=subseeds, subseed_strength=subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) + samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x)) + + for i in range(1, len(resolution_steps)): + target_width = int(self.width * resolution_steps[i]) + target_height = int(self.height * resolution_steps[i]) + + if is_sdxl: + target_width = max(512, min(1536, target_width)) + target_height = max(512, min(1536, target_height)) + + samples = torch.nn.functional.interpolate(samples, size=(target_height // opt_f, target_width // opt_f), mode='bicubic', align_corners=False) + + if self.progressive_growing_refinement: + steps_for_refinement = self.steps // len(resolution_steps) + noise = create_random_tensors(samples.shape[1:], seeds, subseeds=subseeds, subseed_strength=subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self) + decoded_samples = decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True) + decoded_samples = torch.stack(decoded_samples).float() + decoded_samples = torch.clamp((decoded_samples + 1.0) / 2.0, min=0.0, max=1.0) + self.image_conditioning = self.img2img_image_conditioning(decoded_samples * 2 - 1, samples) + + samples = self.sampler.sample_img2img( + self, + samples, + noise, + conditioning, + unconditional_conditioning, + steps=steps_for_refinement, + image_conditioning=self.image_conditioning + ) + + return samples + + def sample_hr_pass(self, samples, decoded_samples, seeds, subseeds, subseed_strength, prompts): + if shared.state.interrupted: + return samples + + self.is_hr_pass = True + + target_width = self.hr_upscale_to_x + target_height = self.hr_upscale_to_y + + def save_intermediate(image, index): + """saves image before applying hires fix, if enabled in options; takes as an argument either an image or batch with latent space images""" + + if not self.save_samples() or not opts.save_images_before_highres_fix: + return + + if not isinstance(image, Image.Image): + image = sd_samplers.sample_to_image(image, index, approximation=0) + + info = processing.create_infotext(self, self.all_prompts, self.all_seeds, self.all_subseeds, [], iteration=self.iteration, position_in_batch=index) + images.save_image(image, self.outpath_samples, "", seeds[index], prompts[index], opts.samples_format, info=info, p=self, suffix="-before-highres-fix") + + img2img_sampler_name = self.hr_sampler_name or self.sampler_name + + self.sampler = sd_samplers.create_sampler(img2img_sampler_name, self.sd_model) + + if self.latent_scale_mode is not None: + for i in range(samples.shape[0]): + save_intermediate(samples, i) + + samples = torch.nn.functional.interpolate(samples, size=(target_height // processing.opt_f, target_width // processing.opt_f), mode=self.latent_scale_mode["mode"], antialias=self.latent_scale_mode["antialias"]) + + # Avoid making the inpainting conditioning unless necessary as + # this 
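`sample_progressive` above ramps the working resolution linearly from `min_scale` to `max_scale` over a fixed number of stages, re-noising and refining at each step. A self-contained sketch of just the schedule; the `// 8` snapping to the latent grid is an assumption for illustration, the method itself divides by `opt_f` when it builds tensors:

```python
import numpy as np

def resolution_ladder(width, height, min_scale=0.25, max_scale=1.0, steps=4, is_sdxl=False):
    # Linear ramp of scales; SDXL is clamped to a workable resolution window,
    # matching the max(0.5, ...) and [512, 1536] guards above.
    lo = max(0.5, min_scale) if is_sdxl else min_scale
    ladder = []
    for scale in np.linspace(lo, max_scale, steps):
        w, h = int(width * scale), int(height * scale)
        if is_sdxl:
            w, h = max(512, min(1536, w)), max(512, min(1536, h))
        # keep dimensions divisible by 8 so they map cleanly onto the latent grid
        ladder.append(((w // 8) * 8, (h // 8) * 8))
    return ladder

print(resolution_ladder(1024, 1024))  # [(256, 256), (512, 512), (768, 768), (1024, 1024)]
```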
does need some extra compute to decode / encode the image again. + if getattr(self, "inpainting_mask_weight", shared.opts.inpainting_mask_weight) < 1.0: + image_conditioning = self.img2img_image_conditioning(decode_first_stage(self.sd_model, samples), samples) + else: + image_conditioning = self.txt2img_image_conditioning(samples) + else: + lowres_samples = torch.clamp((decoded_samples + 1.0) / 2.0, min=0.0, max=1.0) + + batch_images = [] + for i, x_sample in enumerate(lowres_samples): + x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2) + x_sample = x_sample.astype(np.uint8) + image = Image.fromarray(x_sample) + + save_intermediate(image, i) + + if self.context is not None and self.context.args is not None: + filter_name = self.context.args['txt2img_filter_hresfix_before_upscale'] + filter1 = filter.get_filter(filter_name) + + self.context.index = self.iteration * self.batch_size + i + filter.preprocess_filter(filter1, self.context, image) + image = filter.process_filter(filter1, self.context, None, image, sdprocess=self) + filter.postprocess_filter(filter1, self.context) + + if hasattr(self.context.script, 'resize_image'): + resized = self.context.script.resize_image(self.context, 0, i, image, target_width, target_height, self.hr_upscaler) + else: + resized = images.resize_image(0, image, target_width, target_height, upscaler_name=self.hr_upscaler) + + filter_name = self.context.args['txt2img_filter_hresfix_after_upscale'] + filter2 = filter.get_filter(filter_name) + filter.preprocess_filter(filter2, self.context, image) + image = filter.process_filter(filter2, self.context, image, resized, sdprocess=self) + filter.postprocess_filter(filter2, self.context) + else: + if (self.context is not None and self.context.script is not None) and hasattr(self.context.script, 'resize_image'): + image = self.context.script.resize_image(self.context, 0, i, image, target_width, target_height, self.hr_upscaler) + else: + image = images.resize_image(0, image, target_width, target_height, upscaler_name=self.hr_upscaler) + + image = np.array(image).astype(np.float32) / 255.0 + image = np.moveaxis(image, 2, 0) + batch_images.append(image) + + decoded_samples = torch.from_numpy(np.array(batch_images)) + decoded_samples = decoded_samples.to(shared.device, dtype=devices.dtype_vae) + + if opts.sd_vae_encode_method != 'Full': + self.extra_generation_params['VAE Encoder'] = opts.sd_vae_encode_method + samples = images_tensor_to_samples(decoded_samples, approximation_indexes.get(opts.sd_vae_encode_method)) + + image_conditioning = self.img2img_image_conditioning(decoded_samples, samples) + + shared.state.nextjob() + + samples = samples[:, :, self.truncate_y//2:samples.shape[2]-(self.truncate_y+1)//2, self.truncate_x//2:samples.shape[3]-(self.truncate_x+1)//2] + + self.rng = rng.ImageRNG(samples.shape[1:], self.seeds, subseeds=self.subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w) + noise = self.rng.next() + + # GC now before running the next img2img to prevent running out of memory + devices.torch_gc() + + if not self.disable_extra_networks: + with devices.autocast(): + extra_networks.activate(self, self.hr_extra_network_data) + + with devices.autocast(): + self.calculate_hr_conds() + + sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio(for_hr=True)) + + if self.scripts is not None: + self.scripts.before_hr(self) + + if self.initial_noise_multiplier != 1.0: + self.extra_generation_params["Noise 
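The hires pass above trims the upscaled latent with `truncate_x`/`truncate_y` slicing so an odd trim is split asymmetrically while the output still matches the requested size exactly. A standalone check of that arithmetic:

```python
import torch

def center_trim(samples, truncate_x, truncate_y):
    # Same slicing as above: floor half of the trim on one side,
    # ceil half on the other, in latent units.
    return samples[:, :,
                   truncate_y // 2: samples.shape[2] - (truncate_y + 1) // 2,
                   truncate_x // 2: samples.shape[3] - (truncate_x + 1) // 2]

x = torch.zeros(1, 4, 97, 130)
print(center_trim(x, truncate_x=2, truncate_y=1).shape)  # torch.Size([1, 4, 96, 128])
```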
multiplier"] = self.initial_noise_multiplier + noise *= self.initial_noise_multiplier + + with sd_models.SkipWritingToConfig(): + sd_models.reload_model_weights(info=self.hr_checkpoint_info) + + samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning) + + sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio()) + + self.sampler = None + devices.torch_gc() + + decoded_samples = processing.decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True) + + self.is_hr_pass = False + + return decoded_samples + diff --git a/3-bmab/sd_bmab/ui.py b/3-bmab/sd_bmab/ui.py new file mode 100644 index 0000000000000000000000000000000000000000..ceb621b493a6e8f06bd274df17d8b104ab259b2d --- /dev/null +++ b/3-bmab/sd_bmab/ui.py @@ -0,0 +1,815 @@ +import random +import gradio as gr + +from modules import sd_models, sd_vae, shared, extras, images +from modules.ui_components import ToolButton, FormRow, FormColumn, InputAccordion + +from sd_bmab import constants +from sd_bmab import util +from sd_bmab import detectors +from sd_bmab import parameters +from sd_bmab.base import context +from sd_bmab.base import filter +from sd_bmab import pipeline +from sd_bmab import masking +from sd_bmab.util import debug_print, installhelper +from sd_bmab.processors.controlnet import Openpose, IpAdapter +from sd_bmab.processors.postprocess import Watermark +from sd_bmab.processors.basic import ICLight + + +bmab_version = 'v24.05.12.0' + +final_images = [] +last_process = None +bmab_script = None +gallery_select_index = 0 + +def create_ui(bscript, is_img2img): + class ListOv(list): + def __iadd__(self, x): + self.append(x) + return self + + ui_checkpoints = [constants.checkpoint_default] + ui_checkpoints.extend([str(x) for x in sd_models.checkpoints_list.keys()]) + ui_vaes = [constants.vae_default] + ui_vaes.extend([str(x) for x in sd_vae.vae_dict.keys()]) + + elem = ListOv() + with FormRow(): + with InputAccordion(label=f'BMAB', value=False) as Enable_BMAB: + elem += Enable_BMAB + btn_stop = ToolButton('⏹️', visible=True, interactive=True, tooltip='stop generation', elem_id='bmab_stop_generation') + + with gr.Group(): + with gr.Accordion(f'BMAB Preprocessor', open=False): + with gr.Tab('Context', id='bmab_context', elem_id='bmab_context_tabs'): + with gr.Tab('Generic'): + with FormRow(): + with FormColumn(), FormRow(): + checkpoint_models = gr.Dropdown(label='CheckPoint', visible=True, value=ui_checkpoints[0], choices=ui_checkpoints) + elem += checkpoint_models + with FormColumn(), FormRow(): + vaes_models = gr.Dropdown(label='SD VAE', visible=True, value=ui_vaes[0], choices=ui_vaes) + elem += vaes_models + + with FormRow(): + with FormColumn(): + with FormRow(): + gr.Markdown(constants.checkpoint_description) + with FormRow(): + elem += gr.Slider(minimum=0, maximum=1.5, value=1, step=0.001, label='txt2img noise multiplier for hires.fix (EXPERIMENTAL)', elem_id='bmab_txt2img_noise_multiplier') + with FormRow(): + elem += gr.Slider(minimum=0, maximum=1, value=0, step=0.01, label='txt2img extra noise multiplier for hires.fix (EXPERIMENTAL)', elem_id='bmab_txt2img_extra_noise_multiplier') + with FormColumn(): + with FormRow(): + dd_preprocess_filter = gr.Dropdown(label='Preprocess filter', visible=True, value=filter.filters[0], choices=filter.filters) + elem += dd_preprocess_filter + with FormRow(): + dd_hiresfix_filter1 = gr.Dropdown(label='Hires.fix filter before 
upscale', visible=True, value=filter.filters[0], choices=filter.filters) + elem += dd_hiresfix_filter1 + with FormRow(): + dd_hiresfix_filter2 = gr.Dropdown(label='Hires.fix filter after upscale', visible=True, value=filter.filters[0], choices=filter.filters) + elem += dd_hiresfix_filter2 + + with gr.Tab('Kohya Hires.fix'): + with FormRow(): + with FormColumn(): + elem += gr.Checkbox(label='Enable Kohya hires.fix', value=False) + with FormRow(): + gr.HTML(constants.kohya_hiresfix_description) + with FormRow(): + elem += gr.Slider(minimum=0, maximum=0.5, step=0.01, label="Stop at, first", value=0.15) + elem += gr.Slider(minimum=1, maximum=10, step=1, label="Depth, first", value=3) + with FormRow(): + elem += gr.Slider(minimum=0, maximum=0.5, step=0.01, label="Stop at, second", value=0.4) + elem += gr.Slider(minimum=1, maximum=10, step=1, label="Depth, second", value=4) + with FormRow(): + elem += gr.Dropdown(['bicubic', 'bilinear', 'nearest', 'nearest-exact'], label='Layer scaler', value='bicubic') + elem += gr.Slider(minimum=0.1, maximum=1.0, step=0.05, label="Downsampling scale", value=0.5) + elem += gr.Slider(minimum=1.0, maximum=4.0, step=0.1, label="Upsampling scale", value=2.0) + with FormRow(): + elem += gr.Checkbox(label="Smooth scaling", value=True) + elem += gr.Checkbox(label="Early upsampling", value=False) + elem += gr.Checkbox(label='Disable for additional passes', value=True) + with gr.Tab('Resample', id='bmab_resample', elem_id='bmab_resample_tabs'): + with FormRow(): + with FormColumn(): + elem += gr.Checkbox(label='Enable self resample', value=False) + with FormColumn(): + elem += gr.Checkbox(label='Save image before processing', value=False) + with FormRow(): + elem += gr.Checkbox(label='Enable resample before upscale', value=False) + with FormRow(): + with FormColumn(): + with FormRow(): + resample_models = gr.Dropdown(label='CheckPoint', visible=True, value=ui_checkpoints[0], choices=ui_checkpoints) + elem += resample_models + with FormColumn(): + with FormRow(): + resample_vaes = gr.Dropdown(label='SD VAE', visible=True, value=ui_vaes[0], choices=ui_vaes) + elem += resample_vaes + with FormRow(): + with FormColumn(min_width=100): + methods = ['txt2img-1pass', 'txt2img-2pass', 'img2img-1pass'] + elem += gr.Dropdown(label='Resample method', visible=True, value=methods[0], choices=methods) + with FormColumn(): + dd_resample_filter = gr.Dropdown(label='Resample filter', visible=True, value=filter.filters[0], choices=filter.filters) + elem += dd_resample_filter + with FormRow(): + elem += gr.Textbox(placeholder='prompt. if empty, use main prompt', lines=3, visible=True, value='', label='Resample prompt') + with FormRow(): + elem += gr.Textbox(placeholder='negative prompt. 
if empty, use main negative prompt', lines=3, visible=True, value='', label='Resample negative prompt') + with FormRow(): + with FormColumn(min_width=100): + with FormRow(): + with FormColumn(min_width=50): + asamplers = [constants.sampler_default] + asamplers.extend([x.name for x in shared.list_samplers()]) + elem += gr.Dropdown(label='Sampling method', visible=True, value=asamplers[0], choices=asamplers) + with FormColumn(min_width=50): + ascheduler = util.get_scueduler_list() + elem += gr.Dropdown(label='Scheduler', elem_id="resample_scheduler", choices=ascheduler, value=ascheduler[0]) + with FormColumn(min_width=100): + upscalers = [constants.fast_upscaler] + upscalers.extend([x.name for x in shared.sd_upscalers]) + elem += gr.Dropdown(label='Upscaler', visible=True, value=upscalers[0], choices=upscalers) + with FormRow(): + with FormColumn(min_width=100): + elem += gr.Slider(minimum=1, maximum=150, value=20, step=1, label='Resample Sampling Steps', elem_id='bmab_resample_steps') + elem += gr.Slider(minimum=1, maximum=30, value=7, step=0.5, label='Resample CFG Scale', elem_id='bmab_resample_cfg_scale') + elem += gr.Slider(minimum=0, maximum=1, value=0.75, step=0.01, label='Resample Denoising Strength', elem_id='bmab_resample_denoising') + elem += gr.Slider(minimum=0.0, maximum=2, value=0.5, step=0.05, label='Resample strength', elem_id='bmab_resample_cn_strength') + elem += gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label='Resample begin', elem_id='bmab_resample_cn_begin') + elem += gr.Slider(minimum=0.0, maximum=1.0, value=0.9, step=0.01, label='Resample end', elem_id='bmab_resample_cn_end') + with gr.Tab('Pretraining', id='bmab_pretraining', elem_id='bmab_pretraining_tabs'): + with FormRow(): + elem += gr.Checkbox(label='Enable pretraining detailer', value=False) + with FormRow(): + elem += gr.Checkbox(label='Enable pretraining before upscale', value=False) + with FormRow(): + with FormColumn(): + with FormRow(): + pretraining_checkpoint_models = gr.Dropdown(label='CheckPoint', visible=True, value=ui_checkpoints[0], choices=ui_checkpoints) + elem += pretraining_checkpoint_models + with FormColumn(): + with FormRow(): + pretraining_vaes_models = gr.Dropdown(label='SD VAE', visible=True, value=ui_vaes[0], choices=ui_vaes) + elem += pretraining_vaes_models + with FormRow(): + with FormColumn(min_width=100): + with FormRow(): + models = ['Select Model'] + models.extend(util.list_pretraining_models()) + pretraining_models = gr.Dropdown(label='Pretraining Model', visible=True, value=models[0], choices=models, elem_id='bmab_pretraining_models') + elem += pretraining_models + with FormColumn(min_width=100): + dd_pretraining_filter = gr.Dropdown(label='Pretraining filter', visible=True, value=filter.filters[0], choices=filter.filters) + elem += dd_pretraining_filter + with FormRow(): + elem += gr.Textbox(placeholder='prompt. if empty, use main prompt', lines=3, visible=True, value='', label='Pretraining prompt') + with FormRow(): + elem += gr.Textbox(placeholder='negative prompt. 
if empty, use main negative prompt', lines=3, visible=True, value='', label='Pretraining negative prompt') + with FormRow(): + with FormColumn(min_width=100): + with FormRow(): + with FormColumn(min_width=50): + asamplers = [constants.sampler_default] + asamplers.extend([x.name for x in shared.list_samplers()]) + elem += gr.Dropdown(label='Sampling method', visible=True, value=asamplers[0], choices=asamplers) + with FormColumn(min_width=50): + ascheduler = util.get_scueduler_list() + elem += gr.Dropdown(label='Scheduler', elem_id="pretraining_scheduler", choices=ascheduler, value=ascheduler[0]) + with FormRow(): + with FormColumn(min_width=100): + elem += gr.Slider(minimum=1, maximum=150, value=20, step=1, label='Pretraining sampling steps', elem_id='bmab_pretraining_steps') + elem += gr.Slider(minimum=1, maximum=30, value=7, step=0.5, label='Pretraining CFG scale', elem_id='bmab_pretraining_cfg_scale') + elem += gr.Slider(minimum=0, maximum=1, value=0.75, step=0.01, label='Pretraining denoising Strength', elem_id='bmab_pretraining_denoising') + elem += gr.Slider(minimum=0, maximum=128, value=4, step=1, label='Pretraining dilation', elem_id='bmab_pretraining_dilation') + elem += gr.Slider(minimum=0.1, maximum=1, value=0.35, step=0.01, label='Pretraining box threshold', elem_id='bmab_pretraining_box_threshold') + with gr.Tab('Edge', elem_id='bmab_edge_tabs'): + with FormRow(): + elem += gr.Checkbox(label='Enable edge enhancement', value=False) + with FormRow(): + elem += gr.Slider(minimum=1, maximum=255, value=50, step=1, label='Edge low threshold') + elem += gr.Slider(minimum=1, maximum=255, value=200, step=1, label='Edge high threshold') + with FormRow(): + elem += gr.Slider(minimum=0, maximum=1, value=0.5, step=0.05, label='Edge strength') + gr.Markdown('') + with gr.Tab('Resize', elem_id='bmab_preprocess_resize_tab'): + with FormRow(): + elem += gr.Checkbox(label='Enable resize (intermediate)', value=False) + with FormRow(): + elem += gr.Checkbox(label='Resized by person', value=True) + with FormRow(): + gr.HTML(constants.resize_description) + with FormRow(): + with FormColumn(): + methods = ['stretching', 'inpaint', 'inpaint+lama', 'inpaint_only', 'inpaint_only+lama'] + elem += gr.Dropdown(label='Method', visible=True, value=methods[0], choices=methods) + with FormColumn(): + align = [x for x in util.alignment.keys()] + elem += gr.Dropdown(label='Alignment', visible=True, value=align[4], choices=align) + with FormRow(): + with FormColumn(): + dd_resize_filter = gr.Dropdown(label='Resize filter', visible=True, value=filter.filters[0], choices=filter.filters) + elem += dd_resize_filter + with FormColumn(): + gr.Markdown('') + with FormRow(): + elem += gr.Slider(minimum=0.50, maximum=0.95, value=0.85, step=0.01, label='Resize by person intermediate') + with FormRow(): + elem += gr.Slider(minimum=0, maximum=1, value=0.75, step=0.01, label='Denoising Strength for inpaint and inpaint+lama', elem_id='bmab_resize_intermediate_denoising') + with gr.Tab('Refiner', id='bmab_refiner', elem_id='bmab_refiner_tabs'): + with FormRow(): + elem += gr.Checkbox(label='Enable refiner', value=False) + with FormRow(): + with FormColumn(): + with FormRow(): + refiner_models = gr.Dropdown(label='CheckPoint for refiner', visible=True, value=ui_checkpoints[0], choices=ui_checkpoints) + elem += refiner_models + with FormColumn(): + with FormRow(): + vaes = [constants.vae_default] + vaes.extend([str(x) for x in sd_vae.vae_dict.keys()]) + refiner_vaes = gr.Dropdown(label='SD VAE', visible=True, value=ui_vaes[0], 
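The Edge tab above exposes a low threshold, a high threshold, and a strength slider. A plausible reading of those three values as a Canny-based edge darkening pass; the blend rule here is an assumption for illustration, not BMAB's exact implementation:

```python
import cv2
import numpy as np
from PIL import Image

def enhance_edges(img, low=50, high=200, strength=0.5):
    # Detect Canny edges between `low` and `high`, then darken edge pixels
    # by `strength` to emphasize outlines.
    arr = np.array(img.convert('RGB'))
    gray = cv2.cvtColor(arr, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, low, high)
    mask = (edges > 0)[..., None].astype(np.float32) * strength
    out = arr.astype(np.float32) * (1.0 - mask)
    return Image.fromarray(out.astype(np.uint8))

preview = enhance_edges(Image.new('RGB', (64, 64), (200, 180, 160)))
```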
choices=ui_vaes) + elem += refiner_vaes + with FormRow(): + elem += gr.Checkbox(label='Use this checkpoint for detailing(Face, Person, Hand)', value=True) + with FormRow(): + elem += gr.Textbox(placeholder='prompt. if empty, use main prompt', lines=3, visible=True, value='', label='Prompt') + with FormRow(): + elem += gr.Textbox(placeholder='negative prompt. if empty, use main negative prompt', lines=3, visible=True, value='', label='Negative Prompt') + with FormRow(): + with FormColumn(min_width=100): + with FormRow(): + with FormColumn(min_width=50): + asamplers = [constants.sampler_default] + asamplers.extend([x.name for x in shared.list_samplers()]) + elem += gr.Dropdown(label='Sampling method', visible=True, value=asamplers[0], choices=asamplers) + with FormColumn(min_width=50): + ascheduler = util.get_scueduler_list() + elem += gr.Dropdown(label='Scheduler', elem_id="refiner_scheduler", choices=ascheduler, value=ascheduler[0]) + with FormColumn(min_width=100): + upscalers = [constants.fast_upscaler] + upscalers.extend([x.name for x in shared.sd_upscalers]) + elem += gr.Dropdown(label='Upscaler', visible=True, value=upscalers[0], choices=upscalers) + with FormRow(): + with FormColumn(min_width=100): + elem += gr.Slider(minimum=1, maximum=150, value=20, step=1, label='Refiner Sampling Steps', elem_id='bmab_refiner_steps') + elem += gr.Slider(minimum=1, maximum=30, value=7, step=0.5, label='Refiner CFG Scale', elem_id='bmab_refiner_cfg_scale') + elem += gr.Slider(minimum=0, maximum=1, value=0.75, step=0.01, label='Refiner Denoising Strength', elem_id='bmab_refiner_denoising') + with FormRow(): + with FormColumn(min_width=100): + elem += gr.Slider(minimum=0, maximum=4, value=1, step=0.1, label='Refiner Scale', elem_id='bmab_refiner_scale') + elem += gr.Slider(minimum=0, maximum=2048, value=0, step=1, label='Refiner Width', elem_id='bmab_refiner_width') + elem += gr.Slider(minimum=0, maximum=2048, value=0, step=1, label='Refiner Height', elem_id='bmab_refiner_height') + + with gr.Accordion(f'BMAB Basic', open=False): + with FormRow(): + with gr.Tabs(elem_id='bmab_tabs'): + with gr.Tab('Basic', elem_id='bmab_basic_tabs'): + with FormRow(): + with FormColumn(): + elem += gr.Slider(minimum=0, maximum=2, value=1, step=0.05, label='Contrast') + elem += gr.Slider(minimum=0, maximum=2, value=1, step=0.05, label='Brightness') + elem += gr.Slider(minimum=-5, maximum=5, value=1, step=0.1, label='Sharpeness') + elem += gr.Slider(minimum=0, maximum=2, value=1, step=0.01, label='Color') + with FormColumn(): + elem += gr.Slider(minimum=-2000, maximum=+2000, value=0, step=1, label='Color temperature') + elem += gr.Slider(minimum=0, maximum=1, value=0, step=0.05, label='Noise alpha') + elem += gr.Slider(minimum=0, maximum=1, value=0, step=0.05, label='Noise alpha at final stage') + with gr.Tab('Imaging', elem_id='bmab_imaging_tabs'): + with FormRow(): + elem += gr.Image(source='upload', type='pil') + with FormRow(): + elem += gr.Checkbox(label='Blend enabled', value=False) + with FormRow(): + with FormColumn(): + elem += gr.Slider(minimum=0, maximum=1, value=1, step=0.05, label='Blend alpha') + with FormColumn(): + gr.Markdown('') + with FormRow(): + elem += gr.Checkbox(label='Enable detect', value=False) + with FormRow(): + elem += gr.Textbox(placeholder='1girl', visible=True, value='', label='Prompt') + with gr.Tab('Person', elem_id='bmab_person_tabs'): + with FormRow(): + elem += gr.Checkbox(label='Enable person detailing for landscape', value=False) + with FormRow(): + elem += gr.Checkbox(label='Use 
groudingdino for detection', value=False) + elem += gr.Checkbox(label='Force upscale ratio 1:1 without area limit', value=False) + with FormRow(): + elem += gr.Checkbox(label='Block over-scaled image', value=True) + elem += gr.Checkbox(label='Auto Upscale if Block over-scaled image enabled', value=True) + with FormRow(): + with FormColumn(min_width=100): + with FormRow(): + with FormColumn(min_width=50): + person_checkpoint_models = gr.Dropdown(label='CheckPoint', visible=True, value=ui_checkpoints[0], choices=ui_checkpoints) + elem += person_checkpoint_models + with FormColumn(min_width=50): + person_vaes_models = gr.Dropdown(label='SD VAE', visible=True, value=ui_vaes[0], choices=ui_vaes) + elem += person_vaes_models + with FormColumn(min_width=100): + with FormRow(): + with FormColumn(min_width=50): + asamplers = [constants.sampler_default] + asamplers.extend([x.name for x in shared.list_samplers()]) + elem += gr.Dropdown(label='Sampler', elem_id="bmb_person_sampler", visible=True, value=asamplers[0], choices=asamplers) + with FormColumn(min_width=50): + ascheduler = util.get_scueduler_list() + elem += gr.Dropdown(label='Scheduler', elem_id="bmb_person_scheduler", choices=ascheduler, value=ascheduler[0]) + with FormRow(): + with FormColumn(min_width=100): + elem += gr.Slider(minimum=1, maximum=8, value=4, step=0.01, label='Upscale Ratio') + elem += gr.Slider(minimum=0, maximum=20, value=3, step=1, label='Dilation mask') + elem += gr.Slider(minimum=0.01, maximum=1, value=0.1, step=0.01, label='Large person area limit') + elem += gr.Slider(minimum=0, maximum=20, value=1, step=1, label='Limit') + elem += gr.Slider(minimum=0, maximum=2, value=1, step=0.01, visible=shared.opts.data.get('bmab_test_function', False), label='Background color (HIDDEN)') + elem += gr.Slider(minimum=0, maximum=30, value=0, step=1, visible=shared.opts.data.get('bmab_test_function', False), label='Background blur (HIDDEN)') + with FormColumn(): + elem += gr.Slider(minimum=0, maximum=1, value=0.4, step=0.01, label='Denoising Strength') + elem += gr.Slider(minimum=1, maximum=30, value=7, step=0.5, label='CFG Scale') + elem += gr.Slider(minimum=1, maximum=150, value=20, step=1, label='Steps') + with gr.Tab('Face', elem_id='bmab_face_tabs'): + with FormRow(): + elem += gr.Checkbox(label='Enable face detailing', value=False) + with FormRow(): + elem += gr.Checkbox(label='Enable face detailing before upscale', value=False) + with FormRow(): + elem += gr.Checkbox(label='Disable extra networks in prompt (LORA, Hypernetwork, ...)', value=False) + with FormRow(): + with FormColumn(min_width=100): + elem += gr.Dropdown(label='Face detailing sort by', choices=['Score', 'Size', 'Left', 'Right', 'Center'], type='value', value='Score') + with FormColumn(min_width=100): + elem += gr.Slider(minimum=0, maximum=20, value=1, step=1, label='Limit') + with gr.Tab('Face1', elem_id='bmab_face1_tabs'): + with FormRow(): + elem += gr.Textbox(placeholder='prompt. if empty, use main prompt', lines=3, visible=True, value='', label='Prompt') + with FormRow(): + elem += gr.Textbox(placeholder='negative prompt. if empty, use main negative prompt', lines=3, visible=True, value='', label='Negative Prompt') + with gr.Tab('Face2', elem_id='bmab_face2_tabs'): + with FormRow(): + elem += gr.Textbox(placeholder='prompt. if empty, use main prompt', lines=3, visible=True, value='', label='Prompt') + with FormRow(): + elem += gr.Textbox(placeholder='negative prompt. 
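The Face tab sorts detected faces by Score, Size, Left, Right, or Center and then keeps only `Limit` of them. A sketch of what those sort keys could look like over `(x1, y1, x2, y2, score)` boxes; the key definitions are assumptions matching the dropdown labels, not BMAB's code:

```python
def sort_faces(boxes, criteria='Score', image_width=1024, limit=1):
    # Each box is (x1, y1, x2, y2, score); smaller key sorts first.
    keys = {
        'Score': lambda b: -b[4],
        'Size': lambda b: -((b[2] - b[0]) * (b[3] - b[1])),
        'Left': lambda b: b[0],
        'Right': lambda b: -b[2],
        'Center': lambda b: abs((b[0] + b[2]) / 2 - image_width / 2),
    }
    return sorted(boxes, key=keys[criteria])[:limit]

faces = [(10, 10, 60, 60, 0.7), (500, 20, 600, 140, 0.9)]
print(sort_faces(faces, 'Size', limit=2))  # larger face first
```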
if empty, use main negative prompt', lines=3, visible=True, value='', label='Negative Prompt') + with gr.Tab('Face3', elem_id='bmab_face3_tabs'): + with FormRow(): + elem += gr.Textbox(placeholder='prompt. if empty, use main prompt', lines=3, visible=True, value='', label='Prompt') + with FormRow(): + elem += gr.Textbox(placeholder='negative prompt. if empty, use main negative prompt', lines=3, visible=True, value='', label='Negative Prompt') + with gr.Tab('Face4', elem_id='bmab_face4_tabs'): + with FormRow(): + elem += gr.Textbox(placeholder='prompt. if empty, use main prompt', lines=3, visible=True, value='', label='Prompt') + with FormRow(): + elem += gr.Textbox(placeholder='negative prompt. if empty, use main negative prompt', lines=3, visible=True, value='', label='Negative Prompt') + with gr.Tab('Face5', elem_id='bmab_face5_tabs'): + with FormRow(): + elem += gr.Textbox(placeholder='prompt. if empty, use main prompt', lines=3, visible=True, value='', label='Prompt') + with FormRow(): + elem += gr.Textbox(placeholder='negative prompt. if empty, use main negative prompt', lines=3, visible=True, value='', label='Negative Prompt') + with FormRow(): + with gr.Tab('Parameters', elem_id='bmab_parameter_tabs'): + with FormRow(): + elem += gr.Checkbox(label='Overide Parameters', value=False) + with FormRow(): + with FormColumn(min_width=100): + elem += gr.Slider(minimum=64, maximum=2048, value=512, step=8, label='Width') + elem += gr.Slider(minimum=64, maximum=2048, value=512, step=8, label='Height') + with FormColumn(min_width=100): + elem += gr.Slider(minimum=1, maximum=30, value=7, step=0.5, label='CFG Scale') + elem += gr.Slider(minimum=1, maximum=150, value=20, step=1, label='Steps') + elem += gr.Slider(minimum=0, maximum=64, value=4, step=1, label='Mask Blur') + with FormRow(): + with FormColumn(min_width=100): + with FormRow(): + with FormColumn(min_width=50): + face_models = gr.Dropdown(label='CheckPoint for face', visible=True, value=ui_checkpoints[0], choices=ui_checkpoints) + elem += face_models + with FormColumn(min_width=50): + face_vaes = gr.Dropdown(label='SD VAE for face', visible=True, value=ui_vaes[0], choices=ui_vaes) + elem += face_vaes + with FormRow(): + with FormColumn(min_width=50): + asamplers = [constants.sampler_default] + asamplers.extend([x.name for x in shared.list_samplers()]) + elem += gr.Dropdown(label='Sampler', elem_id="face_sampler", visible=True, value=asamplers[0], choices=asamplers) + with FormColumn(min_width=50): + ascheduler = util.get_scueduler_list() + elem += gr.Dropdown(label='Scheduler', elem_id="face_scheduler", choices=ascheduler, value=ascheduler[0]) + with FormRow(): + inpaint_area = gr.Radio(label='Inpaint area', choices=['Whole picture', 'Only masked'], type='value', value='Only masked') + elem += inpaint_area + with FormRow(): + elem += gr.Slider(label='Only masked padding, pixels', minimum=0, maximum=256, step=4, value=32) + with FormRow(): + choices = detectors.list_face_detectors() + elem += gr.Dropdown(label='Detection Model', choices=choices, type='value', value=choices[0]) + with FormColumn(): + elem += gr.Slider(minimum=0, maximum=1, value=0.4, step=0.01, label='Face Denoising Strength', elem_id='bmab_face_denoising_strength') + elem += gr.Slider(minimum=0, maximum=64, value=4, step=1, label='Face Dilation', elem_id='bmab_face_dilation') + elem += gr.Slider(minimum=0.1, maximum=1, value=0.35, step=0.01, label='Face Box threshold') + elem += gr.Checkbox(label='Skip face detailing by area', value=False) + elem += 
gr.Slider(minimum=0.0, maximum=3.0, value=0.26, step=0.01, label='Face area (MegaPixel)') + with gr.Tab('Hand', elem_id='bmab_hand_tabs'): + with FormRow(): + elem += gr.Checkbox(label='Enable hand detailing (EXPERIMENTAL)', value=False) + elem += gr.Checkbox(label='Block over-scaled image', value=True) + with FormRow(): + elem += gr.Checkbox(label='Enable best quality (EXPERIMENTAL, Use more GPU)', value=False) + with FormRow(): + elem += gr.Dropdown(label='Method', visible=True, interactive=True, value='subframe', choices=['subframe', 'each hand', 'inpaint each hand', 'at once', 'depth hand refiner']) + with FormRow(): + elem += gr.Textbox(placeholder='prompt. if empty, use main prompt', lines=3, visible=True, value='', label='Prompt') + with FormRow(): + elem += gr.Textbox(placeholder='negative prompt. if empty, use main negative prompt', lines=3, visible=True, value='', label='Negative Prompt') + with FormRow(): + with FormColumn(): + elem += gr.Slider(minimum=0, maximum=1, value=0.4, step=0.01, label='Denoising Strength') + elem += gr.Slider(minimum=1, maximum=30, value=7, step=0.5, label='CFG Scale') + elem += gr.Checkbox(label='Auto Upscale if Block over-scaled image enabled', value=True) + with FormColumn(): + elem += gr.Slider(minimum=1, maximum=4, value=2, step=0.01, label='Upscale Ratio') + elem += gr.Slider(minimum=0, maximum=1, value=0.3, step=0.01, label='Box Threshold') + elem += gr.Slider(minimum=0, maximum=0.3, value=0.1, step=0.01, label='Box Dilation') + with FormRow(): + inpaint_area = gr.Radio(label='Inpaint area', choices=['Whole picture', 'Only masked'], type='value', value='Whole picture') + elem += inpaint_area + with FormRow(): + with FormColumn(): + elem += gr.Slider(label='Only masked padding, pixels', minimum=0, maximum=256, step=4, value=32) + with FormColumn(): + gr.Markdown('') + with FormRow(): + elem += gr.Textbox(placeholder='Additional parameter for advanced user', visible=True, value='', label='Additional Parameter') + with gr.Tab('ControlNet', elem_id='bmab_controlnet_tabs'): + with FormRow(): + elem += gr.Checkbox(label='Enable ControlNet access', value=False) + with FormRow(): + with gr.Tab('Noise', elem_id='bmab_cn_noise_tabs'): + with FormRow(): + elem += gr.Checkbox(label='Enable noise', value=False) + with FormRow(): + elem += gr.Checkbox(label='Process with BMAB refiner', value=False) + with FormRow(): + with FormColumn(): + elem += gr.Slider(minimum=0.0, maximum=2, value=0.4, step=0.05, elem_id='bmab_cn_noise', label='Noise strength') + elem += gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, elem_id='bmab_cn_noise_begin', label='Noise begin') + elem += gr.Slider(minimum=0.0, maximum=1.0, value=0.9, step=0.01, elem_id='bmab_cn_noise_end', label='Noise end') + elem += gr.Radio(label='Hire-fix option for noise', choices=['Both', 'Low res only', 'High res only'], type='value', value='Both') + with FormColumn(): + gr.Markdown('') + with gr.Tab('Pose', elem_id='bmab_cn_pose_tabs'): + with FormRow(): + elem += gr.Checkbox(label='Enable pose', value=False) + with FormRow(): + with FormColumn(): + elem += gr.Slider(minimum=0.0, maximum=2, value=1, step=0.05, elem_id='bmab_cn_pose', label='Pose strength') + elem += gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.01, elem_id='bmab_cn_pose_begin', label='Pose begin') + elem += gr.Slider(minimum=0.0, maximum=1.0, value=1, step=0.01, elem_id='bmab_cn_pose_end', label='Pose end') + elem += gr.Checkbox(label='Face only', value=False) + poses = ['Random'] + poses.extend(Openpose.list_pose()) + 
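The pose dropdown below is seeded with a 'Random' entry ahead of the files from `Openpose.list_pose()`. Resolving that sentinel at generation time is assumed to look roughly like this; `resolve_pose` is a hypothetical helper:

```python
import random

def resolve_pose(selected, poses):
    # 'Random' is the first dropdown entry; pick from the real poses after it.
    if selected == 'Random':
        return random.choice(poses[1:]) if len(poses) > 1 else None
    return selected

print(resolve_pose('Random', ['Random', 'standing.png', 'sitting.png']))
```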
dd_pose = gr.Dropdown(label='Pose Selection', interactive=True, visible=True, value=poses[0], choices=poses) + elem += dd_pose + with FormColumn(): + pose_image = gr.Image(elem_id='bmab_pose_image') + with gr.Tab('IpAdapter', elem_id='bmab_cn_ipadapter_tabs'): + with FormRow(): + elem += gr.Checkbox(label='Enable ipadapter', value=False) + with FormRow(): + with FormColumn(): + elem += gr.Slider(minimum=0.0, maximum=2, value=0.6, step=0.05, elem_id='bmab_cn_ipadapter', label='IpAdapter strength') + elem += gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.01, elem_id='bmab_cn_ipadapter_begin', label='IpAdapter begin') + elem += gr.Slider(minimum=0.0, maximum=1.0, value=0.3, step=0.01, elem_id='bmab_cn_ipadapter_end', label='IpAdapter end') + ipadapters = ['Random'] + ipadapters.extend(IpAdapter.list_images()) + dd_ipadapter = gr.Dropdown(label='IpAdapter Selection', interactive=True, visible=True, value=ipadapters[0], choices=ipadapters) + elem += dd_ipadapter + weight_type = IpAdapter.get_weight_type_list() + elem += gr.Dropdown(label='IpAdapter Weight Type', interactive=True, visible=True, value=weight_type[0], choices=weight_type) + with FormColumn(): + ipadapter_image = gr.Image(elem_id='bmab_ipadapter_image') + with gr.Tab('ICLight', elem_id='bmab_ic_light'): + with FormRow(): + elem += gr.Checkbox(label='Enable ICLight', value=False) + with FormRow(): + elem += gr.Checkbox(label='Enable ICLight before upscale', value=True) + with FormRow(): + with FormColumn(): + styles = ICLight.get_styles() + elem += gr.Dropdown(label='Style Selection', visible=True, value=styles[2], choices=styles) + elem += gr.Textbox(label='ICLight Prompt', placeholder='prompt', lines=1, visible=True, value='') + elem += gr.Radio(label='ICLight Preperence', choices=['None', 'Left', 'Right', 'Top', 'Bottom', 'Face', 'Person'], type='value', value='None') + elem += gr.Slider(minimum=0.0, maximum=1.0, value=0.5, step=0.01, elem_id='bmab_iclight_blending', label='Blending') + with FormColumn(): + elem += gr.Checkbox(label='Use background image', value=False) + iclight_image = gr.Image(elem_id='bmab_iclight_image', type='pil', value=ICLight.get_background_image(), interactive=True) + with gr.Accordion(f'BMAB Postprocessor', open=False): + with FormRow(): + with gr.Tab('Resize by person', elem_id='bmab_postprocess_resize_tab'): + with FormRow(): + elem += gr.Checkbox(label='Enable resize by person', value=False) + mode = ['Inpaint', 'ControlNet inpaint+lama'] + elem += gr.Dropdown(label='Mode', visible=True, value=mode[0], choices=mode) + with FormRow(): + with FormColumn(): + elem += gr.Slider(minimum=0.70, maximum=0.95, value=0.85, step=0.01, label='Resize by person') + with FormColumn(): + elem += gr.Slider(minimum=0, maximum=1, value=0.6, step=0.01, label='Denoising Strength for Inpaint, ControlNet') + with FormRow(): + with FormColumn(): + gr.Markdown('') + with FormColumn(): + elem += gr.Slider(minimum=4, maximum=128, value=30, step=1, label='Mask Dilation') + with gr.Tab('Upscale', elem_id='bmab_postprocess_upscale_tab'): + with FormRow(): + with FormColumn(min_width=100): + elem += gr.Checkbox(label='Enable upscale at final stage', value=False) + elem += gr.Checkbox(label='Detailing after upscale', value=True) + with FormColumn(min_width=100): + gr.Markdown('') + with FormRow(): + with FormColumn(min_width=100): + upscalers = [x.name for x in shared.sd_upscalers] + elem += gr.Dropdown(label='Upscaler', visible=True, value=upscalers[0], choices=upscalers) + elem += gr.Slider(minimum=1, maximum=4, value=1.5, 
step=0.1, label='Upscale ratio') + with gr.Tab('Filter', id='bmab_final_filter', elem_id='bmab_final_filter_tab'): + with FormRow(): + dd_final_filter = gr.Dropdown(label='Final filter', visible=True, value=filter.filters[0], choices=filter.filters) + elem += dd_final_filter + with gr.Tab('Watermark', id='bmab_watermark', elem_id='bmab_watermark'): + elem += gr.Checkbox(label='Watermark enabled', value=False) + with FormRow(): + with FormColumn(min_width=100): + fonts = Watermark.list_fonts() + if len(fonts) == 0: + fonts = [''] + elem += gr.Dropdown(label='Watermark Font', visible=True, value=fonts[0], choices=fonts) + align = [x for x in Watermark.alignment.keys()] + elem += gr.Dropdown(label='Watermark Alignment', visible=True, value=align[5], choices=align) + elem += gr.Dropdown(label='Watermark Text Alignment', visible=True, value='left', choices=['left', 'right', 'center']) + elem += gr.Dropdown(label='Watermark Text Rotate', visible=True, value='0', choices=['0', '90', '180', '270']) + elem += gr.Textbox(label='Watermark Text Color', visible=True, value='#000000') + elem += gr.Textbox(label='Watermark Background Color', visible=True, value='#000000') + with FormColumn(min_width=100): + elem += gr.Slider(minimum=4, maximum=128, value=12, step=1, label='Font Size') + elem += gr.Slider(minimum=0, maximum=100, value=100, step=1, label='Transparency') + elem += gr.Slider(minimum=0, maximum=100, value=0, step=1, label='Background Transparency') + elem += gr.Slider(minimum=0, maximum=100, value=5, step=1, label='Margin') + with FormRow(): + elem += gr.Textbox(placeholder='watermark text here', lines=1, max_lines=10, visible=True, value='', label='Watermark or Image path') + with gr.Accordion(f'BMAB Refresh, Config, Preset, Installer', open=False): + with FormRow(): + configs = parameters.Parameters().list_config() + config = '' if not configs else configs[0] + with gr.Tab('Configuration', elem_id='bmab_configuration_tabs'): + with FormRow(): + with FormColumn(scale=2): + with FormRow(): + config_dd = gr.Dropdown(label='Configuration', visible=True, interactive=True, allow_custom_value=True, value=config, choices=configs) + elem += config_dd + load_btn = ToolButton('⬇️', visible=True, interactive=True, tooltip='load configuration', elem_id='bmab_load_configuration') + save_btn = ToolButton('⬆️', visible=True, interactive=True, tooltip='save configuration', elem_id='bmab_save_configuration') + reset_btn = ToolButton('🔃', visible=True, interactive=True, tooltip='reset to default', elem_id='bmab_reset_configuration') + with FormColumn(scale=1): + gr.Markdown('') + with FormRow(): + with FormColumn(scale=1): + btn_refresh_all = gr.Button('Refresh ALL', visible=True, interactive=True, elem_id='bmab_refresh_all') + with FormColumn(scale=1): + gr.Markdown('') + with FormColumn(scale=1): + gr.Markdown('') + with FormColumn(scale=1): + gr.Markdown('') + with gr.Tab('Preset', elem_id='bmab_configuration_tabs'): + with FormRow(): + with FormColumn(min_width=100): + gr.Markdown('Preset Loader : preset override UI configuration.') + with FormRow(): + presets = parameters.Parameters().list_preset() + with FormColumn(min_width=100): + with FormRow(): + preset_dd = gr.Dropdown(label='Preset', visible=True, interactive=True, allow_custom_value=True, value=presets[0], choices=presets) + elem += preset_dd + refresh_btn = ToolButton('🔄', visible=True, interactive=True, tooltip='refresh preset', elem_id='bmab_preset_refresh') + with gr.Tab('Toy', elem_id='bmab_toy_tabs'): + with FormRow(): + merge_result = 
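The Watermark tab above drives font, alignment, margin, and two 0-100 transparency sliders. A rough sketch of a bottom-right text watermark composited with that kind of alpha; the default font is a placeholder for whatever `Watermark.list_fonts()` returns, and the layout rule is an assumption:

```python
from PIL import Image, ImageDraw, ImageFont

def draw_watermark(image, text, transparency=100, margin=5):
    # Render text onto a transparent overlay, then alpha-composite it so the
    # 0-100 UI slider maps linearly to the text alpha channel.
    overlay = Image.new('RGBA', image.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)
    font = ImageFont.load_default()
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    pos = (image.width - (right - left) - margin, image.height - (bottom - top) - margin)
    draw.text(pos, text, font=font, fill=(0, 0, 0, int(255 * transparency / 100)))
    return Image.alpha_composite(image.convert('RGBA'), overlay)

marked = draw_watermark(Image.new('RGB', (256, 256), 'white'), 'sd_bmab')
```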
gr.Markdown('Result here') + with FormRow(): + random_checkpoint = gr.Button('Merge Random Checkpoint', visible=True, interactive=True, elem_id='bmab_merge_random_checkpoint') + with gr.Tab('Installer', elem_id='bmab_install_tabs'): + with FormRow(): + dd_pkg = gr.Dropdown(label='Package', visible=True, value=installhelper.available_packages[0], choices=installhelper.available_packages) + btn_install = ToolButton('▶️', visible=True, interactive=True, tooltip='Install package', elem_id='bmab_btn_install') + with FormRow(): + markdown_install = gr.Markdown('') + with gr.Accordion(f'BMAB Testroom', open=False, visible=shared.opts.data.get('bmab_for_developer', False)): + with FormRow(): + gallery = gr.Gallery(label='Images', value=[], elem_id='bmab_testroom_gallery') + result_image = gr.Image(elem_id='bmab_result_image') + with FormRow(): + btn_fetch_images = ToolButton('🔄', visible=True, interactive=True, tooltip='fetch images', elem_id='bmab_fetch_images') + btn_process_pipeline = ToolButton('▶️', visible=True, interactive=True, tooltip='fetch images', elem_id='bmab_fetch_images') + + gr.Markdown(f'
{bmab_version}
') + + def load_config(*args): + name = args[0] + ret = parameters.Parameters().load_config(name) + pose_img_name = parameters.Parameters().get_config_value_by_key('module_config.controlnet.pose_selected', ret) + ret.append(Openpose.get_pose(pose_img_name)) + ipadapter_img_name = parameters.Parameters().get_config_value_by_key('module_config.controlnet.ipadapter_selected', ret) + ret.append(IpAdapter.get_image(ipadapter_img_name, displayed=True)) + return ret + + def save_config(*args): + name = parameters.Parameters().get_save_config_name(args) + parameters.Parameters().save_config(args) + return { + config_dd: { + 'choices': parameters.Parameters().list_config(), + 'value': name, + '__type__': 'update' + } + } + + def reset_config(*args): + return parameters.Parameters().get_default() + + def refresh_preset(*args): + return { + preset_dd: { + 'choices': parameters.Parameters().list_preset(), + 'value': 'None', + '__type__': 'update' + } + } + + def merge_random_checkpoint(*args): + def find_random(k, f): + for v in k: + if v.startswith(f): + return v + + result = '' + checkpoints = [str(x) for x in sd_models.checkpoints_list.keys()] + target = random.choices(checkpoints, k=3) + multiplier = random.randrange(10, 90, 1) / 100 + index = random.randrange(0x10000000, 0xFFFFFFFF, 1) + output = f'bmab_random_{format(index, "08X")}' + extras.run_modelmerger(None, target[0], target[1], target[2], 'Weighted sum', multiplier, False, output, 'safetensors', 0, None, '', True, True, True, '{}') + result += f'{output}.safetensors generated
' + for x in range(1, random.randrange(0, 5, 1)): + checkpoints = [str(x) for x in sd_models.checkpoints_list.keys()] + br = find_random(checkpoints, f'{output}.safetensors') + if br is None: + return + index = random.randrange(0x10000000, 0xFFFFFFFF, 1) + output = f'bmab_random_{format(index, "08X")}' + target = random.choices(checkpoints, k=2) + multiplier = random.randrange(10, 90, 1) / 100 + extras.run_modelmerger(None, br, target[0], target[1], 'Weighted sum', multiplier, False, output, 'safetensors', 0, None, '', True, True, True, '{}') + result += f'{output}.safetensors generated
' + debug_print('done') + return { + merge_result: { + 'value': result, + '__type__': 'update' + } + } + + def fetch_images(*args): + global gallery_select_index + gallery_select_index = 0 + return { + gallery: { + 'value': final_images, + '__type__': 'update' + } + } + + def process_pipeline(*args): + config, a = parameters.parse_args(args) + preview = final_images[gallery_select_index] + p = last_process + ctx = context.Context.newContext(bmab_script, p, a, gallery_select_index) + preview = pipeline.process(ctx, preview) + images.save_image( + preview, p.outpath_samples, '', + p.all_seeds[gallery_select_index], p.all_prompts[gallery_select_index], + shared.opts.samples_format, p=p, suffix="-testroom") + return { + result_image: { + 'value': preview, + '__type__': 'update' + } + } + + refresh_targets = [dd_preprocess_filter, dd_hiresfix_filter1, dd_hiresfix_filter2, dd_resample_filter, dd_resize_filter, dd_final_filter, dd_pretraining_filter] + refresh_targets.extend([checkpoint_models, vaes_models, refiner_models, refiner_vaes, face_models, face_vaes, resample_models, resample_vaes]) + refresh_targets.extend([pretraining_checkpoint_models, pretraining_vaes_models, person_checkpoint_models, person_vaes_models]) + refresh_targets.extend([pretraining_models, dd_pose, dd_ipadapter]) + + def reload_filter(*args): + filter.reload_filters() + inputs = list(args) + + _checkpoints = [constants.checkpoint_default] + _checkpoints.extend([str(x) for x in sd_models.checkpoints_list.keys()]) + + _vaes = [constants.vae_default] + _vaes.extend([str(x) for x in sd_vae.vae_dict.keys()]) + + _pretraining_models = ['Select Model'] + _pretraining_models.extend(util.list_pretraining_models()) + + _poses = ['Random'] + _poses.extend(Openpose.list_pose()) + _ipadapter = ['Random'] + _ipadapter.extend(IpAdapter.list_images()) + + values = [ + filter.filters, filter.filters, filter.filters, filter.filters, filter.filters, filter.filters, filter.filters, + _checkpoints, _vaes, _checkpoints, _vaes, _checkpoints, _vaes, _checkpoints, _vaes, + _checkpoints, _vaes, _checkpoints, _vaes, _pretraining_models, _poses, _ipadapter + ] + + ret = { + t: { + 'choices': v, + 'value': v[0] if i not in v else i, + '__type__': 'update' + } + for t, i, v in zip(refresh_targets, inputs, values) + } + + return ret + + def image_selected(data: gr.SelectData, *args): + debug_print(data.index) + global gallery_select_index + gallery_select_index = data.index + + def hit_install(*args): + return installhelper.install(args[0], dd_pkg, markdown_install) + + def stop_process(*args): + bscript.stop_generation = True + gr.Info('Waiting for processing done.') + + load_update_elem = elem[:] + load_update_elem.extend([pose_image, ipadapter_image]) + load_btn.click(load_config, inputs=[config_dd], outputs=load_update_elem) + save_btn.click(save_config, inputs=elem, outputs=[config_dd]) + reset_btn.click(reset_config, outputs=elem) + refresh_btn.click(refresh_preset, outputs=elem) + + random_checkpoint.click(merge_random_checkpoint, outputs=[merge_result]) + btn_fetch_images.click(fetch_images, outputs=[gallery]) + + btn_refresh_all.click( + reload_filter, + inputs=refresh_targets, + outputs=refresh_targets, + ) + + btn_process_pipeline.click(process_pipeline, inputs=elem, outputs=[result_image]) + gallery.select(image_selected, inputs=[gallery]) + + btn_install.click(hit_install, inputs=[dd_pkg], outputs=[dd_pkg, markdown_install]) + btn_stop.click(stop_process) + dd_pose.select(Openpose.pose_selected, inputs=[dd_pose], outputs=[pose_image]) + 
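The callbacks above (`save_config`, `refresh_preset`, `reload_filter`) all return hand-built `{'__type__': 'update'}` dicts, and `reload_filter` guards each dropdown with `v[0] if i not in v else i` so a refresh never leaves a component pointing at a removed choice. The same guard in isolation:

```python
def refresh_choices(current_value, new_choices):
    # Keep the user's current selection when it still exists; otherwise fall
    # back to the first entry, mirroring `v[0] if i not in v else i` above.
    return {
        'choices': new_choices,
        'value': current_value if current_value in new_choices else new_choices[0],
        '__type__': 'update',
    }

print(refresh_choices('face_v2.pt', ['face_v1.pt', 'hand_v1.pt']))  # falls back to face_v1.pt
```

In Gradio 3.x, which the webui ships, `gr.update(choices=..., value=...)` produces an equivalent payload to these hand-built dicts.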
dd_ipadapter.select(IpAdapter.ipadapter_selected, inputs=[dd_ipadapter], outputs=[ipadapter_image])
+	iclight_image.upload(ICLight.put_backgound_image, inputs=[iclight_image])
+	return elem
+
+
+def on_ui_settings():
+	shared.opts.add_option('bmab_debug_print', shared.OptionInfo(False, 'Print debug messages.', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_debug_logging', shared.OptionInfo(False, 'Enable developer logging.', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_show_extends', shared.OptionInfo(False, 'Show image before processing. (DO NOT ENABLE IN CLOUD)', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_test_function', shared.OptionInfo(False, 'Show test functions', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_keep_original_setting', shared.OptionInfo(False, 'Keep original settings', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_save_image_before_process', shared.OptionInfo(False, 'Save image before processing', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_save_image_after_process', shared.OptionInfo(False, 'Save image after processing (has known bugs)', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_for_developer', shared.OptionInfo(False, 'Show hidden developer functions.', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_use_dino_predict', shared.OptionInfo(False, 'Use GroundingDINO for detecting hands. GroundingDINO must be installed manually.', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_max_detailing_element', shared.OptionInfo(
+		default=0, label='Max Detailing Element', component=gr.Slider, component_args={'minimum': 0, 'maximum': 10, 'step': 1}, section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_detail_full', shared.OptionInfo(True, 'Always use FULL VAE type for encoding when detailing anything. 
(v1.6.0)', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_optimize_vram', shared.OptionInfo(default='None', label='VRAM optimization for detailing', component=gr.Radio, component_args={'choices': ['None', 'low vram', 'med vram']}, section=('bmab', 'BMAB')))
+	mask_names = masking.list_mask_names()
+	shared.opts.add_option('bmab_mask_model', shared.OptionInfo(default=mask_names[0], label='Masking model', component=gr.Radio, component_args={'choices': mask_names}, section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_use_specific_model', shared.OptionInfo(False, 'Use specific model', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_model', shared.OptionInfo(default='', label='Checkpoint for Person, Face, Hand', component=gr.Textbox, component_args='', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_cn_openpose', shared.OptionInfo(default='control_v11p_sd15_openpose_fp16 [73c2b67d]', label='ControlNet openpose model', component=gr.Textbox, component_args='', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_cn_lineart', shared.OptionInfo(default='control_v11p_sd15_lineart_fp16 [5c23b17d]', label='ControlNet lineart model', component=gr.Textbox, component_args='', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_cn_inpaint', shared.OptionInfo(default='control_v11p_sd15_inpaint_fp16 [be8bc0ed]', label='ControlNet inpaint model', component=gr.Textbox, component_args='', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_cn_tile_resample', shared.OptionInfo(default='control_v11f1e_sd15_tile_fp16 [3b860298]', label='ControlNet tile model', component=gr.Textbox, component_args='', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_cn_inpaint_depth_hand', shared.OptionInfo(default='control_sd15_inpaint_depth_hand_fp16 [09456e54]', label='ControlNet inpaint depth hand model', component=gr.Textbox, component_args='', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_cn_ipadapter', shared.OptionInfo(default='ip-adapter-plus_sd15 [836b5c2e]', label='ControlNet ip adapter model', component=gr.Textbox, component_args='', section=('bmab', 'BMAB')))
+	shared.opts.add_option('bmab_additional_checkpoint_path', shared.OptionInfo(default='', label='Additional Checkpoint Path', component=gr.Textbox, component_args='', section=('bmab', 'BMAB')))
diff --git a/3-bmab/sd_bmab/util/__init__.py b/3-bmab/sd_bmab/util/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f649d0aae8601f99b895bd45ae06f40604f1bbe
--- /dev/null
+++ b/3-bmab/sd_bmab/util/__init__.py
@@ -0,0 +1,407 @@
+import os
+import cv2
+import torch
+import numpy as np
+import hashlib
+from pathlib import Path
+import glob
+from basicsr.utils.download_util import load_file_from_url
+
+from PIL import Image
+from PIL import ImageDraw
+from PIL import ImageFilter
+
+import modules
+from modules import shared
+from modules import devices
+from modules import images
+from modules.sd_samplers import sample_to_image
+from modules.paths import models_path
+from sd_bmab import constants
+
+from ultralytics import YOLO
+
+
+LANCZOS = (Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS)
+
+
+def debug_print(*args):
+	if shared.opts.data.get('bmab_debug_print', False):
+		print(*args)
+
+
+def image_to_latent(p, img):
+	image = np.array(img).astype(np.float32) / 255.0
+	image = np.moveaxis(image, 2, 0)
+	batch_images = np.expand_dims(image, axis=0).repeat(1, axis=0)
+	image = torch.from_numpy(batch_images)
+	image = 2. * image - 1.
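The VAE consumes NCHW tensors in [-1, 1], which is why the conversion above maps 8-bit HWC pixels through [0, 1] before the final `2 * x - 1` shift. A quick standalone check of the shapes and range:

```python
import numpy as np
import torch

# Mid-gray 512x512 RGB input, converted exactly as in the helper above.
pixels = np.full((512, 512, 3), 128, dtype=np.uint8)
image = torch.from_numpy(np.moveaxis(pixels.astype(np.float32) / 255.0, 2, 0)).unsqueeze(0)
image = 2. * image - 1.
print(image.shape, float(image.min()), float(image.max()))
# torch.Size([1, 3, 512, 512]); min and max are both ~0.0039 for mid-gray
```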
+ image = image.to(shared.device, dtype=devices.dtype_vae) + return p.sd_model.get_first_stage_encoding(p.sd_model.encode_first_stage(image)) + + +def latent_to_image(x, index=0): + img = sample_to_image(x, index, approximation=0) + return img + + +def tensor_to_image(xx): + x_sample = 255. * np.moveaxis(xx.cpu().numpy(), 0, 2) + x_sample = x_sample.astype(np.uint8) + return Image.fromarray(x_sample) + + +def image_to_tensor(xx): + image = np.array(xx).astype(np.float32) / 255 + image = np.moveaxis(image, 2, 0) + image = torch.from_numpy(image) + return image + + +def resize_image(resize_mode, im, width, height, upscaler_name=None): + if resize_mode == 2: + vwidth = im.width + vheight = height + res = Image.new("RGB", (vwidth, vheight)) + dw = (vwidth - im.width) // 2 + dh = (vheight - im.height) + res.paste(im, (dw, dh)) + if dh > 0: + res.paste(im.resize((vwidth, dh), box=(0, 0, vwidth, 0)), box=(0, 0)) + + im = res + vwidth = width + vheight = height + res = Image.new("RGB", (vwidth, vheight)) + dw = (vwidth - im.width) // 2 + dh = (vheight - im.height) + res.paste(im, (dw, dh)) + + if dw > 0: + res.paste(im.resize((dw, height), box=(0, 0, 0, height)), box=(0, 0)) + res.paste(im.resize((dw, height), box=(im.width, 0, im.width, height)), box=(im.width + dw, 0)) + + return res + + return images.resize_image(resize_mode, im, width, height, upscaler_name) + + +alignment = { + 'top': lambda dx, dy: (dx/2, dx/2, 0, dy), + 'top-right': lambda dx, dy: (dx, 0, 0, dy), + 'right': lambda dx, dy: (dx, 0, dy/2, dy/2), + 'bottom-right': lambda dx, dy: (dx, 0, dy, 0), + 'bottom': lambda dx, dy: (dx/2, dx/2, dy, 0), + 'bottom-left': lambda dx, dy: (0, dx, dy, 0), + 'left': lambda dx, dy: (0, dx, dy/2, dy/2), + 'top-left': lambda dx, dy: (0, dx, 0, dy), + 'center': lambda dx, dy: (dx/2, dx/2, dy/2, dy/2), +} + + +def resize_image_with_alignment(image, al, width, height): + if al not in alignment: + return image + return resize_margin(image, *alignment[al](width - image.width, height - image.height)) + + +def get_mask_with_alignment(image, al, width, height): + return draw_mask(image, *alignment[al](width - image.width, height - image.height)) + + +def resize_margin(image, left, right, top, bottom): + left = int(left) + right = int(right) + top = int(top) + bottom = int(bottom) + input_image = image.copy() + + if left != 0: + res = Image.new("RGB", (image.width + left, image.height)) + res.paste(image, (left, 0)) + res.paste(image.resize((left, image.height), box=(0, 0, 0, image.height)), box=(0, 0)) + image = res + if right != 0: + res = Image.new("RGB", (image.width + right, image.height)) + res.paste(image, (0, 0)) + res.paste(image.resize((right, image.height), box=(image.width, 0, image.width, image.height)), box=(image.width, 0)) + image = res + if top != 0: + res = Image.new("RGB", (image.width, image.height + top)) + res.paste(image, (0, top)) + res.paste(image.resize((image.width, top), box=(0, 0, image.width, 0)), box=(0, 0)) + image = res + if bottom != 0: + res = Image.new("RGB", (image.width, image.height + bottom)) + res.paste(image, (0, 0)) + res.paste(image.resize((image.width, bottom), box=(0, image.height, image.width, image.height)), box=(0, image.height)) + image = res + + img = image.filter(ImageFilter.GaussianBlur(10)) + region_size = 10 + width, height = img.size + for y in range(0, height, region_size): + for x in range(0, width, region_size): + region = img.crop((x, y, x + region_size, y + region_size)) + average_color = region.resize((1, 1)).getpixel((0, 0)) + 
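+			# Resizing the region to 1x1 is a cheap box average; pasting that
+			# average back tiles the blurred margin into a coarse mosaic so the
+			# stretched edges blend instead of streaking.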
img.paste(average_color, (x, y, x + region_size, y + region_size)) + img.paste(input_image, box=(left, top)) + image = img.resize(input_image.size, resample=LANCZOS) + return image + + +def draw_mask(image, left, right, top, bottom): + left = int(left) + right = int(right) + top = int(top) + bottom = int(bottom) + + width = image.width + left + right + height = image.height + top + bottom + + mask = Image.new('L', (width, height), 255) + dr = ImageDraw.Draw(mask, 'L') + dr.rectangle((left, top, left + image.width, top + image.height), fill=0) + mask = mask.resize(image.size, resample=LANCZOS) + return mask + + +def box_dilation(box, dil): + x1, y1, x2, y2 = tuple(int(x) for x in box) + dx = int((x2 - x1) * dil) + dy = int((y2 - y1) * dil) + return x1 - dx, y1 - dy, x2 + dx, y2 + dy + + +def fix_box_size(box): + x1, y1, x2, y2 = tuple(int(x) for x in box) + w = x2 - x1 + h = y2 - y1 + w = (w // 8) * 8 + h = (h // 8) * 8 + return x1, y1, x1 + w, y1 + h + + +def fix_size_by_scale(w, h, scale): + w = int(((w * scale) // 8) * 8) + h = int(((h * scale) // 8) * 8) + return w, h + + +def fix_box_by_scale(box, scale): + x1, y1, x2, y2 = tuple(int(x) for x in box) + w = x2 - x1 + h = y2 - y1 + dx = int(w * scale / 2) + dy = int(h * scale / 2) + return x1 - dx, y1 - dy, x2 + dx, y2 + dy + + +def fix_box_limit(box, size): + x1, y1, x2, y2 = tuple(int(x) for x in box) + w = size[0] + h = size[1] + if x1 < 0: + x1 = 0 + if y1 < 0: + y1 = 0 + if x2 >= w: + x2 = w-1 + if y2 >= h: + y2 = h-1 + return x1, y1, x2, y2 + + +def fix_sqare_box(box): + x1, y1, x2, y2 = tuple(int(x) for x in box) + w = int((x2 - x1) / 2) + h = int((y2 - y1) / 2) + x = x1 + w + y = y1 + h + l = max(w, h) + x1 = x - l + x2 = x + l + y1 = y - l + y2 = y + l + return x1, y1, x2, y2 + + +def change_vae(name='auto'): + modules.sd_vae.reload_vae_weights(shared.sd_model, vae_file=modules.sd_vae.vae_dict[name]) + + +def get_seeds(s, p, a): + return p.all_seeds[s.index], p.all_subseeds[s.index] + + +def ultralytics_predict(image, confidence): + bmab_model_path = os.path.join(models_path, "bmab") + yolo = f'{bmab_model_path}/face_yolov8n.pt' + boxes = [] + load = torch.load + torch.load = modules.safe.unsafe_torch_load + try: + model = YOLO(yolo) + pred = model(image, conf=confidence, device='') + boxes = pred[0].boxes.xyxy.cpu().numpy() + boxes = boxes.tolist() + except: + pass + torch.load = load + return boxes + + +def dict_to_str(d): + return ','.join([f'{k}={v}' for k, v in d.items()]) + + +def dilate_mask(mask, dilation): + if dilation < 4: + return mask + arr = np.array(mask) + kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dilation, dilation)) + arr = cv2.dilate(arr, kernel, iterations=1) + return Image.fromarray(arr) + + +def erode_mask(mask, erosion): + if erosion < 4: + return mask + arr = np.array(mask) + kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (erosion, erosion)) + arr = cv2.erode(arr, kernel, iterations=1) + return Image.fromarray(arr) + + +def get_cn_args(p): + for script_object in p.scripts.alwayson_scripts: + filename = Path(script_object.filename).stem + if filename == 'controlnet': + return (script_object.args_from, script_object.args_to) + return None + + +def b64_encoding(image): + from io import BytesIO + import base64 + + buffered = BytesIO() + image.save(buffered, format="PNG") + return base64.b64encode(buffered.getvalue()).decode("utf-8") + + +def generate_noise(seed, width, height): + img_1 = np.zeros([height, width, 3], dtype=np.uint8) + # Generate random Gaussian noise + mean = 0 + stddev = 180 + 
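+	# cv2.randn fills each channel independently with Gaussian noise
+	# (saturated to the uint8 range). Note that the seed argument is
+	# currently unused because cv2.setRNGSeed(seed) below is commented out,
+	# so the noise is not reproducible per seed.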
r, g, b = cv2.split(img_1) + # cv2.setRNGSeed(seed) + cv2.randn(r, mean, stddev) + cv2.randn(g, mean, stddev) + cv2.randn(b, mean, stddev) + img = cv2.merge([r, g, b]) + pil_image = Image.fromarray(img, mode='RGB') + return pil_image + + +def lazy_loader(filename): + bmab_model_path = os.path.join(models_path, "bmab") + files = glob.glob(bmab_model_path) + + targets = { + 'sam_vit_b_01ec64.pth': 'https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth', + 'sam_vit_l_0b3195.pth': 'https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth', + 'sam_vit_h_4b8939.pth': 'https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth', + 'groundingdino_swint_ogc.pth': 'https://huggingface.co/ShilongLiu/GroundingDINO/resolve/main/groundingdino_swint_ogc.pth', + 'GroundingDINO_SwinT_OGC.py': 'https://raw.githubusercontent.com/IDEA-Research/GroundingDINO/main/groundingdino/config/GroundingDINO_SwinT_OGC.py', + 'face_yolov8n.pt': 'https://huggingface.co/Bingsu/adetailer/resolve/main/face_yolov8n.pt', + 'face_yolov8n_v2.pt': 'https://huggingface.co/Bingsu/adetailer/resolve/main/face_yolov8n_v2.pt', + 'face_yolov8m.pt': 'https://huggingface.co/Bingsu/adetailer/resolve/main/face_yolov8m.pt', + 'face_yolov8s.pt': 'https://huggingface.co/Bingsu/adetailer/resolve/main/face_yolov8s.pt', + 'hand_yolov8n.pt': 'https://huggingface.co/Bingsu/adetailer/resolve/main/hand_yolov8n.pt', + 'hand_yolov8s.pt': 'https://huggingface.co/Bingsu/adetailer/resolve/main/hand_yolov8s.pt', + 'person_yolov8m-seg.pt': 'https://huggingface.co/Bingsu/adetailer/resolve/main/person_yolov8m-seg.pt', + 'person_yolov8n-seg.pt': 'https://huggingface.co/Bingsu/adetailer/resolve/main/person_yolov8n-seg.pt', + 'person_yolov8s-seg.pt': 'https://huggingface.co/Bingsu/adetailer/resolve/main/person_yolov8s-seg.pt', + 'sam_hq_vit_b.pth': 'https://huggingface.co/lkeab/hq-sam/resolve/main/sam_hq_vit_b.pth', + 'sam_hq_vit_h.pth': 'https://huggingface.co/lkeab/hq-sam/resolve/main/sam_hq_vit_h.pth', + 'sam_hq_vit_l.pth': 'https://huggingface.co/lkeab/hq-sam/resolve/main/sam_hq_vit_l.pth', + 'sam_hq_vit_tiny.pth': 'https://huggingface.co/lkeab/hq-sam/resolve/main/sam_hq_vit_tiny.pth', + 'bmab_face_nm_yolov8n.pt': 'https://huggingface.co/portu-sim/bmab/resolve/main/bmab_face_nm_yolov8n.pt', + 'bmab_face_sm_yolov8n.pt': 'https://huggingface.co/portu-sim/bmab/resolve/main/bmab_face_sm_yolov8n.pt', + 'bmab_hand_yolov8n.pt': 'https://huggingface.co/portu-sim/bmab/resolve/main/bmab_hand_yolov8n.pt', + 'ControlNetLama.pth': 'https://huggingface.co/lllyasviel/Annotators/resolve/main/ControlNetLama.pth', + } + + file = os.path.join(bmab_model_path, filename) + if os.path.exists(file): + return file + + if filename in targets: + load_file_from_url(targets[filename], bmab_model_path) + return file + + +def check_models(): + models_hashes = { + 'bmab_face_nm_yolov8n.pt': '82a88926da', + 'bmab_face_sm_yolov8n.pt': '08e2180f5b', + 'bmab_hand_yolov8n.pt': '59bfcfd3bc', + } + + bmab_model_path = os.path.join(models_path, 'bmab') + for model_file, local_hash in models_hashes.items(): + model_path = os.path.join(bmab_model_path, model_file) + if not os.path.exists(model_path): + continue + _hash = calculate_hash(model_path) + debug_print('hash', model_file, _hash, local_hash) + if _hash != local_hash: + debug_print('different hash load', model_file) + os.remove(model_path) + lazy_loader(model_file) + + +def list_pretraining_models(): + bmab_model_path = os.path.join(models_path, "bmab") + files = glob.glob(os.path.join(bmab_model_path, 
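+		# pick up any '*.pt' YOLO weights present in models/bmab (fetched by
+		# lazy_loader()/check_models() above, or placed there manually)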
'*.pt'))
+	return [os.path.basename(f) for f in files]
+
+
+def load_pretraining_model(filename):
+	bmab_model_path = os.path.join(models_path, "bmab")
+	return os.path.join(bmab_model_path, filename)
+
+
+def calculate_sha256(filename):
+	hash_sha256 = hashlib.sha256()
+	blksize = 1024 * 1024
+
+	with open(filename, "rb") as f:
+		for chunk in iter(lambda: f.read(blksize), b""):
+			hash_sha256.update(chunk)
+
+	return hash_sha256.hexdigest()
+
+
+def calculate_hash(filename):
+	sha256 = calculate_sha256(filename)
+	return sha256[:10]
+
+
+def get_scueduler_list():
+	ascheduler = [constants.scheduler_default]
+	try:
+		# for SD 1.8 compatibility
+		from modules import sd_schedulers
+		ascheduler.extend([x.label for x in sd_schedulers.schedulers])
+	except ImportError:
+		pass
+	return ascheduler
+
+
+def get_scheduler(p):
+	if hasattr(p, 'scheduler'):
+		return p.scheduler
+	return constants.scheduler_default
diff --git a/3-bmab/sd_bmab/util/installhelper.py b/3-bmab/sd_bmab/util/installhelper.py
new file mode 100644
index 0000000000000000000000000000000000000000..61ff34580d070577188a7a8483b5e76fce5625a7
--- /dev/null
+++ b/3-bmab/sd_bmab/util/installhelper.py
@@ -0,0 +1,123 @@
+import shutil
+
+import launch
+from modules import launch_utils
+import torch
+import platform
+
+
+groundingdino_install_info = {
+	'2.1.2+cu121:3.10:linux:amd64': 'https://github.com/Bing-su/GroundingDINO/releases/download/v23.9.27/groundingdino-23.9.27+torch2.1.0.cu121-cp310-cp310-manylinux_2_34_x86_64.whl',
+	'2.1.2+cu121:3.10:windows:amd64': 'https://github.com/Bing-su/GroundingDINO/releases/download/v23.9.27/groundingdino-23.9.27+torch2.1.0.cu121-cp310-cp310-win_amd64.whl',
+	'2.1.2+cu121:3.11:linux:amd64': 'https://github.com/Bing-su/GroundingDINO/releases/download/v23.9.27/groundingdino-23.9.27+torch2.1.0.cu121-cp311-cp311-manylinux_2_34_x86_64.whl',
+	'2.1.2+cu121:3.11:windows:amd64': 'https://github.com/Bing-su/GroundingDINO/releases/download/v23.9.27/groundingdino-23.9.27+torch2.1.0.cu121-cp311-cp311-win_amd64.whl',
+	'2.1.2+cu121:3.9:linux:amd64': 'https://github.com/Bing-su/GroundingDINO/releases/download/v23.9.27/groundingdino-23.9.27+torch2.1.0.cu121-cp39-cp39-manylinux_2_34_x86_64.whl',
+	'2.1.2+cu121:3.9:windows:amd64': 'https://github.com/Bing-su/GroundingDINO/releases/download/v23.9.27/groundingdino-23.9.27+torch2.1.0.cu121-cp39-cp39-win_amd64.whl',
+}
+
+groundingdino_install_replacement = {
+	'2.1.0+cu121:3.10:linux:amd64': '2.1.2+cu121:3.10:linux:amd64',
+	'2.1.1+cu121:3.10:linux:amd64': '2.1.2+cu121:3.10:linux:amd64',
+}
+
+
+available_packages = ['GroundingDINO']
+
+
+def install_groudingdino(url):
+	launch_utils.run(f'{launch_utils.python} -m pip uninstall --yes groundingdino', live=True)
+	launch_utils.run(f'{launch_utils.python} -m pip uninstall --yes pycocotools', live=True)
+
+	print('Install pycocotools')
+	launch.run_pip('install pycocotools', 'sd-webui-bmab requirement: pycocotools')
+
+	print(f'Install groundingdino from {url}. This may take several minutes.')
+	launch.run_pip(f'install {url}', 'sd-webui-bmab requirement: groundingdino')
+	print('Done.')
+
+
+def get_condition():
+	torch_version = torch.__version__
+	pv = platform.python_version_tuple()
+	system = 'windows' if platform.system() == 'Windows' else 'linux'
+	# both Windows 'AMD64' and Linux 'x86_64' map to the 'amd64' wheel keys
+	machine = 'amd64' if platform.machine() in ('AMD64', 'x86_64') else platform.machine().lower()
+	return f'{torch_version}:{pv[0]}.{pv[1]}:{system}:{machine}'
+
+
+def get_temporary():
+	if platform.system() == 'Windows':
+		return 'c:\\temp'
+	return '/temp'
+
+
+def install(pkg_name, dd_pkg, markdown_install):
+	groundingdino_cuda_name = 'GroundingDINO for CUDA'
+	groundingdino_selected = pkg_name
+
+	def add_new_available(cond):
+		msg = f'GroundingDINO for CUDA found for {cond}. Please select GroundingDINO or {groundingdino_cuda_name}.'
+		newname = f'{groundingdino_cuda_name}-{cond}'
+		if newname not in available_packages:
+			available_packages.append(newname)
+		return msg, newname
+
+	def install_normal_groundingdino(c):
+		url = 'https://github.com/IDEA-Research/GroundingDINO'
+		launch_utils.run(f'{launch_utils.python} -m pip uninstall --yes groundingdino', live=True)
+		launch_utils.run(f'{launch_utils.python} -m pip uninstall --yes pycocotools', live=True)
+		launch_utils.run(f'{launch_utils.python} -m pip install pycocotools', live=True)
+		if platform.system() == 'Windows':
+			launch_utils.run(f'{launch_utils.git} clone {url} c:\\Temp\\groundingdino', live=True)
+			launch_utils.run(f'cd c:\\Temp\\groundingdino && {launch_utils.python} -m pip install -e .', live=True)
+			shutil.rmtree('c:\\Temp\\groundingdino', ignore_errors=True)
+		else:
+			launch_utils.run(f'{launch_utils.git} clone {url} /temp/groundingdino', live=True)
+			launch_utils.run(f'cd /temp/groundingdino && {launch_utils.python} -m pip install -e .', live=True)
+			shutil.rmtree('/temp/groundingdino', ignore_errors=True)
+
+		return f'No prebuilt wheel for this CUDA system: {c}. GroundingDINO was built from source (VERY SLOW!!!)'
+
+	def cuda_in_available_packages():
+		for x in available_packages:
+			if x.startswith(groundingdino_cuda_name):
+				return True
+		return False
+
+	if pkg_name == 'GroundingDINO':
+		cond = get_condition()
+		if cuda_in_available_packages():
+			msg = install_normal_groundingdino(cond)
+		else:
+			replacement = groundingdino_install_replacement.get(cond)
+			if replacement is not None:
+				# offer the wheel built for the replacement condition, so that
+				# the later lookup in groundingdino_install_info can succeed
+				msg, groundingdino_selected = add_new_available(replacement)
+			else:
+				groudingdino_for_cuda = groundingdino_install_info.get(cond)
+				if groudingdino_for_cuda is None:
+					msg = install_normal_groundingdino(cond)
+				else:
+					msg, groundingdino_selected = add_new_available(cond)
+	elif pkg_name.startswith(groundingdino_cuda_name):
+		groudingdino_for_cuda_cond = pkg_name[len(groundingdino_cuda_name)+1:]
+		groudingdino_for_cuda = groundingdino_install_info.get(groudingdino_for_cuda_cond)
+		if groudingdino_for_cuda is not None:
+			install_groudingdino(groudingdino_for_cuda)
+			msg = f'{groundingdino_cuda_name} installed. {groudingdino_for_cuda}'
+			groundingdino_selected = f'{groundingdino_cuda_name}-{groudingdino_for_cuda_cond}'
+		else:
+			msg = 'Nothing installed.'
+	else:
+		msg = 'Nothing installed.'
+
+	return {
+		dd_pkg: {
+			'choices': available_packages,
+			'value': groundingdino_selected,
+			'__type__': 'update'
+		},
+		markdown_install: {
+			'value': msg,
+			'__type__': 'update'
+		}
+	}
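+
+
+# A minimal sketch of how install() is intended to be wired from the Gradio
+# UI; the component names (btn_install, dd_pkg, markdown_install) are
+# assumptions for illustration. install() returns per-component update
+# dicts, so both components must be declared as outputs:
+#
+#	btn_install.click(
+#		lambda pkg: install(pkg, dd_pkg, markdown_install),
+#		inputs=[dd_pkg], outputs=[dd_pkg, markdown_install])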