diff --git a/.gitattributes b/.gitattributes index ad484fe923de846280a036bbdfb200849053dba6..314884085aa9f31db5fd4cb7937064057e671344 100644 --- a/.gitattributes +++ b/.gitattributes @@ -297,3 +297,9 @@ my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/h5py/h5d.cp my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/h5py/h5f.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/wrapt/_wrappers.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/h5py/h5.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0 filter=lfs diff=lfs merge=lfs -text +my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/h5py/h5t.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/torchaudio/_torchaudio.so filter=lfs diff=lfs merge=lfs -text +my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/Pillow.libs/libpng16-52f22300.so.16.37.0 filter=lfs diff=lfs merge=lfs -text +my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/scikit_image.libs/libgomp-a34b3233.so.1.0.0 filter=lfs diff=lfs merge=lfs -text +my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/scipy.libs/libquadmath-96973f99.so.0.0.0 filter=lfs diff=lfs merge=lfs -text diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/Pillow.libs/libpng16-52f22300.so.16.37.0 b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/Pillow.libs/libpng16-52f22300.so.16.37.0 new file mode 100644 index 0000000000000000000000000000000000000000..0d4a3d14c0c33488036db1437c28772b70a78b18 --- /dev/null +++ 
b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/Pillow.libs/libpng16-52f22300.so.16.37.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cc800050906e9a3489fe60fbf57f8dca649c9627f87696ce3369f32c9457d56 +size 277816 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/INSTALLER b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..f79e4cb9aaf0b2d9e8ba78861e2071317b2384b3 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/INSTALLER @@ -0,0 +1 @@ +conda \ No newline at end of file diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/LICENSE b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..07074259b61a65feff513bce8b3bd31b30426758 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/LICENSE @@ -0,0 +1,6 @@ +This software is made available under the terms of *either* of the licenses +found in LICENSE.APACHE or LICENSE.BSD. Contributions to cryptography are made +under the terms of *both* these licenses. + +The code used in the OS random engine is derived from CPython, and is licensed +under the terms of the PSF License Agreement. 
diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/LICENSE.APACHE b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/LICENSE.APACHE new file mode 100644 index 0000000000000000000000000000000000000000..62589edd12a37dd28b6b6fed1e2d728ac9f05c8d --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/LICENSE.APACHE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/LICENSE.PSF b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/LICENSE.PSF new file mode 100644 index 0000000000000000000000000000000000000000..4d3a4f57dea90ef89b53b97499b74ce5296f41ef --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/LICENSE.PSF @@ -0,0 +1,41 @@ +1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and + the Individual or Organization ("Licensee") accessing and otherwise using Python + 2.7.12 software in source or binary form and its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, PSF hereby + grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, + analyze, test, perform and/or display publicly, prepare derivative works, + distribute, and otherwise use Python 2.7.12 alone or in any derivative + version, provided, however, that PSF's License Agreement and PSF's notice of + copyright, i.e., "Copyright © 2001-2016 Python Software Foundation; All Rights + Reserved" are retained in Python 2.7.12 alone or in any derivative version + prepared by Licensee. + +3. 
In the event Licensee prepares a derivative work that is based on or + incorporates Python 2.7.12 or any part thereof, and wants to make the + derivative work available to others as provided herein, then Licensee hereby + agrees to include in any such work a brief summary of the changes made to Python + 2.7.12. + +4. PSF is making Python 2.7.12 available to Licensee on an "AS IS" basis. + PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF + EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR + WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE + USE OF PYTHON 2.7.12 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 2.7.12 + FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF + MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.7.12, OR ANY DERIVATIVE + THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material breach of + its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any relationship + of agency, partnership, or joint venture between PSF and Licensee. This License + Agreement does not grant permission to use PSF trademarks or trade name in a + trademark sense to endorse or promote products or services of Licensee, or any + third party. + +8. By copying, installing or otherwise using Python 2.7.12, Licensee agrees + to be bound by the terms and conditions of this License Agreement. 
diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/RECORD b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..36c1729f616ff8e1de44ebee95ade60929674a7e --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/RECORD @@ -0,0 +1,189 @@ +cryptography-3.4.8.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +cryptography-3.4.8.dist-info/LICENSE,sha256=Q9rSzHUqtyHNmp827OcPtTq3cTVR8tPYaU2OjFoG1uI,323 +cryptography-3.4.8.dist-info/LICENSE.APACHE,sha256=qsc7MUj20dcRHbyjIJn2jSbGRMaBOuHk8F9leaomY_4,11360 +cryptography-3.4.8.dist-info/LICENSE.BSD,sha256=YCxMdILeZHndLpeTzaJ15eY9dz2s0eymiSMqtwCPtPs,1532 +cryptography-3.4.8.dist-info/LICENSE.PSF,sha256=aT7ApmKzn5laTyUrA6YiKUVHDBtvEsoCkY5O_g32S58,2415 +cryptography-3.4.8.dist-info/METADATA,sha256=YmATOFBmnPoAnlGZIf8XwOw2GGEcUv9QGgqgusVOqNs,5171 +cryptography-3.4.8.dist-info/RECORD,, +cryptography-3.4.8.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +cryptography-3.4.8.dist-info/WHEEL,sha256=lZtnO0mZxqr35T145kHCmb-YnzMXkVWJhJ6jnbb7-XE,103 +cryptography-3.4.8.dist-info/direct_url.json,sha256=id5KSvB64YamuPh3QnIux4QOgiFnFq41ltIFKfgnthc,108 +cryptography-3.4.8.dist-info/top_level.txt,sha256=rR2wh6A6juD02TBZNJqqonh8x9UP9Sa5Z9Hl1pCPCiM,31 +cryptography/__about__.py,sha256=Gma4uMyERDaqXMloHsN56Lo-XunkiH9-joZKZJPG5a8,805 +cryptography/__init__.py,sha256=qZ9_96xJ8au-AKkdk2Kq60RKN7zGaim_8YY_rAy3_QY,511 +cryptography/__pycache__/__about__.cpython-38.pyc,, +cryptography/__pycache__/__init__.cpython-38.pyc,, +cryptography/__pycache__/exceptions.cpython-38.pyc,, +cryptography/__pycache__/fernet.cpython-38.pyc,, +cryptography/__pycache__/utils.cpython-38.pyc,, +cryptography/exceptions.py,sha256=W25jw80RaAL0NOppZt48x1LSmgqaZqAObTtUExWCh3k,1194 
+cryptography/fernet.py,sha256=Kn_d3z5YFnFP2t9pbX9wpsm7nvlrY7oKO3XLthdstmg,6538 +cryptography/hazmat/__init__.py,sha256=OYlvgprzULzZlsf3yYTsd6VUVyQmpsbHjgJdNnsyRwE,418 +cryptography/hazmat/__pycache__/__init__.cpython-38.pyc,, +cryptography/hazmat/__pycache__/_der.cpython-38.pyc,, +cryptography/hazmat/__pycache__/_oid.cpython-38.pyc,, +cryptography/hazmat/__pycache__/_types.cpython-38.pyc,, +cryptography/hazmat/_der.py,sha256=1Kf4nwKRUt56KpG3a9Idgn0YFeUcnYecoN60p5oZRcA,5221 +cryptography/hazmat/_oid.py,sha256=GVsyziASzIVcnAP_C7dx4czeI_VIccYu9GNV03rWjI0,2372 +cryptography/hazmat/_types.py,sha256=TWd5Q_pS_iDOoUdP3MrYbNbPwwM2hSdONh7230eByto,646 +cryptography/hazmat/backends/__init__.py,sha256=StVq0WWDbGTx0nsqMxVclREpGYp4j467m-k87xuDQRY,576 +cryptography/hazmat/backends/__pycache__/__init__.cpython-38.pyc,, +cryptography/hazmat/backends/__pycache__/interfaces.cpython-38.pyc,, +cryptography/hazmat/backends/interfaces.py,sha256=7_PB6ZpxcRhPSXrZcseOy1u9nQcdb6jXpgf_FDliPQU,10472 +cryptography/hazmat/backends/openssl/__init__.py,sha256=7rpz1Z3eV9vZy_d2iLrwC8Oz0vEruDFrjJlc6W2ZDXA,271 +cryptography/hazmat/backends/openssl/__pycache__/__init__.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/aead.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/backend.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/ciphers.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/cmac.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/decode_asn1.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/dh.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/dsa.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/ec.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/ed25519.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/ed448.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/encode_asn1.cpython-38.pyc,, 
+cryptography/hazmat/backends/openssl/__pycache__/hashes.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/hmac.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/ocsp.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/poly1305.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/rsa.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/utils.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/x25519.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/x448.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/__pycache__/x509.cpython-38.pyc,, +cryptography/hazmat/backends/openssl/aead.py,sha256=zt8ZQ-JethHblWEfwAnB5-09JIL9K8qU1NXwPTjeVYA,5700 +cryptography/hazmat/backends/openssl/backend.py,sha256=HC-d83ZUru3Z11Q7UnjFuko8Jp-ZEHCjzkpocJEfctM,105287 +cryptography/hazmat/backends/openssl/ciphers.py,sha256=fUn5DLrbhI_upLKMvU0aX2_An1dOX8T14PgdZXZr6hU,8611 +cryptography/hazmat/backends/openssl/cmac.py,sha256=KXcwF1XlY0Ew6sTBqPj0I1vr62dfMwCjeV3qBosIw8s,2846 +cryptography/hazmat/backends/openssl/decode_asn1.py,sha256=9s52X0DBtY4zSM0-nPze7A7nho3aM5nCbRa5T4bCvEU,32254 +cryptography/hazmat/backends/openssl/dh.py,sha256=cVPA_PKT4BlT4OvHiJm5ZIDmxNeXBnWy2My4uz8wYpo,10565 +cryptography/hazmat/backends/openssl/dsa.py,sha256=eyWzcpZggJuHLD4U3F9-neLyUqIoEN0MAiSwPIcEw2I,10684 +cryptography/hazmat/backends/openssl/ec.py,sha256=AOKJntDH0-vRCH_BquHiC8RpkM4ENFv509IX7Myuong,13371 +cryptography/hazmat/backends/openssl/ed25519.py,sha256=bSlMfJedRoyzZXoJeaehj_0H_j6Ye5doQHgnib602-Q,5789 +cryptography/hazmat/backends/openssl/ed448.py,sha256=dpJf1zt_o8vfVcXYi_PD8d9H-jBbYEp-d6ZIYDKlC1s,5743 +cryptography/hazmat/backends/openssl/encode_asn1.py,sha256=aiTahXPWVoG-e_0a8aSlE-OIosoT605P_SKZOpB-mJM,23988 +cryptography/hazmat/backends/openssl/hashes.py,sha256=_XZc3glydVD88e0qoHqvOuQ_0xfl2sq0ywfZF4dH91s,3090 
+cryptography/hazmat/backends/openssl/hmac.py,sha256=ATz-rzSjGiRjL9_I5WJRO3R7QCiujd0izNqYrqPAHsA,2933 +cryptography/hazmat/backends/openssl/ocsp.py,sha256=pV4Js2tyOcZPdeeNjFl835COi200yRTt-0PUx9MRGlY,14617 +cryptography/hazmat/backends/openssl/poly1305.py,sha256=0hJDAb4pl9dJ_2xgt-XkNfyFA6U_IFXCe5jzOg7gkG0,2327 +cryptography/hazmat/backends/openssl/rsa.py,sha256=3GaXjh3j2LwK4idwSHfaqxVMhhDPKftw8CerJDyRLmQ,20919 +cryptography/hazmat/backends/openssl/utils.py,sha256=k3i_ARXsPvGTEtUUbnWkg9CkiJgPP4Y0VTTLtOEzEmU,2283 +cryptography/hazmat/backends/openssl/x25519.py,sha256=kCnWzuchrJn1Nne4zeotKvlkMty9p3VuM8y1EWo70vQ,4622 +cryptography/hazmat/backends/openssl/x448.py,sha256=8OKYMNXDR7UlViU3sNIH5qmLMGP7J-F3OeEaRK0aots,4141 +cryptography/hazmat/backends/openssl/x509.py,sha256=mbiJfQrTu_G3jttY_FXRZvqZ8wkjiHcMiPsPlwVHyOg,22831 +cryptography/hazmat/bindings/__init__.py,sha256=s9oKCQ2ycFdXoERdS1imafueSkBsL9kvbyfghaauZ9Y,180 +cryptography/hazmat/bindings/__pycache__/__init__.cpython-38.pyc,, +cryptography/hazmat/bindings/_openssl.abi3.so,sha256=PnatPMQXvFl3qIjIdZuEUvpwbX7xKE8920rex6Y2u6g,915208 +cryptography/hazmat/bindings/_padding.abi3.so,sha256=onQk-dheo-36oyeufSX-XN2G_z0sbtrz-_qT5dd2iLk,16568 +cryptography/hazmat/bindings/_rust.abi3.so,sha256=PdNgtLAf8-ZO9SLwuCwvQqoRh-6l4iiJOqQybltT5KU,1741984 +cryptography/hazmat/bindings/openssl/__init__.py,sha256=s9oKCQ2ycFdXoERdS1imafueSkBsL9kvbyfghaauZ9Y,180 +cryptography/hazmat/bindings/openssl/__pycache__/__init__.cpython-38.pyc,, +cryptography/hazmat/bindings/openssl/__pycache__/_conditional.cpython-38.pyc,, +cryptography/hazmat/bindings/openssl/__pycache__/binding.cpython-38.pyc,, +cryptography/hazmat/bindings/openssl/_conditional.py,sha256=2yZw_Ekya_GKKWUMzUbj3yYrLFZQNproXx1N4HL7TbU,8251 +cryptography/hazmat/bindings/openssl/binding.py,sha256=mIwnL3fICywOLt-iXZIvw2ijSaOIvdYs1Lwk2FUcxYs,5812 +cryptography/hazmat/primitives/__init__.py,sha256=s9oKCQ2ycFdXoERdS1imafueSkBsL9kvbyfghaauZ9Y,180 
+cryptography/hazmat/primitives/__pycache__/__init__.cpython-38.pyc,, +cryptography/hazmat/primitives/__pycache__/_asymmetric.cpython-38.pyc,, +cryptography/hazmat/primitives/__pycache__/_cipheralgorithm.cpython-38.pyc,, +cryptography/hazmat/primitives/__pycache__/_serialization.cpython-38.pyc,, +cryptography/hazmat/primitives/__pycache__/cmac.cpython-38.pyc,, +cryptography/hazmat/primitives/__pycache__/constant_time.cpython-38.pyc,, +cryptography/hazmat/primitives/__pycache__/hashes.cpython-38.pyc,, +cryptography/hazmat/primitives/__pycache__/hmac.cpython-38.pyc,, +cryptography/hazmat/primitives/__pycache__/keywrap.cpython-38.pyc,, +cryptography/hazmat/primitives/__pycache__/padding.cpython-38.pyc,, +cryptography/hazmat/primitives/__pycache__/poly1305.cpython-38.pyc,, +cryptography/hazmat/primitives/_asymmetric.py,sha256=nVJwmxkakirAXfFp410pC4kY_CinzN5FSJwhEn2IE34,485 +cryptography/hazmat/primitives/_cipheralgorithm.py,sha256=sV8-SjhhY4WtHsaLI7e2x4o2cYAAqP8YWBjhC6k1u10,1000 +cryptography/hazmat/primitives/_serialization.py,sha256=nl1g48RG17TWhegK8WKlBlXquMae_lmUSzgZnEqdwbU,1307 +cryptography/hazmat/primitives/asymmetric/__init__.py,sha256=DwsPrun2J00dimo7mq73llEb-O-N4qaOwEx5SwQbleI,909 +cryptography/hazmat/primitives/asymmetric/__pycache__/__init__.cpython-38.pyc,, +cryptography/hazmat/primitives/asymmetric/__pycache__/dh.cpython-38.pyc,, +cryptography/hazmat/primitives/asymmetric/__pycache__/dsa.cpython-38.pyc,, +cryptography/hazmat/primitives/asymmetric/__pycache__/ec.cpython-38.pyc,, +cryptography/hazmat/primitives/asymmetric/__pycache__/ed25519.cpython-38.pyc,, +cryptography/hazmat/primitives/asymmetric/__pycache__/ed448.cpython-38.pyc,, +cryptography/hazmat/primitives/asymmetric/__pycache__/padding.cpython-38.pyc,, +cryptography/hazmat/primitives/asymmetric/__pycache__/rsa.cpython-38.pyc,, +cryptography/hazmat/primitives/asymmetric/__pycache__/utils.cpython-38.pyc,, +cryptography/hazmat/primitives/asymmetric/__pycache__/x25519.cpython-38.pyc,, 
+cryptography/hazmat/primitives/asymmetric/__pycache__/x448.cpython-38.pyc,, +cryptography/hazmat/primitives/asymmetric/dh.py,sha256=dyNhMSOqPNPVuVtvpUNVwPiPHkeqFrKy6lYSPTn4VqI,6303 +cryptography/hazmat/primitives/asymmetric/dsa.py,sha256=TdeZwnJq8ODqcoreu4jr1LFoFYtxA_z_6mhF8dYc5Yg,8116 +cryptography/hazmat/primitives/asymmetric/ec.py,sha256=1e0IpF8SbzrKPbPD4BYTazOaVrVCXMd406x5hzlB3_0,14613 +cryptography/hazmat/primitives/asymmetric/ed25519.py,sha256=Q42f1Cpnlt9UTSfh29T8xcdEgiNaiWr2Wic3sL_eJnk,2719 +cryptography/hazmat/primitives/asymmetric/ed448.py,sha256=SmBsd5pf3RaJoVxETIAcXC_DB6YGsrJUOrWE1BPx3T0,2630 +cryptography/hazmat/primitives/asymmetric/padding.py,sha256=ETdsTtHWSER0ZmTWoCVnWPkG9wvBIxGtal-e6xxl0i4,2115 +cryptography/hazmat/primitives/asymmetric/rsa.py,sha256=Ekxr0B_O2IUre0kw_oIiLJNtx46ADqC6caypjI6d_0w,12004 +cryptography/hazmat/primitives/asymmetric/utils.py,sha256=prIqN-UBc7RfOzFMgM8ON2s3DX8MrXeUlUH1LnmG8gg,1225 +cryptography/hazmat/primitives/asymmetric/x25519.py,sha256=-nbaGlgT1sufO9Ic-urwKDql8Da0U3GL6hZJIMqHgVc,2588 +cryptography/hazmat/primitives/asymmetric/x448.py,sha256=38mR8pqTBFWz5Emv9cQGlqtv_Qg37Bmrla0kRc2HmrU,2549 +cryptography/hazmat/primitives/ciphers/__init__.py,sha256=njx_RoatYaxZD0rYhYGi84WQnTZkMSpK67UfWIqkQpE,582 +cryptography/hazmat/primitives/ciphers/__pycache__/__init__.cpython-38.pyc,, +cryptography/hazmat/primitives/ciphers/__pycache__/aead.cpython-38.pyc,, +cryptography/hazmat/primitives/ciphers/__pycache__/algorithms.cpython-38.pyc,, +cryptography/hazmat/primitives/ciphers/__pycache__/base.cpython-38.pyc,, +cryptography/hazmat/primitives/ciphers/__pycache__/modes.cpython-38.pyc,, +cryptography/hazmat/primitives/ciphers/aead.py,sha256=eKzVH2mf-5aFSaBOG9JnJAAd7XBnf9w4BH2Uu2ZT01w,6833 +cryptography/hazmat/primitives/ciphers/algorithms.py,sha256=EEJCTrUCe8iHN2O1f_bwR2UqhOemhi53-34WsQ6DddI,3829 +cryptography/hazmat/primitives/ciphers/base.py,sha256=w8_AWJwX1PrWpvjeB-_RF3iobalR3Hu3HIMDOMr92c8,7164 
+cryptography/hazmat/primitives/ciphers/modes.py,sha256=mOnOgXyoD0N9NsSOkZvA8qMA3V5O7HubVwYiWVJvRFs,6549 +cryptography/hazmat/primitives/cmac.py,sha256=Kkzk8VQHe-_cYeVab24S4ODMWJOZkC4bLWLvCoMWyvQ,2158 +cryptography/hazmat/primitives/constant_time.py,sha256=6bkW00QjhKusdgsQbexXhMlGX0XRN59XNmxWS2W38NA,387 +cryptography/hazmat/primitives/hashes.py,sha256=cLNJcKKsI8E6ZhENKkppsJ_8S6W97y0tHzXa-ABBhtY,6051 +cryptography/hazmat/primitives/hmac.py,sha256=rhrLt6LwlzbIvnqpmOQVT6L_4Xd9xBsUBunPCkHcvWs,2332 +cryptography/hazmat/primitives/kdf/__init__.py,sha256=DcZhzfLG8d8IYBH771lGTVU5S87OQDpu3nrfOwZnsmA,715 +cryptography/hazmat/primitives/kdf/__pycache__/__init__.cpython-38.pyc,, +cryptography/hazmat/primitives/kdf/__pycache__/concatkdf.cpython-38.pyc,, +cryptography/hazmat/primitives/kdf/__pycache__/hkdf.cpython-38.pyc,, +cryptography/hazmat/primitives/kdf/__pycache__/kbkdf.cpython-38.pyc,, +cryptography/hazmat/primitives/kdf/__pycache__/pbkdf2.cpython-38.pyc,, +cryptography/hazmat/primitives/kdf/__pycache__/scrypt.cpython-38.pyc,, +cryptography/hazmat/primitives/kdf/__pycache__/x963kdf.cpython-38.pyc,, +cryptography/hazmat/primitives/kdf/concatkdf.py,sha256=F9wepne-IRmhTZ9J4H_XLDI0Rl8LccY6wvhVA0jQ4Tc,4576 +cryptography/hazmat/primitives/kdf/hkdf.py,sha256=doR70wjOcA56hxhhQtV2M-ekajjjr5hoT5F8KMxoZdo,3807 +cryptography/hazmat/primitives/kdf/kbkdf.py,sha256=teuWbRvCZShWiRnv0eg-sXrxm-g7Ss02Ulb3vVbzPvc,5195 +cryptography/hazmat/primitives/kdf/pbkdf2.py,sha256=4HaLcppspYe8od6vur0E408qYgQPjJKtI9kDrWesIdo,2261 +cryptography/hazmat/primitives/kdf/scrypt.py,sha256=vCMYGRp-Q--9DxiDQHbkVVRXkhrQTR0qkC0LriV6Hy8,2248 +cryptography/hazmat/primitives/kdf/x963kdf.py,sha256=N5-2KOA2Z-7kAxjhhU5quNcRpmThyQC5dhU-Cw95jWk,2458 +cryptography/hazmat/primitives/keywrap.py,sha256=ibpVZ19OGcoEVrSE7cizdoMDdRDaqcATeVRK5_4MCO4,5927 +cryptography/hazmat/primitives/padding.py,sha256=PYlgTNHZUYROnQZ1oeeqKm1WyzkqLlwIpRUgdASHOG8,6193 
+cryptography/hazmat/primitives/poly1305.py,sha256=_Dtv6oCMn94rAhQ6pjie9mO_MiDLVL5It3Z5sdpCU3c,1711 +cryptography/hazmat/primitives/serialization/__init__.py,sha256=RALEthF7wRjlMyTvSq09XmKQey74tsSdDCCsDaD6yQU,1129 +cryptography/hazmat/primitives/serialization/__pycache__/__init__.cpython-38.pyc,, +cryptography/hazmat/primitives/serialization/__pycache__/base.cpython-38.pyc,, +cryptography/hazmat/primitives/serialization/__pycache__/pkcs12.cpython-38.pyc,, +cryptography/hazmat/primitives/serialization/__pycache__/pkcs7.cpython-38.pyc,, +cryptography/hazmat/primitives/serialization/__pycache__/ssh.cpython-38.pyc,, +cryptography/hazmat/primitives/serialization/base.py,sha256=OYqk2UnIR5IAKP1QRNifhoQw-HX3etcWudn3W2JVIyg,1440 +cryptography/hazmat/primitives/serialization/pkcs12.py,sha256=JuWr5Vqz6zEpjh3j7ME1SCk3TFDNhONjQds_Se7XpFg,2270 +cryptography/hazmat/primitives/serialization/pkcs7.py,sha256=CsmnGEbtLKm2o6D7h_a-EvHQOfwlHxrV96VkjnrNX7s,5223 +cryptography/hazmat/primitives/serialization/ssh.py,sha256=doX0irj_Q1wd1N_JU-Xic_5zUkMH_zZKcQUUOB-axGk,22293 +cryptography/hazmat/primitives/twofactor/__init__.py,sha256=ZHo4zwWidFP2RWFl8luiNuYkVMZPghzx54izPNSCtD4,222 +cryptography/hazmat/primitives/twofactor/__pycache__/__init__.cpython-38.pyc,, +cryptography/hazmat/primitives/twofactor/__pycache__/hotp.cpython-38.pyc,, +cryptography/hazmat/primitives/twofactor/__pycache__/totp.cpython-38.pyc,, +cryptography/hazmat/primitives/twofactor/__pycache__/utils.cpython-38.pyc,, +cryptography/hazmat/primitives/twofactor/hotp.py,sha256=JXph-N0S8CDM-laRoV_G-Welhn7PvcpgXTxRbp_yEjk,2826 +cryptography/hazmat/primitives/twofactor/totp.py,sha256=2GTFsdUdA585-N_sqfPhlBBWDY-ExaH1HKH1p3XPWmk,1912 +cryptography/hazmat/primitives/twofactor/utils.py,sha256=8TG5oyaz8CxHCXqqh26iAny9w_W1e9SgVdCZaeEzOwU,982 +cryptography/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +cryptography/utils.py,sha256=dyYUz2jr1tTsYQ3SaX3_cBYu720kopdatNy_83L1Mkc,4861 
+cryptography/x509/__init__.py,sha256=4_Xsv7yVMCGbpIbSgc4SPxDX-3Mn83gN07Us1PAM_eA,7634 +cryptography/x509/__pycache__/__init__.cpython-38.pyc,, +cryptography/x509/__pycache__/base.cpython-38.pyc,, +cryptography/x509/__pycache__/certificate_transparency.cpython-38.pyc,, +cryptography/x509/__pycache__/extensions.cpython-38.pyc,, +cryptography/x509/__pycache__/general_name.cpython-38.pyc,, +cryptography/x509/__pycache__/name.cpython-38.pyc,, +cryptography/x509/__pycache__/ocsp.cpython-38.pyc,, +cryptography/x509/__pycache__/oid.cpython-38.pyc,, +cryptography/x509/base.py,sha256=duSe4bIuBiJ5g2NC8-VSxDfqHZ0CEEcXZKhcBGq-eeA,28193 +cryptography/x509/certificate_transparency.py,sha256=rzJvxd1FVfc5gOjUT-T2VF5vcOC597UrrI_5JJwZprI,979 +cryptography/x509/extensions.py,sha256=M-n_8gEjO5_03ufGHoK_6w8YSSiNyWvHUJ5Kgq5zoN4,54019 +cryptography/x509/general_name.py,sha256=5dld2ktZnCEg3l14UyKk6DSlzFHXlc6WxW5J8R8Mk-Q,8161 +cryptography/x509/name.py,sha256=PpRua5nWFLZtOg77XdaybGVNspO8ZvQ7ddNDn203vys,8529 +cryptography/x509/ocsp.py,sha256=ERB5osTWbNieLj945Xoq0NjBkzqodo_WBL7ORaC2fDg,14738 +cryptography/x509/oid.py,sha256=1PxP9Pr_lh77zqyvTJefeRozK3VYaRlNmWfYfDWr2Ak,12619 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/REQUESTED b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/REQUESTED new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/WHEEL b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..7ae20a049f3296c665c3bc701f4e6f33be833438 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 
+Generator: bdist_wheel (0.37.0) +Root-Is-Purelib: false +Tag: cp38-cp38-linux_x86_64 + diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/top_level.txt b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..52ccfc6e37b267e56858371a3d248b87d62f1037 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/cryptography-3.4.8.dist-info/top_level.txt @@ -0,0 +1,3 @@ +_openssl +_padding +cryptography diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/docker_pycreds-0.4.0.dist-info/INSTALLER b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/docker_pycreds-0.4.0.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/docker_pycreds-0.4.0.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/__init__.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c6ef0e6a7c9903ea5f359baf811c0f02cb836d89 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/__init__.py @@ -0,0 +1,4 @@ +# flake8: noqa +from .store import Store +from .errors import StoreError, CredentialsNotFound +from .constants import * \ No newline at end of file diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/constants.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/constants.py new file mode 100644 index 
0000000000000000000000000000000000000000..6a82d8da423f9f342c19f6f023e1ee2bc4a4d7bc --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/constants.py @@ -0,0 +1,4 @@ +PROGRAM_PREFIX = 'docker-credential-' +DEFAULT_LINUX_STORE = 'secretservice' +DEFAULT_OSX_STORE = 'osxkeychain' +DEFAULT_WIN32_STORE = 'wincred' diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/errors.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/errors.py new file mode 100644 index 0000000000000000000000000000000000000000..9e4695ca9d98d69e294c9cf483eb4f69df4b1560 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/errors.py @@ -0,0 +1,25 @@ +class StoreError(RuntimeError): + pass + + +class CredentialsNotFound(StoreError): + pass + + +class InitializationError(StoreError): + pass + + +def process_store_error(cpe, program): + message = cpe.output.decode('utf-8') + if 'credentials not found in native keychain' in message: + return CredentialsNotFound( + 'No matching credentials in {0}'.format( + program + ) + ) + return StoreError( + 'Credentials store {0} exited with "{1}".'.format( + program, cpe.output.decode('utf-8').strip() + ) + ) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/store.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/store.py new file mode 100644 index 0000000000000000000000000000000000000000..728451222166455e6a50c4eda94cccb096dd0a11 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/store.py @@ -0,0 +1,107 @@ +import json +import os +import subprocess + +import six + +from . import constants +from . 
import errors +from .utils import create_environment_dict +from .utils import find_executable + + +class Store(object): + def __init__(self, program, environment=None): + """ Create a store object that acts as an interface to + perform the basic operations for storing, retrieving + and erasing credentials using `program`. + """ + self.program = constants.PROGRAM_PREFIX + program + self.exe = find_executable(self.program) + self.environment = environment + if self.exe is None: + raise errors.InitializationError( + '{0} not installed or not available in PATH'.format( + self.program + ) + ) + + def get(self, server): + """ Retrieve credentials for `server`. If no credentials are found, + a `StoreError` will be raised. + """ + if not isinstance(server, six.binary_type): + server = server.encode('utf-8') + data = self._execute('get', server) + result = json.loads(data.decode('utf-8')) + + # docker-credential-pass will return an object for inexistent servers + # whereas other helpers will exit with returncode != 0. For + # consistency, if no significant data is returned, + # raise CredentialsNotFound + if result['Username'] == '' and result['Secret'] == '': + raise errors.CredentialsNotFound( + 'No matching credentials in {}'.format(self.program) + ) + + return result + + def store(self, server, username, secret): + """ Store credentials for `server`. Raises a `StoreError` if an error + occurs. + """ + data_input = json.dumps({ + 'ServerURL': server, + 'Username': username, + 'Secret': secret + }).encode('utf-8') + return self._execute('store', data_input) + + def erase(self, server): + """ Erase credentials for `server`. Raises a `StoreError` if an error + occurs. + """ + if not isinstance(server, six.binary_type): + server = server.encode('utf-8') + self._execute('erase', server) + + def list(self): + """ List stored credentials. Requires v0.4.0+ of the helper. 
+ """ + data = self._execute('list', None) + return json.loads(data.decode('utf-8')) + + def _execute(self, subcmd, data_input): + output = None + env = create_environment_dict(self.environment) + try: + if six.PY3: + output = subprocess.check_output( + [self.exe, subcmd], input=data_input, env=env, + ) + else: + process = subprocess.Popen( + [self.exe, subcmd], stdin=subprocess.PIPE, + stdout=subprocess.PIPE, env=env, + ) + output, err = process.communicate(data_input) + if process.returncode != 0: + raise subprocess.CalledProcessError( + returncode=process.returncode, cmd='', output=output + ) + except subprocess.CalledProcessError as e: + raise errors.process_store_error(e, self.program) + except OSError as e: + if e.errno == os.errno.ENOENT: + raise errors.StoreError( + '{0} not installed or not available in PATH'.format( + self.program + ) + ) + else: + raise errors.StoreError( + 'Unexpected OS error "{0}", errno={1}'.format( + e.strerror, e.errno + ) + ) + return output diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/utils.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..3f720ef1a7c2be69832f9eadb417abc8c377b2ac --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/utils.py @@ -0,0 +1,38 @@ +import distutils.spawn +import os +import sys + + +def find_executable(executable, path=None): + """ + As distutils.spawn.find_executable, but on Windows, look up + every extension declared in PATHEXT instead of just `.exe` + """ + if sys.platform != 'win32': + return distutils.spawn.find_executable(executable, path) + + if path is None: + path = os.environ['PATH'] + + paths = path.split(os.pathsep) + extensions = os.environ.get('PATHEXT', '.exe').split(os.pathsep) + base, ext = os.path.splitext(executable) + + if not os.path.isfile(executable): + for p in paths: 
+ for ext in extensions: + f = os.path.join(p, base + ext) + if os.path.isfile(f): + return f + return None + else: + return executable + + +def create_environment_dict(overrides): + """ + Create and return a copy of os.environ with the specified overrides + """ + result = os.environ.copy() + result.update(overrides or {}) + return result diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/version.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/version.py new file mode 100644 index 0000000000000000000000000000000000000000..df0b6bbe4126b0fd1a7555a3c2257311119b573e --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/dockerpycreds/version.py @@ -0,0 +1,2 @@ +version = "0.4.0" +version_info = tuple([int(d) for d in version.split("-")[0].split(".")]) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/LICENSE b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..3a654e906619009358eb2cfe80609bd12b43fa7f --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Alex Rogozhnikov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/METADATA b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..dab1c5d48716db88e6558d05b75cb506c6e1624b --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/METADATA @@ -0,0 +1,309 @@ +Metadata-Version: 2.1 +Name: einops +Version: 0.4.1 +Summary: A new flavour of deep learning operations +Home-page: https://github.com/arogozhnikov/einops +Author: Alex Rogozhnikov +License: UNKNOWN +Keywords: deep learning,neural networks,tensor manipulation,machine learning,scientific computations,einops +Platform: UNKNOWN +Classifier: Intended Audience :: Science/Research +Classifier: Programming Language :: Python :: 3 +Description-Content-Type: text/markdown +License-File: LICENSE + + + + + + +
+ einops package examples +
+
This video in high quality (mp4) +

+
+ + +# einops +[![Run tests](https://github.com/arogozhnikov/einops/actions/workflows/run_tests.yml/badge.svg)](https://github.com/arogozhnikov/einops/actions/workflows/run_tests.yml) +[![PyPI version](https://badge.fury.io/py/einops.svg)](https://badge.fury.io/py/einops) +[![Documentation](https://img.shields.io/badge/documentation-link-blue.svg)](https://einops.rocks/) +![Supported python versions](https://raw.githubusercontent.com/arogozhnikov/einops/master/docs/resources/python_badge.svg) + + +Flexible and powerful tensor operations for readable and reliable code. +Supports numpy, pytorch, tensorflow, jax, and [others](#supported-frameworks). + +## Recent updates: + +- torch.jit.script is supported for pytorch layers +- powerful EinMix added to einops. [Einmix tutorial notebook](https://github.com/arogozhnikov/einops/blob/master/docs/3-einmix-layer.ipynb) + + + +## Tweets + +> In case you need convincing arguments for setting aside time to learn about einsum and einops... +[Tim Rocktäschel, FAIR](https://twitter.com/_rockt/status/1230818967205425152) + +> Writing better code with PyTorch and einops 👌 +[Andrej Karpathy, AI at Tesla](https://twitter.com/karpathy/status/1290826075916779520) + +> Slowly but surely, einops is seeping in to every nook and cranny of my code. 
If you find yourself shuffling around bazillion dimensional tensors, this might change your life +[Nasim Rahaman, MILA (Montreal)](https://twitter.com/nasim_rahaman/status/1216022614755463169) + +[More testimonials](https://einops.rocks/pages/testimonials/) + +## Contents + +- [Installation](#Installation) +- [Documentation](https://einops.rocks/) +- [Tutorial](#Tutorials) +- [API micro-reference](#API) +- [Why using einops](#Why-using-einops-notation) +- [Supported frameworks](#Supported-frameworks) +- [Contributing](#Contributing) +- [Repository](https://github.com/arogozhnikov/einops) and [discussions](https://github.com/arogozhnikov/einops/discussions) + +## Installation + +Plain and simple: +```bash +pip install einops +``` + + + +## Tutorials + +Tutorials are the most convenient way to see `einops` in action + +- part 1: [einops fundamentals](https://github.com/arogozhnikov/einops/blob/master/docs/1-einops-basics.ipynb) +- part 2: [einops for deep learning](https://github.com/arogozhnikov/einops/blob/master/docs/2-einops-for-deep-learning.ipynb) +- part 3: [improve pytorch code with einops](https://arogozhnikov.github.io/einops/pytorch-examples.html) + + +## API + +`einops` has a minimalistic yet powerful API. + +Three operations provided ([einops tutorial](https://github.com/arogozhnikov/einops/blob/master/docs/) +shows those cover stacking, reshape, transposition, squeeze/unsqueeze, repeat, tile, concatenate, view and numerous reductions) + +```python +from einops import rearrange, reduce, repeat +# rearrange elements according to the pattern +output_tensor = rearrange(input_tensor, 't b c -> b c t') +# combine rearrangement and reduction +output_tensor = reduce(input_tensor, 'b c (h h2) (w w2) -> b h w c', 'mean', h2=2, w2=2) +# copy along a new axis +output_tensor = repeat(input_tensor, 'h w -> h w c', c=3) +``` +And two corresponding layers (`einops` keeps a separate version for each framework) with the same API. 
+ +```python +from einops.layers.chainer import Rearrange, Reduce +from einops.layers.gluon import Rearrange, Reduce +from einops.layers.keras import Rearrange, Reduce +from einops.layers.torch import Rearrange, Reduce +from einops.layers.tensorflow import Rearrange, Reduce +``` + +Layers behave similarly to operations and have the same parameters +(with the exception of the first argument, which is passed during call) + +```python +layer = Rearrange(pattern, **axes_lengths) +layer = Reduce(pattern, reduction, **axes_lengths) + +# apply created layer to a tensor / variable +x = layer(x) +``` + +Example of using layers within a model: +```python +# example given for pytorch, but code in other frameworks is almost identical +from torch.nn import Sequential, Conv2d, MaxPool2d, Linear, ReLU +from einops.layers.torch import Rearrange + +model = Sequential( + Conv2d(3, 6, kernel_size=5), + MaxPool2d(kernel_size=2), + Conv2d(6, 16, kernel_size=5), + MaxPool2d(kernel_size=2), + # flattening + Rearrange('b c h w -> b (c h w)'), + Linear(16*5*5, 120), + ReLU(), + Linear(120, 10), +) +``` + + + +## Naming + +`einops` stands for Einstein-Inspired Notation for operations +(though "Einstein operations" is more attractive and easier to remember). + +Notation was loosely inspired by Einstein summation (in particular by `numpy.einsum` operation). + +## Why use `einops` notation?! + + +### Semantic information (being verbose in expectations) + +```python +y = x.view(x.shape[0], -1) +y = rearrange(x, 'b c h w -> b (c h w)') +``` +While these two lines are doing the same job in *some* context, +the second one provides information about the input and output. +In other words, `einops` focuses on interface: *what is the input and output*, not *how* the output is computed. 
+ +The next operation looks similar: + +```python +y = rearrange(x, 'time c h w -> time (c h w)') +``` +but it gives the reader a hint: +this is not an independent batch of images we are processing, +but rather a sequence (video). + +Semantic information makes the code easier to read and maintain. + +### Convenient checks + +Reconsider the same example: + +```python +y = x.view(x.shape[0], -1) # x: (batch, 256, 19, 19) +y = rearrange(x, 'b c h w -> b (c h w)') +``` +The second line checks that the input has four dimensions, +but you can also specify particular dimensions. +That's opposed to just writing comments about shapes since +[comments don't work and don't prevent mistakes](https://medium.freecodecamp.org/code-comments-the-good-the-bad-and-the-ugly-be9cc65fbf83) +as we know +```python +y = x.view(x.shape[0], -1) # x: (batch, 256, 19, 19) +y = rearrange(x, 'b c h w -> b (c h w)', c=256, h=19, w=19) +``` + +### Result is strictly determined + +Below we have at least two ways to define the depth-to-space operation +```python +# depth-to-space +rearrange(x, 'b c (h h2) (w w2) -> b (c h2 w2) h w', h2=2, w2=2) +rearrange(x, 'b c (h h2) (w w2) -> b (h2 w2 c) h w', h2=2, w2=2) +``` +There are at least four more ways to do it. Which one is used by the framework? + +These details are ignored, since *usually* it makes no difference, +but it can make a big difference (e.g. if you use grouped convolutions in the next stage), +and you'd like to specify this in your code. + + +### Uniformity + +```python +reduce(x, 'b c (x dx) -> b c x', 'max', dx=2) +reduce(x, 'b c (x dx) (y dy) -> b c x y', 'max', dx=2, dy=3) +reduce(x, 'b c (x dx) (y dy) (z dz) -> b c x y z', 'max', dx=2, dy=3, dz=4) +``` +These examples demonstrated that we don't use separate operations for 1d/2d/3d pooling, +those are all defined in a uniform way. + +Space-to-depth and depth-to space are defined in many frameworks but how about width-to-height? 
Here you go: + +```python +rearrange(x, 'b c h (w w2) -> b c (h w2) w', w2=2) +``` + +### Framework independent behavior + +Even simple functions are defined differently by different frameworks + +```python +y = x.flatten() # or flatten(x) +``` + +Suppose `x`'s shape was `(3, 4, 5)`, then `y` has shape ... + +- numpy, cupy, chainer, pytorch: `(60,)` +- keras, tensorflow.layers, mxnet and gluon: `(3, 20)` + +`einops` works the same way in all frameworks. + +### Independence of framework terminology + +Example: `tile` vs `repeat` causes lots of confusion. To copy image along width: +```python +np.tile(image, (1, 2)) # in numpy +image.repeat(1, 2) # pytorch's repeat ~ numpy's tile +``` + +With einops you don't need to decipher which axis was repeated: +```python +repeat(image, 'h w -> h (tile w)', tile=2) # in numpy +repeat(image, 'h w -> h (tile w)', tile=2) # in pytorch +repeat(image, 'h w -> h (tile w)', tile=2) # in tf +repeat(image, 'h w -> h (tile w)', tile=2) # in jax +repeat(image, 'h w -> h (tile w)', tile=2) # in mxnet +... (etc.) +``` + +Testimonials provide user's perspective on the same question. + +## Supported frameworks + +Einops works with ... + +- [numpy](http://www.numpy.org/) +- [pytorch](https://pytorch.org/) +- [tensorflow](https://www.tensorflow.org/) +- [jax](https://github.com/google/jax) +- [cupy](https://cupy.chainer.org/) +- [chainer](https://chainer.org/) +- [gluon](https://gluon.mxnet.io/) +- [tf.keras](https://www.tensorflow.org/guide/keras) +- [mxnet](https://mxnet.apache.org/) (experimental) + + +## Contributing + +Best ways to contribute are + +- spread the word about `einops` +- if you like explaining things, more tutorials/tear-downs of implementations is welcome +- tutorials in other languages are very welcome +- do you have project/code example to share? Let me know in github discussions +- use `einops` in your papers! + +## Supported python versions + +`einops` works with python 3.6 or later. 
+ + diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/RECORD b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..c83ca1081bb599f3c03a081c791d14215c40f59e --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/RECORD @@ -0,0 +1,31 @@ +einops-0.4.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +einops-0.4.1.dist-info/LICENSE,sha256=MNmENkKW9R_67K1LAe4SfpUlDFBokY1LZvyWIGcj5DQ,1073 +einops-0.4.1.dist-info/METADATA,sha256=UdOBa4tijnwPJI48dGASJt4-czHTJ4LLiY4dfdRXffI,10737 +einops-0.4.1.dist-info/RECORD,, +einops-0.4.1.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +einops-0.4.1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92 +einops-0.4.1.dist-info/top_level.txt,sha256=zh9ckJ4QUP-fUBSO5-UKAcNKvC_lzMGMYS6_nnoT4Tc,7 +einops/__init__.py,sha256=8uWtV9MPDvreSsu6VG0E-j2TlxrckMGW2Zy5fCRFu6I,297 +einops/__pycache__/__init__.cpython-38.pyc,, +einops/__pycache__/_backends.cpython-38.pyc,, +einops/__pycache__/_torch_specific.cpython-38.pyc,, +einops/__pycache__/einops.cpython-38.pyc,, +einops/__pycache__/parsing.cpython-38.pyc,, +einops/_backends.py,sha256=xyh2XysbubzGMqyRZPu8ld5-1bob693ESp6vkZR4gV8,17132 +einops/_torch_specific.py,sha256=-VuZFXozi6GPtKQFWvb7BhzswyOfjByXGG0GLvvhDXg,2804 +einops/einops.py,sha256=LkY-JdbOUqf_GtuiskedET4aaSs0yJKfVaLoBwS6UP8,27879 +einops/layers/__init__.py,sha256=JHwHQUP5sBIYhSwRrjhZYxGIdw8-UTEWUPbeEduBuBY,2824 +einops/layers/__pycache__/__init__.cpython-38.pyc,, +einops/layers/__pycache__/_einmix.cpython-38.pyc,, +einops/layers/__pycache__/chainer.cpython-38.pyc,, +einops/layers/__pycache__/gluon.cpython-38.pyc,, +einops/layers/__pycache__/keras.cpython-38.pyc,, +einops/layers/__pycache__/tensorflow.cpython-38.pyc,, 
+einops/layers/__pycache__/torch.cpython-38.pyc,, +einops/layers/_einmix.py,sha256=k1Wt5z7KmJF9nj345ZhUXeRBcV1D2bNkz35yF82zB_E,8249 +einops/layers/chainer.py,sha256=VisqqyZiEpDl7NdCSjVSa4u7aXgZuNpA0hglkfGydiM,1927 +einops/layers/gluon.py,sha256=Ll85s1OWKqRAhSwFS33jQwbTicD1MnhrH4lbnlqvoPU,2101 +einops/layers/keras.py,sha256=RTsR-aim1Sco5VXI2W1Qs639hJRJ0hWIilTZCs3Ftn4,212 +einops/layers/tensorflow.py,sha256=xNsVaKIMoB2kZeSeFUKXq29LWz-Fppt2K2aRln5s0-Y,3269 +einops/layers/torch.py,sha256=IOdwPR2uL_ZFuzWthGz6p-8af1zg801UmjB8uTBA5HY,2379 +einops/parsing.py,sha256=75hvgp6iWvvLUe67IaQujmox1tjvF9ZsBMaXQYnQmqU,6637 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/REQUESTED b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/REQUESTED new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/WHEEL b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..becc9a66ea739ba941d48a749e248761cc6e658a --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.37.1) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/top_level.txt b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..d27fa26c681221a38b4a3e34e9734be8e6e93952 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/einops-0.4.1.dist-info/top_level.txt @@ -0,0 +1 @@ +einops diff --git 
a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/filelock/__init__.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/filelock/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4cf3b507d7f908a3eff1b3e01db7164ee9209807 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/filelock/__init__.py @@ -0,0 +1,51 @@ +""" +A platform independent file lock that supports the with-statement. + +.. autodata:: filelock.__version__ + :no-value: + +""" +from __future__ import annotations + +import sys +import warnings +from typing import TYPE_CHECKING + +from ._api import AcquireReturnProxy, BaseFileLock +from ._error import Timeout +from ._soft import SoftFileLock +from ._unix import UnixFileLock, has_fcntl +from ._windows import WindowsFileLock +from .version import version + +#: version of the project as a string +__version__: str = version + + +if sys.platform == "win32": # pragma: win32 cover + _FileLock: type[BaseFileLock] = WindowsFileLock +else: # pragma: win32 no cover # noqa: PLR5501 + if has_fcntl: + _FileLock: type[BaseFileLock] = UnixFileLock + else: + _FileLock = SoftFileLock + if warnings is not None: + warnings.warn("only soft file lock is available", stacklevel=2) + +if TYPE_CHECKING: + FileLock = SoftFileLock +else: + #: Alias for the lock, which should be used for the current platform. 
+ FileLock = _FileLock + + +__all__ = [ + "__version__", + "FileLock", + "SoftFileLock", + "Timeout", + "UnixFileLock", + "WindowsFileLock", + "BaseFileLock", + "AcquireReturnProxy", +] diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/filelock/_error.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/filelock/_error.py new file mode 100644 index 0000000000000000000000000000000000000000..f7ff08c0f508ad7077eb6ed1990898840c952b3a --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/filelock/_error.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import Any + + +class Timeout(TimeoutError): # noqa: N818 + """Raised when the lock could not be acquired in *timeout* seconds.""" + + def __init__(self, lock_file: str) -> None: + super().__init__() + self._lock_file = lock_file + + def __reduce__(self) -> str | tuple[Any, ...]: + return self.__class__, (self._lock_file,) # Properly pickle the exception + + def __str__(self) -> str: + return f"The file lock '{self._lock_file}' could not be acquired." 
+ + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.lock_file!r})" + + @property + def lock_file(self) -> str: + """:return: The path of the file lock.""" + return self._lock_file + + +__all__ = [ + "Timeout", +] diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/filelock/_soft.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/filelock/_soft.py new file mode 100644 index 0000000000000000000000000000000000000000..28c67f74cc82b8f55e47afd6a71972cc1fb95eb6 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/filelock/_soft.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import os +import sys +from contextlib import suppress +from errno import EACCES, EEXIST +from pathlib import Path + +from ._api import BaseFileLock +from ._util import ensure_directory_exists, raise_on_not_writable_file + + +class SoftFileLock(BaseFileLock): + """Simply watches the existence of the lock file.""" + + def _acquire(self) -> None: + raise_on_not_writable_file(self.lock_file) + ensure_directory_exists(self.lock_file) + # first check for exists and read-only mode as the open will mask this case as EEXIST + flags = ( + os.O_WRONLY # open for writing only + | os.O_CREAT + | os.O_EXCL # together with above raise EEXIST if the file specified by filename exists + | os.O_TRUNC # truncate the file to zero byte + ) + try: + file_handler = os.open(self.lock_file, flags, self._context.mode) + except OSError as exception: # re-raise unless expected exception + if not ( + exception.errno == EEXIST # lock already exist + or (exception.errno == EACCES and sys.platform == "win32") # has no access to this lock + ): # pragma: win32 no cover + raise + else: + self._context.lock_file_fd = file_handler + + def _release(self) -> None: + assert self._context.lock_file_fd is not None # noqa: S101 + os.close(self._context.lock_file_fd) # the lock file is definitely not None + 
self._context.lock_file_fd = None + with suppress(OSError): # the file is already deleted and that's what we want + Path(self.lock_file).unlink() + + +__all__ = [ + "SoftFileLock", +] diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/filelock/_windows.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/filelock/_windows.py new file mode 100644 index 0000000000000000000000000000000000000000..8db55dcbaa3e7bab091781b17ce22fde1fc239f2 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/filelock/_windows.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import os +import sys +from contextlib import suppress +from errno import EACCES +from pathlib import Path +from typing import cast + +from ._api import BaseFileLock +from ._util import ensure_directory_exists, raise_on_not_writable_file + +if sys.platform == "win32": # pragma: win32 cover + import msvcrt + + class WindowsFileLock(BaseFileLock): + """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems.""" + + def _acquire(self) -> None: + raise_on_not_writable_file(self.lock_file) + ensure_directory_exists(self.lock_file) + flags = ( + os.O_RDWR # open for read and write + | os.O_CREAT # create file if not exists + | os.O_TRUNC # truncate file if not empty + ) + try: + fd = os.open(self.lock_file, flags, self._context.mode) + except OSError as exception: + if exception.errno != EACCES: # has no access to this lock + raise + else: + try: + msvcrt.locking(fd, msvcrt.LK_NBLCK, 1) + except OSError as exception: + os.close(fd) # close file first + if exception.errno != EACCES: # file is already locked + raise + else: + self._context.lock_file_fd = fd + + def _release(self) -> None: + fd = cast(int, self._context.lock_file_fd) + self._context.lock_file_fd = None + msvcrt.locking(fd, msvcrt.LK_UNLCK, 1) + os.close(fd) + + with suppress(OSError): # Probably another instance of the application hat 
acquired the file lock. + Path(self.lock_file).unlink() + +else: # pragma: win32 no cover + + class WindowsFileLock(BaseFileLock): + """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems.""" + + def _acquire(self) -> None: + raise NotImplementedError + + def _release(self) -> None: + raise NotImplementedError + + +__all__ = [ + "WindowsFileLock", +] diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/filelock/version.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/filelock/version.py new file mode 100644 index 0000000000000000000000000000000000000000..cc9fc1550b3b64cc4ff85291e33b6cb0a745af97 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/filelock/version.py @@ -0,0 +1,16 @@ +# file generated by setuptools_scm +# don't change, don't track in version control +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple, Union + VERSION_TUPLE = Tuple[Union[int, str], ...] 
+else: + VERSION_TUPLE = object + +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE + +__version__ = version = '3.13.1' +__version_tuple__ = version_tuple = (3, 13, 1) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/h5py/h5t.cpython-38-x86_64-linux-gnu.so b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/h5py/h5t.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..032bb22a228b3bfe478fb321e12d6b22973aa10b --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/h5py/h5t.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb221d3a6600c61dd539917c6744220aa72dbd66b25a24795eeea1f3e43ab4a9 +size 950896 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/_dask.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/_dask.py new file mode 100644 index 0000000000000000000000000000000000000000..e759b7fd890efc291d254d5cec35dd070dc403cb --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/_dask.py @@ -0,0 +1,364 @@ +from __future__ import print_function, division, absolute_import + +import asyncio +import concurrent.futures +import contextlib + +import time +from uuid import uuid4 +import weakref + +from .parallel import AutoBatchingMixin, ParallelBackendBase, BatchedCalls +from .parallel import parallel_backend + +try: + import distributed +except ImportError: + distributed = None + +if distributed is not None: + from dask.utils import funcname, itemgetter + from dask.sizeof import sizeof + from dask.distributed import ( + Client, + as_completed, + get_client, + secede, + rejoin + ) + from distributed.utils import thread_state + + try: + # asyncio.TimeoutError, Python3-only error thrown by recent versions of + # distributed + from 
distributed.utils import TimeoutError as _TimeoutError + except ImportError: + from tornado.gen import TimeoutError as _TimeoutError + + +def is_weakrefable(obj): + try: + weakref.ref(obj) + return True + except TypeError: + return False + + +class _WeakKeyDictionary: + """A variant of weakref.WeakKeyDictionary for unhashable objects. + + This datastructure is used to store futures for broadcasted data objects + such as large numpy arrays or pandas dataframes that are not hashable and + therefore cannot be used as keys of traditional python dicts. + + Furthermore using a dict with id(array) as key is not safe because the + Python is likely to reuse id of recently collected arrays. + """ + + def __init__(self): + self._data = {} + + def __getitem__(self, obj): + ref, val = self._data[id(obj)] + if ref() is not obj: + # In case of a race condition with on_destroy. + raise KeyError(obj) + return val + + def __setitem__(self, obj, value): + key = id(obj) + try: + ref, _ = self._data[key] + if ref() is not obj: + # In case of race condition with on_destroy. + raise KeyError(obj) + except KeyError: + # Insert the new entry in the mapping along with a weakref + # callback to automatically delete the entry from the mapping + # as soon as the object used as key is garbage collected. 
+ def on_destroy(_): + del self._data[key] + ref = weakref.ref(obj, on_destroy) + self._data[key] = ref, value + + def __len__(self): + return len(self._data) + + def clear(self): + self._data.clear() + + +def _funcname(x): + try: + if isinstance(x, list): + x = x[0][0] + except Exception: + pass + return funcname(x) + + +def _make_tasks_summary(tasks): + """Summarize of list of (func, args, kwargs) function calls""" + unique_funcs = {func for func, args, kwargs in tasks} + + if len(unique_funcs) == 1: + mixed = False + else: + mixed = True + return len(tasks), mixed, _funcname(tasks) + + +class Batch: + """dask-compatible wrapper that executes a batch of tasks""" + def __init__(self, tasks): + # collect some metadata from the tasks to ease Batch calls + # introspection when debugging + self._num_tasks, self._mixed, self._funcname = _make_tasks_summary( + tasks + ) + + def __call__(self, tasks=None): + results = [] + with parallel_backend('dask'): + for func, args, kwargs in tasks: + results.append(func(*args, **kwargs)) + return results + + def __repr__(self): + descr = f"batch_of_{self._funcname}_{self._num_tasks}_calls" + if self._mixed: + descr = "mixed_" + descr + return descr + + +def _joblib_probe_task(): + # Noop used by the joblib connector to probe when workers are ready. + pass + + +class DaskDistributedBackend(AutoBatchingMixin, ParallelBackendBase): + MIN_IDEAL_BATCH_DURATION = 0.2 + MAX_IDEAL_BATCH_DURATION = 1.0 + supports_timeout = True + + def __init__(self, scheduler_host=None, scatter=None, + client=None, loop=None, wait_for_workers_timeout=10, + **submit_kwargs): + super().__init__() + + if distributed is None: + msg = ("You are trying to use 'dask' as a joblib parallel backend " + "but dask is not installed. 
Please install dask " + "to fix this error.") + raise ValueError(msg) + + if client is None: + if scheduler_host: + client = Client(scheduler_host, loop=loop, + set_as_default=False) + else: + try: + client = get_client() + except ValueError as e: + msg = ("To use Joblib with Dask first create a Dask Client" + "\n\n" + " from dask.distributed import Client\n" + " client = Client()\n" + "or\n" + " client = Client('scheduler-address:8786')") + raise ValueError(msg) from e + + self.client = client + + if scatter is not None and not isinstance(scatter, (list, tuple)): + raise TypeError("scatter must be a list/tuple, got " + "`%s`" % type(scatter).__name__) + + if scatter is not None and len(scatter) > 0: + # Keep a reference to the scattered data to keep the ids the same + self._scatter = list(scatter) + scattered = self.client.scatter(scatter, broadcast=True) + self.data_futures = {id(x): f for x, f in zip(scatter, scattered)} + else: + self._scatter = [] + self.data_futures = {} + self.wait_for_workers_timeout = wait_for_workers_timeout + self.submit_kwargs = submit_kwargs + self.waiting_futures = as_completed( + [], + loop=client.loop, + with_results=True, + raise_errors=False + ) + self._results = {} + self._callbacks = {} + + async def _collect(self): + while self._continue: + async for future, result in self.waiting_futures: + cf_future = self._results.pop(future) + callback = self._callbacks.pop(future) + if future.status == "error": + typ, exc, tb = result + cf_future.set_exception(exc) + else: + cf_future.set_result(result) + callback(result) + await asyncio.sleep(0.01) + + def __reduce__(self): + return (DaskDistributedBackend, ()) + + def get_nested_backend(self): + return DaskDistributedBackend(client=self.client), -1 + + def configure(self, n_jobs=1, parallel=None, **backend_args): + self.parallel = parallel + return self.effective_n_jobs(n_jobs) + + def start_call(self): + self._continue = True + self.client.loop.add_callback(self._collect) + 
self.call_data_futures = _WeakKeyDictionary() + + def stop_call(self): + # The explicit call to clear is required to break a cycling reference + # to the futures. + self._continue = False + # wait for the future collection routine (self._backend._collect) to + # finish in order to limit asyncio warnings due to aborting _collect + # during a following backend termination call + time.sleep(0.01) + self.call_data_futures.clear() + + def effective_n_jobs(self, n_jobs): + effective_n_jobs = sum(self.client.ncores().values()) + if effective_n_jobs != 0 or not self.wait_for_workers_timeout: + return effective_n_jobs + + # If there is no worker, schedule a probe task to wait for the workers + # to come up and be available. If the dask cluster is in adaptive mode + # task might cause the cluster to provision some workers. + try: + self.client.submit(_joblib_probe_task).result( + timeout=self.wait_for_workers_timeout) + except _TimeoutError as e: + error_msg = ( + "DaskDistributedBackend has no worker after {} seconds. " + "Make sure that workers are started and can properly connect " + "to the scheduler and increase the joblib/dask connection " + "timeout with:\n\n" + "parallel_backend('dask', wait_for_workers_timeout={})" + ).format(self.wait_for_workers_timeout, + max(10, 2 * self.wait_for_workers_timeout)) + raise TimeoutError(error_msg) from e + return sum(self.client.ncores().values()) + + async def _to_func_args(self, func): + itemgetters = dict() + + # Futures that are dynamically generated during a single call to + # Parallel.__call__. 
+ call_data_futures = getattr(self, 'call_data_futures', None) + + async def maybe_to_futures(args): + out = [] + for arg in args: + arg_id = id(arg) + if arg_id in itemgetters: + out.append(itemgetters[arg_id]) + continue + + f = self.data_futures.get(arg_id, None) + if f is None and call_data_futures is not None: + try: + f = await call_data_futures[arg] + except KeyError: + pass + if f is None: + if is_weakrefable(arg) and sizeof(arg) > 1e3: + # Automatically scatter large objects to some of + # the workers to avoid duplicated data transfers. + # Rely on automated inter-worker data stealing if + # more workers need to reuse this data + # concurrently. + # set hash=False - nested scatter calls (i.e + # calling client.scatter inside a dask worker) + # using hash=True often raise CancelledError, + # see dask/distributed#3703 + _coro = self.client.scatter( + arg, + asynchronous=True, + hash=False + ) + # Centralize the scattering of identical arguments + # between concurrent apply_async callbacks by + # exposing the running coroutine in + # call_data_futures before it completes. 
+ t = asyncio.Task(_coro) + call_data_futures[arg] = t + + f = await t + + if f is not None: + out.append(f) + else: + out.append(arg) + return out + + tasks = [] + for f, args, kwargs in func.items: + args = list(await maybe_to_futures(args)) + kwargs = dict(zip(kwargs.keys(), + await maybe_to_futures(kwargs.values()))) + tasks.append((f, args, kwargs)) + + return (Batch(tasks), tasks) + + def apply_async(self, func, callback=None): + + cf_future = concurrent.futures.Future() + cf_future.get = cf_future.result # achieve AsyncResult API + + async def f(func, callback): + batch, tasks = await self._to_func_args(func) + key = f'{repr(batch)}-{uuid4().hex}' + + dask_future = self.client.submit( + batch, tasks=tasks, key=key, **self.submit_kwargs + ) + self.waiting_futures.add(dask_future) + self._callbacks[dask_future] = callback + self._results[dask_future] = cf_future + + self.client.loop.add_callback(f, func, callback) + + return cf_future + + def abort_everything(self, ensure_ready=True): + """ Tell the client to cancel any task submitted via this instance + + joblib.Parallel will never access those results + """ + with self.waiting_futures.lock: + self.waiting_futures.futures.clear() + while not self.waiting_futures.queue.empty(): + self.waiting_futures.queue.get() + + @contextlib.contextmanager + def retrieval_context(self): + """Override ParallelBackendBase.retrieval_context to avoid deadlocks. + + This removes thread from the worker's thread pool (using 'secede'). + Seceding avoids deadlock in nested parallelism settings. + """ + # See 'joblib.Parallel.__call__' and 'joblib.Parallel.retrieve' for how + # this is used. + if hasattr(thread_state, 'execution_state'): + # we are in a worker. Secede to avoid deadlock. 
+ secede() + + yield + + if hasattr(thread_state, 'execution_state'): + rejoin() diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/_parallel_backends.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/_parallel_backends.py new file mode 100644 index 0000000000000000000000000000000000000000..42645285d929bfa652b06f333647473d4c5edf94 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/_parallel_backends.py @@ -0,0 +1,610 @@ +""" +Backends for embarrassingly parallel code. +""" + +import gc +import os +import warnings +import threading +import functools +import contextlib +from abc import ABCMeta, abstractmethod + +from .my_exceptions import WorkerInterrupt +from ._multiprocessing_helpers import mp + +if mp is not None: + from .pool import MemmappingPool + from multiprocessing.pool import ThreadPool + from .executor import get_memmapping_executor + + # Compat between concurrent.futures and multiprocessing TimeoutError + from multiprocessing import TimeoutError + from concurrent.futures._base import TimeoutError as CfTimeoutError + from .externals.loky import process_executor, cpu_count + + +class ParallelBackendBase(metaclass=ABCMeta): + """Helper abc which defines all methods a ParallelBackend must implement""" + + supports_timeout = False + supports_inner_max_num_threads = False + nesting_level = None + + def __init__(self, nesting_level=None, inner_max_num_threads=None, + **kwargs): + super().__init__(**kwargs) + self.nesting_level = nesting_level + self.inner_max_num_threads = inner_max_num_threads + + MAX_NUM_THREADS_VARS = [ + 'OMP_NUM_THREADS', 'OPENBLAS_NUM_THREADS', 'MKL_NUM_THREADS', + 'BLIS_NUM_THREADS', 'VECLIB_MAXIMUM_THREADS', 'NUMBA_NUM_THREADS', + 'NUMEXPR_NUM_THREADS', + ] + + TBB_ENABLE_IPC_VAR = "ENABLE_IPC" + + @abstractmethod + def effective_n_jobs(self, n_jobs): + """Determine the number of jobs that can actually run in parallel + + 
n_jobs is the number of workers requested by the callers. Passing + n_jobs=-1 means requesting all available workers for instance matching + the number of CPU cores on the worker host(s). + + This method should return a guesstimate of the number of workers that + can actually perform work concurrently. The primary use case is to make + it possible for the caller to know in how many chunks to slice the + work. + + In general working on larger data chunks is more efficient (less + scheduling overhead and better use of CPU cache prefetching heuristics) + as long as all the workers have enough work to do. + """ + + @abstractmethod + def apply_async(self, func, callback=None): + """Schedule a func to be run""" + + def configure(self, n_jobs=1, parallel=None, prefer=None, require=None, + **backend_args): + """Reconfigure the backend and return the number of workers. + + This makes it possible to reuse an existing backend instance for + successive independent calls to Parallel with different parameters. + """ + self.parallel = parallel + return self.effective_n_jobs(n_jobs) + + def start_call(self): + """Call-back method called at the beginning of a Parallel call""" + + def stop_call(self): + """Call-back method called at the end of a Parallel call""" + + def terminate(self): + """Shutdown the workers and free the shared memory.""" + + def compute_batch_size(self): + """Determine the optimal batch size""" + return 1 + + def batch_completed(self, batch_size, duration): + """Callback indicate how long it took to run a batch""" + + def get_exceptions(self): + """List of exception types to be captured.""" + return [] + + def abort_everything(self, ensure_ready=True): + """Abort any running tasks + + This is called when an exception has been raised when executing a tasks + and all the remaining tasks will be ignored and can therefore be + aborted to spare computation resources. 
+ + If ensure_ready is True, the backend should be left in an operating + state as future tasks might be re-submitted via that same backend + instance. + + If ensure_ready is False, the implementer of this method can decide + to leave the backend in a closed / terminated state as no new task + are expected to be submitted to this backend. + + Setting ensure_ready to False is an optimization that can be leveraged + when aborting tasks via killing processes from a local process pool + managed by the backend it-self: if we expect no new tasks, there is no + point in re-creating new workers. + """ + # Does nothing by default: to be overridden in subclasses when + # canceling tasks is possible. + pass + + def get_nested_backend(self): + """Backend instance to be used by nested Parallel calls. + + By default a thread-based backend is used for the first level of + nesting. Beyond, switch to sequential backend to avoid spawning too + many threads on the host. + """ + nesting_level = getattr(self, 'nesting_level', 0) + 1 + if nesting_level > 1: + return SequentialBackend(nesting_level=nesting_level), None + else: + return ThreadingBackend(nesting_level=nesting_level), None + + @contextlib.contextmanager + def retrieval_context(self): + """Context manager to manage an execution context. + + Calls to Parallel.retrieve will be made inside this context. + + By default, this does nothing. It may be useful for subclasses to + handle nested parallelism. In particular, it may be required to avoid + deadlocks if a backend manages a fixed number of workers, when those + workers may be asked to do nested Parallel calls. Without + 'retrieval_context' this could lead to deadlock, as all the workers + managed by the backend may be "busy" waiting for the nested parallel + calls to finish, but the backend has no free workers to execute those + tasks. + """ + yield + + def _prepare_worker_env(self, n_jobs): + """Return environment variables limiting threadpools in external libs. 
+ + This function return a dict containing environment variables to pass + when creating a pool of process. These environment variables limit the + number of threads to `n_threads` for OpenMP, MKL, Accelerated and + OpenBLAS libraries in the child processes. + """ + explicit_n_threads = self.inner_max_num_threads + default_n_threads = str(max(cpu_count() // n_jobs, 1)) + + # Set the inner environment variables to self.inner_max_num_threads if + # it is given. Else, default to cpu_count // n_jobs unless the variable + # is already present in the parent process environment. + env = {} + for var in self.MAX_NUM_THREADS_VARS: + if explicit_n_threads is None: + var_value = os.environ.get(var, None) + if var_value is None: + var_value = default_n_threads + else: + var_value = str(explicit_n_threads) + + env[var] = var_value + + if self.TBB_ENABLE_IPC_VAR not in os.environ: + # To avoid over-subscription when using TBB, let the TBB schedulers + # use Inter Process Communication to coordinate: + env[self.TBB_ENABLE_IPC_VAR] = "1" + return env + + @staticmethod + def in_main_thread(): + return isinstance(threading.current_thread(), threading._MainThread) + + +class SequentialBackend(ParallelBackendBase): + """A ParallelBackend which will execute all batches sequentially. + + Does not use/create any threading objects, and hence has minimal + overhead. Used when n_jobs == 1. + """ + + uses_threads = True + supports_sharedmem = True + + def effective_n_jobs(self, n_jobs): + """Determine the number of jobs which are going to run in parallel""" + if n_jobs == 0: + raise ValueError('n_jobs == 0 in Parallel has no meaning') + return 1 + + def apply_async(self, func, callback=None): + """Schedule a func to be run""" + result = ImmediateResult(func) + if callback: + callback(result) + return result + + def get_nested_backend(self): + # import is not top level to avoid cyclic import errors. 
+ from .parallel import get_active_backend + + # SequentialBackend should neither change the nesting level, the + # default backend or the number of jobs. Just return the current one. + return get_active_backend() + + +class PoolManagerMixin(object): + """A helper class for managing pool of workers.""" + + _pool = None + + def effective_n_jobs(self, n_jobs): + """Determine the number of jobs which are going to run in parallel""" + if n_jobs == 0: + raise ValueError('n_jobs == 0 in Parallel has no meaning') + elif mp is None or n_jobs is None: + # multiprocessing is not available or disabled, fallback + # to sequential mode + return 1 + elif n_jobs < 0: + n_jobs = max(cpu_count() + 1 + n_jobs, 1) + return n_jobs + + def terminate(self): + """Shutdown the process or thread pool""" + if self._pool is not None: + self._pool.close() + self._pool.terminate() # terminate does a join() + self._pool = None + + def _get_pool(self): + """Used by apply_async to make it possible to implement lazy init""" + return self._pool + + def apply_async(self, func, callback=None): + """Schedule a func to be run""" + return self._get_pool().apply_async( + SafeFunction(func), callback=callback) + + def abort_everything(self, ensure_ready=True): + """Shutdown the pool and restart a new one with the same parameters""" + self.terminate() + if ensure_ready: + self.configure(n_jobs=self.parallel.n_jobs, parallel=self.parallel, + **self.parallel._backend_args) + + +class AutoBatchingMixin(object): + """A helper class for automagically batching jobs.""" + + # In seconds, should be big enough to hide multiprocessing dispatching + # overhead. + # This settings was found by running benchmarks/bench_auto_batching.py + # with various parameters on various platforms. + MIN_IDEAL_BATCH_DURATION = .2 + + # Should not be too high to avoid stragglers: long jobs running alone + # on a single worker while other workers have no work to process any more. 
+ MAX_IDEAL_BATCH_DURATION = 2 + + # Batching counters default values + _DEFAULT_EFFECTIVE_BATCH_SIZE = 1 + _DEFAULT_SMOOTHED_BATCH_DURATION = 0.0 + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self._effective_batch_size = self._DEFAULT_EFFECTIVE_BATCH_SIZE + self._smoothed_batch_duration = self._DEFAULT_SMOOTHED_BATCH_DURATION + + def compute_batch_size(self): + """Determine the optimal batch size""" + old_batch_size = self._effective_batch_size + batch_duration = self._smoothed_batch_duration + if (batch_duration > 0 and + batch_duration < self.MIN_IDEAL_BATCH_DURATION): + # The current batch size is too small: the duration of the + # processing of a batch of task is not large enough to hide + # the scheduling overhead. + ideal_batch_size = int(old_batch_size * + self.MIN_IDEAL_BATCH_DURATION / + batch_duration) + # Multiply by two to limit oscilations between min and max. + ideal_batch_size *= 2 + + # dont increase the batch size too fast to limit huge batch sizes + # potentially leading to starving worker + batch_size = min(2 * old_batch_size, ideal_batch_size) + + batch_size = max(batch_size, 1) + + self._effective_batch_size = batch_size + if self.parallel.verbose >= 10: + self.parallel._print( + "Batch computation too fast (%.4fs.) " + "Setting batch_size=%d.", (batch_duration, batch_size)) + elif (batch_duration > self.MAX_IDEAL_BATCH_DURATION and + old_batch_size >= 2): + # The current batch size is too big. If we schedule overly long + # running batches some CPUs might wait with nothing left to do + # while a couple of CPUs a left processing a few long running + # batches. Better reduce the batch size a bit to limit the + # likelihood of scheduling such stragglers. + + # decrease the batch size quickly to limit potential starving + ideal_batch_size = int( + old_batch_size * self.MIN_IDEAL_BATCH_DURATION / batch_duration + ) + # Multiply by two to limit oscilations between min and max. 
+ batch_size = max(2 * ideal_batch_size, 1) + self._effective_batch_size = batch_size + if self.parallel.verbose >= 10: + self.parallel._print( + "Batch computation too slow (%.4fs.) " + "Setting batch_size=%d.", (batch_duration, batch_size)) + else: + # No batch size adjustment + batch_size = old_batch_size + + if batch_size != old_batch_size: + # Reset estimation of the smoothed mean batch duration: this + # estimate is updated in the multiprocessing apply_async + # CallBack as long as the batch_size is constant. Therefore + # we need to reset the estimate whenever we re-tune the batch + # size. + self._smoothed_batch_duration = \ + self._DEFAULT_SMOOTHED_BATCH_DURATION + + return batch_size + + def batch_completed(self, batch_size, duration): + """Callback indicate how long it took to run a batch""" + if batch_size == self._effective_batch_size: + # Update the smoothed streaming estimate of the duration of a batch + # from dispatch to completion + old_duration = self._smoothed_batch_duration + if old_duration == self._DEFAULT_SMOOTHED_BATCH_DURATION: + # First record of duration for this batch size after the last + # reset. + new_duration = duration + else: + # Update the exponentially weighted average of the duration of + # batch for the current effective size. + new_duration = 0.8 * old_duration + 0.2 * duration + self._smoothed_batch_duration = new_duration + + def reset_batch_stats(self): + """Reset batch statistics to default values. + + This avoids interferences with future jobs. + """ + self._effective_batch_size = self._DEFAULT_EFFECTIVE_BATCH_SIZE + self._smoothed_batch_duration = self._DEFAULT_SMOOTHED_BATCH_DURATION + + +class ThreadingBackend(PoolManagerMixin, ParallelBackendBase): + """A ParallelBackend which will use a thread pool to execute batches in. + + This is a low-overhead backend but it suffers from the Python Global + Interpreter Lock if the called function relies a lot on Python objects. 
+ Mostly useful when the execution bottleneck is a compiled extension that + explicitly releases the GIL (for instance a Cython loop wrapped in a "with + nogil" block or an expensive call to a library such as NumPy). + + The actual thread pool is lazily initialized: the actual thread pool + construction is delayed to the first call to apply_async. + + ThreadingBackend is used as the default backend for nested calls. + """ + + supports_timeout = True + uses_threads = True + supports_sharedmem = True + + def configure(self, n_jobs=1, parallel=None, **backend_args): + """Build a process or thread pool and return the number of workers""" + n_jobs = self.effective_n_jobs(n_jobs) + if n_jobs == 1: + # Avoid unnecessary overhead and use sequential backend instead. + raise FallbackToBackend( + SequentialBackend(nesting_level=self.nesting_level)) + self.parallel = parallel + self._n_jobs = n_jobs + return n_jobs + + def _get_pool(self): + """Lazily initialize the thread pool + + The actual pool of worker threads is only initialized at the first + call to apply_async. + """ + if self._pool is None: + self._pool = ThreadPool(self._n_jobs) + return self._pool + + +class MultiprocessingBackend(PoolManagerMixin, AutoBatchingMixin, + ParallelBackendBase): + """A ParallelBackend which will use a multiprocessing.Pool. + + Will introduce some communication and memory overhead when exchanging + input and output data with the with the worker Python processes. + However, does not suffer from the Python Global Interpreter Lock. + """ + + supports_timeout = True + + def effective_n_jobs(self, n_jobs): + """Determine the number of jobs which are going to run in parallel. + + This also checks if we are attempting to create a nested parallel + loop. 
+ """ + if mp is None: + return 1 + + if mp.current_process().daemon: + # Daemonic processes cannot have children + if n_jobs != 1: + warnings.warn( + 'Multiprocessing-backed parallel loops cannot be nested,' + ' setting n_jobs=1', + stacklevel=3) + return 1 + + if process_executor._CURRENT_DEPTH > 0: + # Mixing loky and multiprocessing in nested loop is not supported + if n_jobs != 1: + warnings.warn( + 'Multiprocessing-backed parallel loops cannot be nested,' + ' below loky, setting n_jobs=1', + stacklevel=3) + return 1 + + elif not (self.in_main_thread() or self.nesting_level == 0): + # Prevent posix fork inside in non-main posix threads + if n_jobs != 1: + warnings.warn( + 'Multiprocessing-backed parallel loops cannot be nested' + ' below threads, setting n_jobs=1', + stacklevel=3) + return 1 + + return super(MultiprocessingBackend, self).effective_n_jobs(n_jobs) + + def configure(self, n_jobs=1, parallel=None, prefer=None, require=None, + **memmappingpool_args): + """Build a process or thread pool and return the number of workers""" + n_jobs = self.effective_n_jobs(n_jobs) + if n_jobs == 1: + raise FallbackToBackend( + SequentialBackend(nesting_level=self.nesting_level)) + + # Make sure to free as much memory as possible before forking + gc.collect() + self._pool = MemmappingPool(n_jobs, **memmappingpool_args) + self.parallel = parallel + return n_jobs + + def terminate(self): + """Shutdown the process or thread pool""" + super(MultiprocessingBackend, self).terminate() + self.reset_batch_stats() + + +class LokyBackend(AutoBatchingMixin, ParallelBackendBase): + """Managing pool of workers with loky instead of multiprocessing.""" + + supports_timeout = True + supports_inner_max_num_threads = True + + def configure(self, n_jobs=1, parallel=None, prefer=None, require=None, + idle_worker_timeout=300, **memmappingexecutor_args): + """Build a process executor and return the number of workers""" + n_jobs = self.effective_n_jobs(n_jobs) + if n_jobs == 1: + raise 
FallbackToBackend( + SequentialBackend(nesting_level=self.nesting_level)) + + self._workers = get_memmapping_executor( + n_jobs, timeout=idle_worker_timeout, + env=self._prepare_worker_env(n_jobs=n_jobs), + context_id=parallel._id, **memmappingexecutor_args) + self.parallel = parallel + return n_jobs + + def effective_n_jobs(self, n_jobs): + """Determine the number of jobs which are going to run in parallel""" + if n_jobs == 0: + raise ValueError('n_jobs == 0 in Parallel has no meaning') + elif mp is None or n_jobs is None: + # multiprocessing is not available or disabled, fallback + # to sequential mode + return 1 + elif mp.current_process().daemon: + # Daemonic processes cannot have children + if n_jobs != 1: + warnings.warn( + 'Loky-backed parallel loops cannot be called in a' + ' multiprocessing, setting n_jobs=1', + stacklevel=3) + return 1 + elif not (self.in_main_thread() or self.nesting_level == 0): + # Prevent posix fork inside in non-main posix threads + if n_jobs != 1: + warnings.warn( + 'Loky-backed parallel loops cannot be nested below ' + 'threads, setting n_jobs=1', + stacklevel=3) + return 1 + elif n_jobs < 0: + n_jobs = max(cpu_count() + 1 + n_jobs, 1) + return n_jobs + + def apply_async(self, func, callback=None): + """Schedule a func to be run""" + future = self._workers.submit(SafeFunction(func)) + future.get = functools.partial(self.wrap_future_result, future) + if callback is not None: + future.add_done_callback(callback) + return future + + @staticmethod + def wrap_future_result(future, timeout=None): + """Wrapper for Future.result to implement the same behaviour as + AsyncResults.get from multiprocessing.""" + try: + return future.result(timeout=timeout) + except CfTimeoutError as e: + raise TimeoutError from e + + def terminate(self): + if self._workers is not None: + # Don't terminate the workers as we want to reuse them in later + # calls, but cleanup the temporary resources that the Parallel call + # created. 
This 'hack' requires a private, low-level operation. + self._workers._temp_folder_manager._unlink_temporary_resources( + context_id=self.parallel._id + ) + self._workers = None + + self.reset_batch_stats() + + def abort_everything(self, ensure_ready=True): + """Shutdown the workers and restart a new one with the same parameters + """ + self._workers.terminate(kill_workers=True) + self._workers = None + + if ensure_ready: + self.configure(n_jobs=self.parallel.n_jobs, parallel=self.parallel) + + +class ImmediateResult(object): + def __init__(self, batch): + # Don't delay the application, to avoid keeping the input + # arguments in memory + self.results = batch() + + def get(self): + return self.results + + +class SafeFunction(object): + """Wrapper that handles the serialization of exception tracebacks. + + TODO python2_drop: check whether SafeFunction is still needed since we + dropped support for Python 2. If not needed anymore it should be + deprecated. + + If an exception is triggered when calling the inner function, a copy of + the full traceback is captured to make it possible to serialize + it so that it can be rendered in a different Python process. 
+ + """ + def __init__(self, func): + self.func = func + + def __call__(self, *args, **kwargs): + try: + return self.func(*args, **kwargs) + except KeyboardInterrupt as e: + # We capture the KeyboardInterrupt and reraise it as + # something different, as multiprocessing does not + # interrupt processing for a KeyboardInterrupt + raise WorkerInterrupt() from e + except BaseException: + # Rely on Python 3 built-in Remote Traceback reporting + raise + + +class FallbackToBackend(Exception): + """Raised when configuration should fallback to another backend""" + + def __init__(self, backend): + self.backend = backend diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/_store_backends.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/_store_backends.py new file mode 100644 index 0000000000000000000000000000000000000000..d4389ed8666051da1ac726f7952ce0b23e165f36 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/_store_backends.py @@ -0,0 +1,414 @@ +"""Storage providers backends for Memory caching.""" + +import re +import os +import os.path +import datetime +import json +import shutil +import warnings +import collections +import operator +import threading +from abc import ABCMeta, abstractmethod + +from .backports import concurrency_safe_rename +from .disk import mkdirp, memstr_to_bytes, rm_subdirs +from . 
def concurrency_safe_write(object_to_write, filename, write_func):
    """Write an object to a temporary file whose name is unique per writer.

    The temporary name is ``filename`` suffixed with the id of the current
    thread object and the current process id. ``write_func`` is called with
    ``(object_to_write, temporary_filename)``.

    Returns
    -------
    The temporary filename that was written to. Moving it to ``filename`` is
    left to the caller.
    """
    tid = id(threading.current_thread())
    pid = os.getpid()
    temporary_filename = f'{filename}.thread-{tid}-pid-{pid}'
    write_func(object_to_write, temporary_filename)
    return temporary_filename
@abstractmethod
def configure(self, location, verbose=0, backend_options=None):
    """Configures the store.

    Parameters
    ----------
    location: string
        The base location used by the store. On a filesystem, this
        corresponds to a directory.
    verbose: int
        The level of verbosity of the store
    backend_options: dict, optional
        Contains a dictionary of named parameters used to configure the
        store backend. ``None`` (the default) means that no option was
        given; a ``None`` default is used instead of ``dict()`` so that no
        mutable object is shared between calls.
    """
def dump_item(self, path, item, verbose=1):
    """Dump an item in the store at the path given as a list of
    strings.

    Persisting is best effort: an ordinary failure (for instance a race
    condition in the creation of the directory) is reported as a warning
    and the method returns None. KeyboardInterrupt and SystemExit are not
    intercepted.

    Parameters
    ----------
    path: list of strings
        Components joined to ``self.location`` to build the item directory.
    item: object
        The object written to ``output.pkl`` in the item directory.
    verbose: int
        When greater than 10, the item directory is printed.
    """
    try:
        item_path = os.path.join(self.location, *path)
        if not self._item_exists(item_path):
            self.create_location(item_path)
        filename = os.path.join(item_path, 'output.pkl')
        if verbose > 10:
            print('Persisting in %s' % item_path)

        def write_func(to_write, dest_filename):
            with self._open_item(dest_filename, "wb") as f:
                numpy_pickle.dump(to_write, f,
                                  compress=self.compress)

        self._concurrency_safe_write(item, filename, write_func)
    except Exception as exc:
        # Only catch Exception: a bare ``except`` would also swallow
        # KeyboardInterrupt / SystemExit. The failure is surfaced as a
        # warning so that a cache that never gets written can be noticed.
        warnings.warn(
            'Unable to cache to disk. Possibly a race condition in the '
            'creation of the directory. Exception: %r.' % (exc,),
            stacklevel=2)
def contains_path(self, path):
    """Check cached function is available in store.

    Parameters
    ----------
    path: list of strings
        Components joined to ``self.location`` to build the location of the
        cached function.

    Returns
    -------
    The result of ``self._item_exists`` for that location.
    """
    func_path = os.path.join(self.location, *path)
    # Use the backend's _item_exists primitive, like clear_path and
    # store_cached_func_code do.
    return self._item_exists(func_path)
cached function if it exists.""" + return {'location': os.path.join(self.location, *path)} + + def clear(self): + """Clear the whole store content.""" + self.clear_location(self.location) + + def reduce_store_size(self, bytes_limit): + """Reduce store size to keep it under the given bytes limit.""" + items_to_delete = self._get_items_to_delete(bytes_limit) + + for item in items_to_delete: + if self.verbose > 10: + print('Deleting item {0}'.format(item)) + try: + self.clear_location(item.path) + except OSError: + # Even with ignore_errors=True shutil.rmtree can raise OSError + # with: + # [Errno 116] Stale file handle if another process has deleted + # the folder already. + pass + + def _get_items_to_delete(self, bytes_limit): + """Get items to delete to keep the store under a size limit.""" + if isinstance(bytes_limit, str): + bytes_limit = memstr_to_bytes(bytes_limit) + + items = self.get_items() + size = sum(item.size for item in items) + + to_delete_size = size - bytes_limit + if to_delete_size < 0: + return [] + + # We want to delete first the cache items that were accessed a + # long time ago + items.sort(key=operator.attrgetter('last_access')) + + items_to_delete = [] + size_so_far = 0 + + for item in items: + if size_so_far > to_delete_size: + break + + items_to_delete.append(item) + size_so_far += item.size + + return items_to_delete + + def _concurrency_safe_write(self, to_write, filename, write_func): + """Writes an object into a file in a concurrency-safe way.""" + temporary_filename = concurrency_safe_write(to_write, + filename, write_func) + self._move_item(temporary_filename, filename) + + def __repr__(self): + """Printable representation of the store location.""" + return '{class_name}(location="{location}")'.format( + class_name=self.__class__.__name__, location=self.location) + + +class FileSystemStoreBackend(StoreBackendBase, StoreBackendMixin): + """A StoreBackend used with local or network file systems.""" + + _open_item = staticmethod(open) + 
def get_items(self):
    """Returns the whole list of items available in the store.

    Walks ``self.location`` and builds one ``CacheItemInfo`` (path, size,
    last access) per directory whose name starts with 32 lowercase
    hexadecimal characters. Directories that disappear while being
    inspected are skipped.
    """
    found = []

    for folder, _, names in os.walk(self.location):
        if not re.match('[a-f0-9]{32}', os.path.basename(folder)):
            continue

        # Prefer the access time of the pickled output and fall back to the
        # one of the directory itself.
        try:
            atime = os.path.getatime(os.path.join(folder, 'output.pkl'))
        except OSError:
            try:
                atime = os.path.getatime(folder)
            except OSError:
                # The directory has already been deleted
                continue
        accessed = datetime.datetime.fromtimestamp(atime)

        try:
            total = 0
            for name in names:
                total += os.path.getsize(os.path.join(folder, name))
        except OSError:
            # One of the files in the directory does not exist any more. We
            # assume this directory is being cleaned by another process
            # already
            continue

        found.append(CacheItemInfo(folder, total, accessed))

    return found
+ mmap_mode = backend_options.get('mmap_mode') + if self.compress and mmap_mode is not None: + warnings.warn('Compressed items cannot be memmapped in a ' + 'filesystem store. Option will be ignored.', + stacklevel=2) + + self.mmap_mode = mmap_mode + self.verbose = verbose diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/backports.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/backports.py new file mode 100644 index 0000000000000000000000000000000000000000..cb2f7233d56e44e6c6d2d041349d0b5511f90f2e --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/backports.py @@ -0,0 +1,78 @@ +""" +Backports of fixes for joblib dependencies +""" +import os +import time + +from distutils.version import LooseVersion +from os.path import basename +from multiprocessing import util + + +try: + import numpy as np + + def make_memmap(filename, dtype='uint8', mode='r+', offset=0, + shape=None, order='C', unlink_on_gc_collect=False): + """Custom memmap constructor compatible with numpy.memmap. + + This function: + - is a backport the numpy memmap offset fix (See + https://github.com/numpy/numpy/pull/8443 for more details. + The numpy fix is available starting numpy 1.13) + - adds ``unlink_on_gc_collect``, which specifies explicitly whether + the process re-constructing the memmap owns a reference to the + underlying file. If set to True, it adds a finalizer to the + newly-created memmap that sends a maybe_unlink request for the + memmaped file to resource_tracker. 
+ """ + util.debug( + "[MEMMAP READ] creating a memmap (shape {}, filename {}, " + "pid {})".format(shape, basename(filename), os.getpid()) + ) + + mm = np.memmap(filename, dtype=dtype, mode=mode, offset=offset, + shape=shape, order=order) + if LooseVersion(np.__version__) < '1.13': + mm.offset = offset + if unlink_on_gc_collect: + from ._memmapping_reducer import add_maybe_unlink_finalizer + add_maybe_unlink_finalizer(mm) + return mm +except ImportError: + def make_memmap(filename, dtype='uint8', mode='r+', offset=0, + shape=None, order='C', unlink_on_gc_collect=False): + raise NotImplementedError( + "'joblib.backports.make_memmap' should not be used " + 'if numpy is not installed.') + + +if os.name == 'nt': + # https://github.com/joblib/joblib/issues/540 + access_denied_errors = (5, 13) + from os import replace + + def concurrency_safe_rename(src, dst): + """Renames ``src`` into ``dst`` overwriting ``dst`` if it exists. + + On Windows os.replace can yield permission errors if executed by two + different processes. + """ + max_sleep_time = 1 + total_sleep_time = 0 + sleep_time = 0.001 + while total_sleep_time < max_sleep_time: + try: + replace(src, dst) + break + except Exception as exc: + if getattr(exc, 'winerror', None) in access_denied_errors: + time.sleep(sleep_time) + total_sleep_time += sleep_time + sleep_time *= 2 + else: + raise + else: + raise +else: + from os import replace as concurrency_safe_rename # noqa diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/disk.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/disk.py new file mode 100644 index 0000000000000000000000000000000000000000..3b2735d0448f9760cf3a32ad65144321c6e1d389 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/disk.py @@ -0,0 +1,136 @@ +""" +Disk management utilities. 
def memstr_to_bytes(text):
    """ Convert a memory text to its value in bytes.

    Parameters
    ----------
    text: string
        A number followed by one of the units 'K', 'M' or 'G' (powers of
        1024), for instance '10G', '500M' or '1.5K'.

    Returns
    -------
    size: int
        The number of bytes, truncated to an integer.

    Raises
    ------
    ValueError
        If the text is empty, has an unknown unit, or its numeric part is
        not a finite number.
    """
    kilo = 1024
    units = dict(K=kilo, M=kilo ** 2, G=kilo ** 3)
    try:
        size = int(units[text[-1]] * float(text[:-1]))
    except (IndexError, KeyError, ValueError, OverflowError) as e:
        # IndexError: empty text; KeyError: unknown unit; ValueError: bad
        # number (or nan); OverflowError: infinite number.
        raise ValueError(
            "Invalid literal for size give: %s (type %s) should be "
            "alike '10G', '500M', '50K'." % (text, type(text))) from e
    return size
+ + If onerror is set, it is called to handle the error with arguments (func, + path, exc_info) where func is os.listdir, os.remove, or os.rmdir; + path is the argument to that function that caused it to fail; and + exc_info is a tuple returned by sys.exc_info(). If onerror is None, + an exception is raised. + """ + + # NOTE this code is adapted from the one in shutil.rmtree, and is + # just as fast + + names = [] + try: + names = os.listdir(path) + except os.error: + if onerror is not None: + onerror(os.listdir, path, sys.exc_info()) + else: + raise + + for name in names: + fullname = os.path.join(path, name) + delete_folder(fullname, onerror=onerror) + + +def delete_folder(folder_path, onerror=None, allow_non_empty=True): + """Utility function to cleanup a temporary folder if it still exists.""" + if os.path.isdir(folder_path): + if onerror is not None: + shutil.rmtree(folder_path, False, onerror) + else: + # allow the rmtree to fail once, wait and re-try. + # if the error is raised again, fail + err_count = 0 + while True: + files = os.listdir(folder_path) + try: + if len(files) == 0 or allow_non_empty: + shutil.rmtree( + folder_path, ignore_errors=False, onerror=None + ) + util.debug( + "Sucessfully deleted {}".format(folder_path)) + break + else: + raise OSError( + "Expected empty folder {} but got {} " + "files.".format(folder_path, len(files)) + ) + except (OSError, WindowsError): + err_count += 1 + if err_count > RM_SUBDIRS_N_RETRY: + # the folder cannot be deleted right now. It maybe + # because some temporary files have not been deleted + # yet. 
def get_func_code(func):
    """ Attempts to retrieve a reliable function code hash.

    The reason we don't use inspect.getsource is that it caches the
    source, whereas we want this to be modified on the fly when the
    function is modified.

    Returns
    -------
    func_code: string
        The function code
    source_file: string
        The path to the file in which the function is defined.
    first_line: int
        The first line of the code in the source file.

    Notes
    ------
    This function does a bit more magic than inspect, and is thus
    more robust. If the source cannot be retrieved, ``func_code`` falls
    back to the hash of the code object (or to ``repr(func)`` for objects
    without ``__code__``) and ``first_line`` is -1.
    """
    source_file = None
    try:
        code = func.__code__
        source_file = code.co_filename
        if not os.path.exists(source_file):
            # Use inspect for lambda functions and functions defined in an
            # interactive shell, or in doctests
            source_code = ''.join(inspect.getsourcelines(func)[0])
            line_no = 1
            if source_file.startswith('<doctest '):
                # Split a '<doctest NAME.rst[N]>' pseudo filename into the
                # document name and the example number.
                source_file, line_no = re.match(
                    r'<doctest (.*\.rst)\[(.*)\]>', source_file).groups()
                line_no = int(line_no)
                source_file = '<doctest %s>' % source_file
            return source_code, source_file, line_no
        # Try to retrieve the source code.
        with open_py_source(source_file) as source_file_obj:
            first_line = code.co_firstlineno
            # All the lines after the function definition:
            source_lines = list(islice(source_file_obj, first_line - 1, None))
        return ''.join(inspect.getblock(source_lines)), source_file, first_line
    except Exception:
        # If the source code fails, we use the hash. This is fragile and
        # might change from one session to another.
        if hasattr(func, '__code__'):
            # Python 3.X
            return str(func.__code__.__hash__()), source_file, -1
        else:
            # Weird objects like numpy ufunc don't have __code__
            # This is fragile, as quite often the id of the object is
            # in the repr, so it might not persist across sessions,
            # however it will work for ufuncs.
            return repr(func), source_file, -1
+ win_characters: boolean, optional + If true, substitute special characters using urllib.quote + This is useful in Windows, as it cannot encode some filenames + """ + if hasattr(func, '__module__'): + module = func.__module__ + else: + try: + module = inspect.getmodule(func) + except TypeError: + if hasattr(func, '__class__'): + module = func.__class__.__module__ + else: + module = 'unknown' + if module is None: + # Happens in doctests, eg + module = '' + if module == '__main__': + try: + filename = os.path.abspath(inspect.getsourcefile(func)) + except: + filename = None + if filename is not None: + # mangling of full path to filename + parts = filename.split(os.sep) + if parts[-1].startswith(', where: + # - N is the cell number where the function was defined + # - XYZ is a hash representing the function's code (and name). + # It will be consistent across sessions and kernel restarts, + # and will change if the function's code/name changes + # We remove N so that cache is properly hit if the cell where + # the func is defined is re-exectuted. + # The XYZ hash should avoid collisions between functions with + # the same name, both within the same notebook but also across + # notebooks + splitted = parts[-1].split('-') + parts[-1] = '-'.join(splitted[:2] + splitted[3:]) + elif len(parts) > 2 and parts[-2].startswith('ipykernel_'): + # In a notebook session (ipykernel). Filename seems to be 'xyz' + # of above. parts[-2] has the structure ipykernel_XXXXXX where + # XXXXXX is a six-digit number identifying the current run (?). + # If we split it off, the function again has the same + # identifier across runs. 
+ parts[-2] = 'ipykernel' + filename = '-'.join(parts) + if filename.endswith('.py'): + filename = filename[:-3] + module = module + '-' + filename + module = module.split('.') + if hasattr(func, 'func_name'): + name = func.func_name + elif hasattr(func, '__name__'): + name = func.__name__ + else: + name = 'unknown' + # Hack to detect functions not defined at the module-level + if resolv_alias: + # TODO: Maybe add a warning here? + if hasattr(func, 'func_globals') and name in func.func_globals: + if not func.func_globals[name] is func: + name = '%s-alias' % name + if inspect.ismethod(func): + # We need to add the name of the class + if hasattr(func, 'im_class'): + klass = func.im_class + module.append(klass.__name__) + if os.name == 'nt' and win_characters: + # Windows can't encode certain characters in filenames + name = _clean_win_chars(name) + module = [_clean_win_chars(s) for s in module] + return module, name + + +def _signature_str(function_name, arg_sig): + """Helper function to output a function signature""" + return '{}{}'.format(function_name, arg_sig) + + +def _function_called_str(function_name, args, kwargs): + """Helper function to output a function call""" + template_str = '{0}({1}, {2})' + + args_str = repr(args)[1:-1] + kwargs_str = ', '.join('%s=%s' % (k, v) + for k, v in kwargs.items()) + return template_str.format(function_name, args_str, + kwargs_str) + + +def filter_args(func, ignore_lst, args=(), kwargs=dict()): + """ Filters the given args and kwargs using a list of arguments to + ignore, and a function specification. + + Parameters + ---------- + func: callable + Function giving the argument specification + ignore_lst: list of strings + List of arguments to ignore (either a name of an argument + in the function spec, or '*', or '**') + *args: list + Positional arguments passed to the function. 
+ **kwargs: dict + Keyword arguments passed to the function + + Returns + ------- + filtered_args: list + List of filtered positional and keyword arguments. + """ + args = list(args) + if isinstance(ignore_lst, str): + # Catch a common mistake + raise ValueError( + 'ignore_lst must be a list of parameters to ignore ' + '%s (type %s) was given' % (ignore_lst, type(ignore_lst))) + # Special case for functools.partial objects + if (not inspect.ismethod(func) and not inspect.isfunction(func)): + if ignore_lst: + warnings.warn('Cannot inspect object %s, ignore list will ' + 'not work.' % func, stacklevel=2) + return {'*': args, '**': kwargs} + arg_sig = inspect.signature(func) + arg_names = [] + arg_defaults = [] + arg_kwonlyargs = [] + arg_varargs = None + arg_varkw = None + for param in arg_sig.parameters.values(): + if param.kind is param.POSITIONAL_OR_KEYWORD: + arg_names.append(param.name) + elif param.kind is param.KEYWORD_ONLY: + arg_names.append(param.name) + arg_kwonlyargs.append(param.name) + elif param.kind is param.VAR_POSITIONAL: + arg_varargs = param.name + elif param.kind is param.VAR_KEYWORD: + arg_varkw = param.name + if param.default is not param.empty: + arg_defaults.append(param.default) + if inspect.ismethod(func): + # First argument is 'self', it has been removed by Python + # we need to add it back: + args = [func.__self__, ] + args + # func is an instance method, inspect.signature(func) does not + # include self, we need to fetch it from the class method, i.e + # func.__func__ + class_method_sig = inspect.signature(func.__func__) + self_name = next(iter(class_method_sig.parameters)) + arg_names = [self_name] + arg_names + # XXX: Maybe I need an inspect.isbuiltin to detect C-level methods, such + # as on ndarrays. 
+ + _, name = get_func_name(func, resolv_alias=False) + arg_dict = dict() + arg_position = -1 + for arg_position, arg_name in enumerate(arg_names): + if arg_position < len(args): + # Positional argument or keyword argument given as positional + if arg_name not in arg_kwonlyargs: + arg_dict[arg_name] = args[arg_position] + else: + raise ValueError( + "Keyword-only parameter '%s' was passed as " + 'positional parameter for %s:\n' + ' %s was called.' + % (arg_name, + _signature_str(name, arg_sig), + _function_called_str(name, args, kwargs)) + ) + + else: + position = arg_position - len(arg_names) + if arg_name in kwargs: + arg_dict[arg_name] = kwargs[arg_name] + else: + try: + arg_dict[arg_name] = arg_defaults[position] + except (IndexError, KeyError) as e: + # Missing argument + raise ValueError( + 'Wrong number of arguments for %s:\n' + ' %s was called.' + % (_signature_str(name, arg_sig), + _function_called_str(name, args, kwargs)) + ) from e + + varkwargs = dict() + for arg_name, arg_value in sorted(kwargs.items()): + if arg_name in arg_dict: + arg_dict[arg_name] = arg_value + elif arg_varkw is not None: + varkwargs[arg_name] = arg_value + else: + raise TypeError("Ignore list for %s() contains an unexpected " + "keyword argument '%s'" % (name, arg_name)) + + if arg_varkw is not None: + arg_dict['**'] = varkwargs + if arg_varargs is not None: + varargs = args[arg_position + 1:] + arg_dict['*'] = varargs + + # Now remove the arguments to be ignored + for item in ignore_lst: + if item in arg_dict: + arg_dict.pop(item) + else: + raise ValueError("Ignore list: argument '%s' is not defined for " + "function %s" + % (item, + _signature_str(name, arg_sig)) + ) + # XXX: Return a sorted list of pairs? + return arg_dict + + +def _format_arg(arg): + formatted_arg = pformat(arg, indent=2) + if len(formatted_arg) > 1500: + formatted_arg = '%s...' 
def format_signature(func, *args, **kwargs):
    """Build a printable path and call signature for ``func``.

    Returns
    -------
    module_path: string
        The non-empty module components and the function name given by
        ``get_func_name``, joined with dots.
    signature: string
        ``name(arg, ..., key=value, ...)`` with each value rendered by
        ``_format_arg``.
    """
    # XXX: Should this use inspect.formatargvalues/formatargspec?
    module_parts, name = get_func_name(func)
    kept = [part for part in module_parts if part]
    module_path = '.'.join(kept + [name]) if kept else name

    pieces = []
    last_length = 0
    for positional in args:
        text = _format_arg(positional)
        # Start a new line after an argument longer than 80 characters.
        if last_length > 80:
            text = '\n%s' % text
        last_length = len(text)
        pieces.append(text)
    for key, value in kwargs.items():
        pieces.append('%s=%s' % (key, _format_arg(value)))

    return module_path, '%s(%s)' % (name, ', '.join(pieces))
+ +import pickle +import hashlib +import sys +import types +import struct +import io +import decimal + + +Pickler = pickle._Pickler + + +class _ConsistentSet(object): + """ Class used to ensure the hash of Sets is preserved + whatever the order of its items. + """ + def __init__(self, set_sequence): + # Forces order of elements in set to ensure consistent hash. + try: + # Trying first to order the set assuming the type of elements is + # consistent and orderable. + # This fails on python 3 when elements are unorderable + # but we keep it in a try as it's faster. + self._sequence = sorted(set_sequence) + except (TypeError, decimal.InvalidOperation): + # If elements are unorderable, sorting them using their hash. + # This is slower but works in any case. + self._sequence = sorted((hash(e) for e in set_sequence)) + + +class _MyHash(object): + """ Class used to hash objects that won't normally pickle """ + + def __init__(self, *args): + self.args = args + + +class Hasher(Pickler): + """ A subclass of pickler, to do cryptographic hashing, rather than + pickling. 
def hash(self, obj, return_digest=True):
    """Feed the pickled form of ``obj`` to the hash object.

    ``obj`` is dumped with ``self.dump``; the whole content of
    ``self.stream`` is then passed to ``self._hash.update``.

    Returns the hexadecimal digest when ``return_digest`` is true, None
    otherwise. A ``pickle.PicklingError`` raised by the dump is re-raised
    with an extra message naming the object appended to its ``args``.
    """
    try:
        self.dump(obj)
    except pickle.PicklingError as err:
        context = 'PicklingError while hashing %r: %r' % (obj, err)
        err.args = err.args + (context,)
        raise
    self._hash.update(self.stream.getvalue())
    return self._hash.hexdigest() if return_digest else None
+ def save_global(self, obj, name=None, pack=struct.pack): + # We have to override this method in order to deal with objects + # defined interactively in IPython that are not injected in + # __main__ + kwargs = dict(name=name, pack=pack) + del kwargs['pack'] + try: + Pickler.save_global(self, obj, **kwargs) + except pickle.PicklingError: + Pickler.save_global(self, obj, **kwargs) + module = getattr(obj, "__module__", None) + if module == '__main__': + my_name = name + if my_name is None: + my_name = obj.__name__ + mod = sys.modules[module] + if not hasattr(mod, my_name): + # IPython doesn't inject the variables define + # interactively in __main__ + setattr(mod, my_name, obj) + + dispatch = Pickler.dispatch.copy() + # builtin + dispatch[type(len)] = save_global + # type + dispatch[type(object)] = save_global + # classobj + dispatch[type(Pickler)] = save_global + # function + dispatch[type(pickle.dump)] = save_global + + def _batch_setitems(self, items): + # forces order of keys in dict to ensure consistent hash. + try: + # Trying first to compare dict assuming the type of keys is + # consistent and orderable. + # This fails on python 3 when keys are unorderable + # but we keep it in a try as it's faster. + Pickler._batch_setitems(self, iter(sorted(items))) + except TypeError: + # If keys are unorderable, sorting them using their hash. This is + # slower but works in any case. + Pickler._batch_setitems(self, iter(sorted((hash(k), v) + for k, v in items))) + + def save_set(self, set_items): + # forces order of items in Set to ensure consistent hash + Pickler.save(self, _ConsistentSet(set_items)) + + dispatch[type(set())] = save_set + + +class NumpyHasher(Hasher): + """ Special case the hasher for when numpy is loaded. + """ + + def __init__(self, hash_name='md5', coerce_mmap=False): + """ + Parameters + ---------- + hash_name: string + The hash algorithm to be used + coerce_mmap: boolean + Make no difference between np.memmap and np.ndarray + objects. 
+ """ + self.coerce_mmap = coerce_mmap + Hasher.__init__(self, hash_name=hash_name) + # delayed import of numpy, to avoid tight coupling + import numpy as np + self.np = np + if hasattr(np, 'getbuffer'): + self._getbuffer = np.getbuffer + else: + self._getbuffer = memoryview + + def save(self, obj): + """ Subclass the save method, to hash ndarray subclass, rather + than pickling them. Off course, this is a total abuse of + the Pickler class. + """ + if isinstance(obj, self.np.ndarray) and not obj.dtype.hasobject: + # Compute a hash of the object + # The update function of the hash requires a c_contiguous buffer. + if obj.shape == (): + # 0d arrays need to be flattened because viewing them as bytes + # raises a ValueError exception. + obj_c_contiguous = obj.flatten() + elif obj.flags.c_contiguous: + obj_c_contiguous = obj + elif obj.flags.f_contiguous: + obj_c_contiguous = obj.T + else: + # Cater for non-single-segment arrays: this creates a + # copy, and thus aleviates this issue. + # XXX: There might be a more efficient way of doing this + obj_c_contiguous = obj.flatten() + + # memoryview is not supported for some dtypes, e.g. datetime64, see + # https://github.com/numpy/numpy/issues/4983. The + # workaround is to view the array as bytes before + # taking the memoryview. + self._hash.update( + self._getbuffer(obj_c_contiguous.view(self.np.uint8))) + + # We store the class, to be able to distinguish between + # Objects with the same binary content, but different + # classes. + if self.coerce_mmap and isinstance(obj, self.np.memmap): + # We don't make the difference between memmap and + # normal ndarrays, to be able to reload previously + # computed results with memmap. + klass = self.np.ndarray + else: + klass = obj.__class__ + # We also return the dtype and the shape, to distinguish + # different views on the same data with different dtypes. + + # The object will be pickled by the pickler hashed at the end. 
+ obj = (klass, ('HASHED', obj.dtype, obj.shape, obj.strides)) + elif isinstance(obj, self.np.dtype): + # numpy.dtype consistent hashing is tricky to get right. This comes + # from the fact that atomic np.dtype objects are interned: + # ``np.dtype('f4') is np.dtype('f4')``. The situation is + # complicated by the fact that this interning does not resist a + # simple pickle.load/dump roundtrip: + # ``pickle.loads(pickle.dumps(np.dtype('f4'))) is not + # np.dtype('f4') Because pickle relies on memoization during + # pickling, it is easy to + # produce different hashes for seemingly identical objects, such as + # ``[np.dtype('f4'), np.dtype('f4')]`` + # and ``[np.dtype('f4'), pickle.loads(pickle.dumps('f4'))]``. + # To prevent memoization from interfering with hashing, we isolate + # the serialization (and thus the pickle memoization) of each dtype + # using each time a different ``pickle.dumps`` call unrelated to + # the current Hasher instance. + self._hash.update("_HASHED_DTYPE".encode('utf-8')) + self._hash.update(pickle.dumps(obj)) + return + Hasher.save(self, obj) + + +def hash(obj, hash_name='md5', coerce_mmap=False): + """ Quick calculation of a hash to identify uniquely Python objects + containing numpy arrays. + + + Parameters + ----------- + hash_name: 'md5' or 'sha1' + Hashing algorithm used. sha1 is supposedly safer, but md5 is + faster. + coerce_mmap: boolean + Make no difference between np.memmap and np.ndarray + """ + valid_hash_names = ('md5', 'sha1') + if hash_name not in valid_hash_names: + raise ValueError("Valid options for 'hash_name' are {}. " + "Got hash_name={!r} instead." 
def read_zfile(file_handle):
    """Read the z-file and return the content as a string.

    Z-files are raw data compressed with zlib used internally by joblib
    for persistence. Backward compatibility is not guaranteed. Do not
    use for external purposes.

    Parameters
    ----------
    file_handle: file object
        Seekable binary file object; it is rewound to offset 0 first.

    Returns
    -------
    The decompressed payload, as returned by ``zlib.decompress``.

    Raises
    ------
    AssertionError
        If the decompressed size differs from the size stored in the header.
    """
    file_handle.seek(0)
    header_length = len(_ZFILE_PREFIX) + _MAX_LEN
    # The header is the prefix followed by the hexadecimal payload length.
    length = file_handle.read(header_length)
    length = length[len(_ZFILE_PREFIX):]
    length = int(length, 16)

    # With python2 and joblib version <= 0.8.4 compressed pickle header is one
    # character wider so we need to ignore an additional space if present.
    # Note: the first byte of the zlib data is guaranteed not to be a
    # space according to
    # https://tools.ietf.org/html/rfc6713#section-2.1
    next_byte = file_handle.read(1)
    if next_byte != b' ':
        # The zlib compressed data has started and we need to go back
        # one byte
        file_handle.seek(header_length)

    # We use the known length of the data to tell Zlib the size of the
    # buffer to allocate.
    data = zlib.decompress(file_handle.read(), 15, length)
    # Explicit check instead of an ``assert`` statement so that corruption is
    # still detected when Python runs with -O. The exception type is kept
    # as AssertionError for backward compatibility.
    if len(data) != length:
        raise AssertionError(
            "Incorrect data length while decompressing %s. "
            "The file could be corrupted." % file_handle)
    return data
Store the useful information for later.""" + self.filename = filename + self.subclass = subclass + self.allow_mmap = allow_mmap + + def read(self, unpickler): + """Reconstruct the array.""" + filename = os.path.join(unpickler._dirname, self.filename) + # Load the array from the disk + # use getattr instead of self.allow_mmap to ensure backward compat + # with NDArrayWrapper instances pickled with joblib < 0.9.0 + allow_mmap = getattr(self, 'allow_mmap', True) + kwargs = {} + if allow_mmap: + kwargs['mmap_mode'] = unpickler.mmap_mode + if "allow_pickle" in inspect.signature(unpickler.np.load).parameters: + # Required in numpy 1.16.3 and later to aknowledge the security + # risk. + kwargs["allow_pickle"] = True + array = unpickler.np.load(filename, **kwargs) + + # Detect byte order mis-match and swap as needed. + array = _ensure_native_byte_order(array) + + # Reconstruct subclasses. This does not work with old + # versions of numpy + if (hasattr(array, '__array_prepare__') and + self.subclass not in (unpickler.np.ndarray, + unpickler.np.memmap)): + # We need to reconstruct another subclass + new_array = unpickler.np.core.multiarray._reconstruct( + self.subclass, (0,), 'b') + return new_array.__array_prepare__(array) + else: + return array + + +class ZNDArrayWrapper(NDArrayWrapper): + """An object to be persisted instead of numpy arrays. + + This object store the Zfile filename in which + the data array has been persisted, and the meta information to + retrieve it. + The reason that we store the raw buffer data of the array and + the meta information, rather than array representation routine + (tobytes) is that it enables us to use completely the strided + model to avoid memory copies (a and a.T store as fast). In + addition saving the heavy information separately can avoid + creating large temporary buffers when unpickling data with + large arrays. + """ + + def __init__(self, filename, init_args, state): + """Constructor. 
def load_compatibility(filename):
    """Reconstruct a Python object from a file persisted with joblib.dump.

    This function ensures the compatibility with joblib old persistence format
    (<= 0.9.3).

    Parameters
    -----------
    filename: string
        The name of the file from which to load the object

    Returns
    -------
    result: any Python object
        The object stored in the file.

    See Also
    --------
    joblib.dump : function to save an object

    Notes
    -----

    This function can load numpy array files saved separately during the
    dump.
    """
    # The handle is opened early and kept open for the whole load to avoid
    # race-conditions on renames. Companion files are still looked up by
    # path, so moving the directory while loading remains racy.
    with open(filename, 'rb') as file_handle:
        unpickler = ZipNumpyUnpickler(filename, file_handle=file_handle)
        try:
            return unpickler.load()
        except UnicodeDecodeError as exc:
            # More user-friendly error message
            raise ValueError(
                'You may be trying to read with '
                'python 3 a joblib pickle generated with python 2. '
                'This feature is not supported by joblib.') from exc
        finally:
            # Release the in-memory buffer created by the unpickler, if any.
            if hasattr(unpickler, 'file_handle'):
                unpickler.file_handle.close()
+ +import pickle +import io +import sys +import warnings +import contextlib + +from .compressor import _ZFILE_PREFIX +from .compressor import _COMPRESSORS + +try: + import numpy as np +except ImportError: + np = None + +Unpickler = pickle._Unpickler +Pickler = pickle._Pickler +xrange = range + + +try: + # The python standard library can be built without bz2 so we make bz2 + # usage optional. + # see https://github.com/scikit-learn/scikit-learn/issues/7526 for more + # details. + import bz2 +except ImportError: + bz2 = None + +# Buffer size used in io.BufferedReader and io.BufferedWriter +_IO_BUFFER_SIZE = 1024 ** 2 + + +def _is_raw_file(fileobj): + """Check if fileobj is a raw file object, e.g created with open.""" + fileobj = getattr(fileobj, 'raw', fileobj) + return isinstance(fileobj, io.FileIO) + + +def _get_prefixes_max_len(): + # Compute the max prefix len of registered compressors. + prefixes = [len(compressor.prefix) for compressor in _COMPRESSORS.values()] + prefixes += [len(_ZFILE_PREFIX)] + return max(prefixes) + + +def _is_numpy_array_byte_order_mismatch(array): + """Check if numpy array is having byte order mis-match""" + return ((sys.byteorder == 'big' and + (array.dtype.byteorder == '<' or + (array.dtype.byteorder == '|' and array.dtype.fields and + all(e[0].byteorder == '<' + for e in array.dtype.fields.values())))) or + (sys.byteorder == 'little' and + (array.dtype.byteorder == '>' or + (array.dtype.byteorder == '|' and array.dtype.fields and + all(e[0].byteorder == '>' + for e in array.dtype.fields.values()))))) + + +def _ensure_native_byte_order(array): + """Use the byte order of the host while preserving values + + Does nothing if array already uses the system byte order. 
+ """ + if _is_numpy_array_byte_order_mismatch(array): + array = array.byteswap().newbyteorder('=') + return array + + +############################################################################### +# Cache file utilities +def _detect_compressor(fileobj): + """Return the compressor matching fileobj. + + Parameters + ---------- + fileobj: file object + + Returns + ------- + str in {'zlib', 'gzip', 'bz2', 'lzma', 'xz', 'compat', 'not-compressed'} + """ + # Read the magic number in the first bytes of the file. + max_prefix_len = _get_prefixes_max_len() + if hasattr(fileobj, 'peek'): + # Peek allows to read those bytes without moving the cursor in the + # file whic. + first_bytes = fileobj.peek(max_prefix_len) + else: + # Fallback to seek if the fileobject is not peekable. + first_bytes = fileobj.read(max_prefix_len) + fileobj.seek(0) + + if first_bytes.startswith(_ZFILE_PREFIX): + return "compat" + else: + for name, compressor in _COMPRESSORS.items(): + if first_bytes.startswith(compressor.prefix): + return name + + return "not-compressed" + + +def _buffered_read_file(fobj): + """Return a buffered version of a read file object.""" + return io.BufferedReader(fobj, buffer_size=_IO_BUFFER_SIZE) + + +def _buffered_write_file(fobj): + """Return a buffered version of a write file object.""" + return io.BufferedWriter(fobj, buffer_size=_IO_BUFFER_SIZE) + + +@contextlib.contextmanager +def _read_fileobject(fileobj, filename, mmap_mode=None): + """Utility function opening the right fileobject from a filename. + + The magic number is used to choose between the type of file object to open: + * regular file object (default) + * zlib file object + * gzip file object + * bz2 file object + * lzma file object (for xz and lzma compressor) + + Parameters + ---------- + fileobj: file object + compressor: str in {'zlib', 'gzip', 'bz2', 'lzma', 'xz', 'compat', + 'not-compressed'} + filename: str + filename path corresponding to the fileobj parameter. 
+ mmap_mode: str + memory map mode that should be used to open the pickle file. This + parameter is useful to verify that the user is not trying to one with + compression. Default: None. + + Returns + ------- + a file like object + + """ + # Detect if the fileobj contains compressed data. + compressor = _detect_compressor(fileobj) + + if compressor == 'compat': + # Compatibility with old pickle mode: simply return the input + # filename "as-is" and let the compatibility function be called by the + # caller. + warnings.warn("The file '%s' has been generated with a joblib " + "version less than 0.10. " + "Please regenerate this pickle file." % filename, + DeprecationWarning, stacklevel=2) + yield filename + else: + if compressor in _COMPRESSORS: + # based on the compressor detected in the file, we open the + # correct decompressor file object, wrapped in a buffer. + compressor_wrapper = _COMPRESSORS[compressor] + inst = compressor_wrapper.decompressor_file(fileobj) + fileobj = _buffered_read_file(inst) + + # Checking if incompatible load parameters with the type of file: + # mmap_mode cannot be used with compressed file or in memory buffers + # such as io.BytesIO. + if mmap_mode is not None: + if isinstance(fileobj, io.BytesIO): + warnings.warn('In memory persistence is not compatible with ' + 'mmap_mode "%(mmap_mode)s" flag passed. ' + 'mmap_mode option will be ignored.' + % locals(), stacklevel=2) + elif compressor != 'not-compressed': + warnings.warn('mmap_mode "%(mmap_mode)s" is not compatible ' + 'with compressed file %(filename)s. ' + '"%(mmap_mode)s" flag will be ignored.' + % locals(), stacklevel=2) + elif not _is_raw_file(fileobj): + warnings.warn('"%(fileobj)r" is not a raw file, mmap_mode ' + '"%(mmap_mode)s" flag will be ignored.' 
+ % locals(), stacklevel=2) + + yield fileobj + + +def _write_fileobject(filename, compress=("zlib", 3)): + """Return the right compressor file object in write mode.""" + compressmethod = compress[0] + compresslevel = compress[1] + + if compressmethod in _COMPRESSORS.keys(): + file_instance = _COMPRESSORS[compressmethod].compressor_file( + filename, compresslevel=compresslevel) + return _buffered_write_file(file_instance) + else: + file_instance = _COMPRESSORS['zlib'].compressor_file( + filename, compresslevel=compresslevel) + return _buffered_write_file(file_instance) + + +# Utility functions/variables from numpy required for writing arrays. +# We need at least the functions introduced in version 1.9 of numpy. Here, +# we use the ones from numpy 1.10.2. +BUFFER_SIZE = 2 ** 18 # size of buffer for reading npz files in bytes + + +def _read_bytes(fp, size, error_template="ran out of data"): + """Read from file-like object until size bytes are read. + + TODO python2_drop: is it still needed? The docstring mentions python 2.6 + and it looks like this can be at least simplified ... + + Raises ValueError if not EOF is encountered before size bytes are read. + Non-blocking objects only supported if they derive from io objects. + + Required as e.g. ZipExtFile in python 2.6 can return less data than + requested. + + This function was taken from numpy/lib/format.py in version 1.10.2. + + Parameters + ---------- + fp: file-like object + size: int + error_template: str + + Returns + ------- + a bytes object + The data read in bytes. + + """ + data = bytes() + while True: + # io files (default in python3) return None or raise on + # would-block, python2 file will truncate, probably nothing can be + # done about that. 
note that regular files can't be non-blocking + try: + r = fp.read(size - len(data)) + data += r + if len(r) == 0 or len(data) == size: + break + except io.BlockingIOError: + pass + if len(data) != size: + msg = "EOF: reading %s, expected %d bytes got %d" + raise ValueError(msg % (error_template, size, len(data))) + else: + return data diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/parallel.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/parallel.py new file mode 100644 index 0000000000000000000000000000000000000000..687557eb610521845e702f39d48e89dd4330f3d2 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/parallel.py @@ -0,0 +1,1074 @@ +""" +Helpers for embarrassingly parallel code. +""" +# Author: Gael Varoquaux < gael dot varoquaux at normalesup dot org > +# Copyright: 2010, Gael Varoquaux +# License: BSD 3 clause + +from __future__ import division + +import os +import sys +from math import sqrt +import functools +import time +import threading +import itertools +from uuid import uuid4 +from numbers import Integral +import warnings +import queue + +from ._multiprocessing_helpers import mp + +from .logger import Logger, short_format_time +from .disk import memstr_to_bytes +from ._parallel_backends import (FallbackToBackend, MultiprocessingBackend, + ThreadingBackend, SequentialBackend, + LokyBackend) +from .externals.cloudpickle import dumps, loads +from .externals import loky + +# Make sure that those two classes are part of the public joblib.parallel API +# so that 3rd party backend implementers can import them from here. 
def get_active_backend(prefer=None, require=None, verbose=0):
    """Return the active default backend

    Returns a ``(backend, n_jobs)`` pair. The backend stored in the
    thread-local ``_backend`` is used when present; otherwise a new instance
    of the default backend is created. In both cases the thread-based backend
    is substituted when ``require='sharedmem'`` cannot be satisfied, and, for
    the default backend only, when ``prefer='threads'`` is not met.

    Raises ValueError for an unknown ``prefer`` / ``require`` value or for
    the inconsistent combination of 'processes' with 'sharedmem'.
    """
    if prefer not in VALID_BACKEND_HINTS:
        raise ValueError("prefer=%r is not a valid backend hint, "
                         "expected one of %r" % (prefer, VALID_BACKEND_HINTS))
    if require not in VALID_BACKEND_CONSTRAINTS:
        raise ValueError("require=%r is not a valid backend constraint, "
                         "expected one of %r"
                         % (require, VALID_BACKEND_CONSTRAINTS))

    wants_sharedmem = require == 'sharedmem'
    if prefer == 'processes' and wants_sharedmem:
        raise ValueError("prefer == 'processes' and require == 'sharedmem'"
                         " are inconsistent settings")

    active = getattr(_backend, 'backend_and_jobs', None)
    if active is None:
        # We are outside of the scope of any parallel_backend context
        # manager, create the default backend instance now.
        chosen = BACKENDS[DEFAULT_BACKEND](nesting_level=0)
        has_sharedmem = getattr(chosen, 'supports_sharedmem', False)
        is_threaded = getattr(chosen, 'uses_threads', False)
        constraint_unmet = wants_sharedmem and not has_sharedmem
        hint_unmet = prefer == 'threads' and not is_threaded
        if constraint_unmet or hint_unmet:
            # Make sure the selected default backend matches the soft hints
            # and hard constraints:
            chosen = BACKENDS[DEFAULT_THREAD_BACKEND](nesting_level=0)
        return chosen, DEFAULT_N_JOBS

    # Try to use the backend set by the user with the context manager.
    current, _n_jobs = active
    level = current.nesting_level
    has_sharedmem = getattr(current, 'supports_sharedmem', False)
    if not wants_sharedmem or has_sharedmem:
        return active

    # This backend does not match the shared memory constraint:
    # fallback to the default thread-based backend.
    replacement = BACKENDS[DEFAULT_THREAD_BACKEND](nesting_level=level)
    if verbose >= 10:
        print("Using %s as joblib.Parallel backend instead of %s "
              "as the latter does not provide shared memory semantics."
              % (replacement.__class__.__name__,
                 current.__class__.__name__))
    return replacement, DEFAULT_N_JOBS
I/O-bound code or + CPU-bound code in a few calls to native code that explicitly releases the + GIL. + + In addition, if the `dask` and `distributed` Python packages are installed, + it is possible to use the 'dask' backend for better scheduling of nested + parallel calls without over-subscription and potentially distribute + parallel calls over a networked cluster of several hosts. + + It is also possible to use the distributed 'ray' backend for distributing + the workload to a cluster of nodes. To use the 'ray' joblib backend add + the following lines:: + + >>> from ray.util.joblib import register_ray # doctest: +SKIP + >>> register_ray() # doctest: +SKIP + >>> with parallel_backend("ray"): # doctest: +SKIP + ... print(Parallel()(delayed(neg)(i + 1) for i in range(5))) + [-1, -2, -3, -4, -5] + + Alternatively the backend can be passed directly as an instance. + + By default all available workers will be used (``n_jobs=-1``) unless the + caller passes an explicit value for the ``n_jobs`` parameter. + + This is an alternative to passing a ``backend='backend_name'`` argument to + the ``Parallel`` class constructor. It is particularly useful when calling + into library code that uses joblib internally but does not expose the + backend argument in its own API. + + >>> from operator import neg + >>> with parallel_backend('threading'): + ... print(Parallel()(delayed(neg)(i + 1) for i in range(5))) + ... + [-1, -2, -3, -4, -5] + + Warning: this function is experimental and subject to change in a future + version of joblib. + + Joblib also tries to limit the oversubscription by limiting the number of + threads usable in some third-party library threadpools like OpenBLAS, MKL + or OpenMP. The default limit in each worker is set to + ``max(cpu_count() // effective_n_jobs, 1)`` but this limit can be + overwritten with the ``inner_max_num_threads`` argument which will be used + to set this limit in the child processes. + + .. 
versionadded:: 0.10 + + """ + def __init__(self, backend, n_jobs=-1, inner_max_num_threads=None, + **backend_params): + if isinstance(backend, str): + if backend not in BACKENDS and backend in EXTERNAL_BACKENDS: + register = EXTERNAL_BACKENDS[backend] + register() + + backend = BACKENDS[backend](**backend_params) + + if inner_max_num_threads is not None: + msg = ("{} does not accept setting the inner_max_num_threads " + "argument.".format(backend.__class__.__name__)) + assert backend.supports_inner_max_num_threads, msg + backend.inner_max_num_threads = inner_max_num_threads + + # If the nesting_level of the backend is not set previously, use the + # nesting level from the previous active_backend to set it + current_backend_and_jobs = getattr(_backend, 'backend_and_jobs', None) + if backend.nesting_level is None: + if current_backend_and_jobs is None: + nesting_level = 0 + else: + nesting_level = current_backend_and_jobs[0].nesting_level + + backend.nesting_level = nesting_level + + # Save the backends info and set the active backend + self.old_backend_and_jobs = current_backend_and_jobs + self.new_backend_and_jobs = (backend, n_jobs) + + _backend.backend_and_jobs = (backend, n_jobs) + + def __enter__(self): + return self.new_backend_and_jobs + + def __exit__(self, type, value, traceback): + self.unregister() + + def unregister(self): + if self.old_backend_and_jobs is None: + if getattr(_backend, 'backend_and_jobs', None) is not None: + del _backend.backend_and_jobs + else: + _backend.backend_and_jobs = self.old_backend_and_jobs + + +# Under Linux or OS X the default start method of multiprocessing +# can cause third party libraries to crash. Under Python 3.4+ it is possible +# to set an environment variable to switch the default start method from +# 'fork' to 'forkserver' or 'spawn' to avoid this issue albeit at the cost +# of causing semantic changes and some additional pool instantiation overhead. 
class BatchedCalls(object):
    """Wrap a sequence of (func, args, kwargs) tuples as a single callable"""

    def __init__(self, iterator_slice, backend_and_jobs, reducer_callback=None,
                 pickle_cache=None):
        tasks = list(iterator_slice)
        self.items = tasks
        self._size = len(tasks)
        self._reducer_callback = reducer_callback
        if not isinstance(backend_and_jobs, tuple):
            # this is for backward compatibility purposes. Before 0.12.6,
            # nested backends were returned without n_jobs indications.
            backend_and_jobs = (backend_and_jobs, None)
        self._backend, self._n_jobs = backend_and_jobs
        if pickle_cache is None:
            pickle_cache = {}
        self._pickle_cache = pickle_cache

    def __call__(self):
        # Run every task with self._backend as the default nested backend,
        # forwarding the stored n_jobs instead of the context default of -1.
        outputs = []
        with parallel_backend(self._backend, n_jobs=self._n_jobs):
            for func, args, kwargs in self.items:
                outputs.append(func(*args, **kwargs))
            return outputs

    def __reduce__(self):
        callback = self._reducer_callback
        if callback is not None:
            callback()
        # no need to pickle the callback.
        ctor_args = (self.items, (self._backend, self._n_jobs), None,
                     self._pickle_cache)
        return BatchedCalls, ctor_args

    def __len__(self):
        return self._size
+ + If only_physical_cores is True, do not take hyperthreading / SMT logical + cores into account. + """ + if mp is None: + return 1 + + return loky.cpu_count(only_physical_cores=only_physical_cores) + + +############################################################################### +# For verbosity + +def _verbosity_filter(index, verbose): + """ Returns False for indices increasingly apart, the distance + depending on the value of verbose. + + We use a lag increasing as the square of index + """ + if not verbose: + return True + elif verbose > 10: + return False + if index == 0: + return False + verbose = .5 * (11 - verbose) ** 2 + scale = sqrt(index / verbose) + next_scale = sqrt((index + 1) / verbose) + return (int(next_scale) == int(scale)) + + +############################################################################### +def delayed(function): + """Decorator used to capture the arguments of a function.""" + + def delayed_function(*args, **kwargs): + return function, args, kwargs + try: + delayed_function = functools.wraps(function)(delayed_function) + except AttributeError: + " functools.wraps fails on some callable objects " + return delayed_function + + +############################################################################### +class BatchCompletionCallBack(object): + """Callback used by joblib.Parallel's multiprocessing backend. + + This callable is executed by the parent process whenever a worker process + has returned the results of a batch of tasks. + + It is used for progress reporting, to update estimate of the batch + processing duration and to schedule the next batch of tasks to be + processed. 
+ + """ + def __init__(self, dispatch_timestamp, batch_size, parallel): + self.dispatch_timestamp = dispatch_timestamp + self.batch_size = batch_size + self.parallel = parallel + + def __call__(self, out): + self.parallel.n_completed_tasks += self.batch_size + this_batch_duration = time.time() - self.dispatch_timestamp + + self.parallel._backend.batch_completed(self.batch_size, + this_batch_duration) + self.parallel.print_progress() + with self.parallel._lock: + if self.parallel._original_iterator is not None: + self.parallel.dispatch_next() + + +############################################################################### +def register_parallel_backend(name, factory, make_default=False): + """Register a new Parallel backend factory. + + The new backend can then be selected by passing its name as the backend + argument to the Parallel class. Moreover, the default backend can be + overwritten globally by setting make_default=True. + + The factory can be any callable that takes no argument and return an + instance of ``ParallelBackendBase``. + + Warning: this function is experimental and subject to change in a future + version of joblib. + + .. versionadded:: 0.10 + + """ + BACKENDS[name] = factory + if make_default: + global DEFAULT_BACKEND + DEFAULT_BACKEND = name + + +def effective_n_jobs(n_jobs=-1): + """Determine the number of jobs that can actually run in parallel + + n_jobs is the number of workers requested by the callers. Passing n_jobs=-1 + means requesting all available workers for instance matching the number of + CPU cores on the worker host(s). + + This method should return a guesstimate of the number of workers that can + actually perform work concurrently with the currently enabled default + backend. The primary use case is to make it possible for the caller to know + in how many chunks to slice the work. 
+ + In general working on larger data chunks is more efficient (less scheduling + overhead and better use of CPU cache prefetching heuristics) as long as all + the workers have enough work to do. + + Warning: this function is experimental and subject to change in a future + version of joblib. + + .. versionadded:: 0.10 + + """ + backend, backend_n_jobs = get_active_backend() + if n_jobs is None: + n_jobs = backend_n_jobs + return backend.effective_n_jobs(n_jobs=n_jobs) + + +############################################################################### +class Parallel(Logger): + ''' Helper class for readable parallel mapping. + + Read more in the :ref:`User Guide `. + + Parameters + ----------- + n_jobs: int, default: None + The maximum number of concurrently running jobs, such as the number + of Python worker processes when backend="multiprocessing" + or the size of the thread-pool when backend="threading". + If -1 all CPUs are used. If 1 is given, no parallel computing code + is used at all, which is useful for debugging. For n_jobs below -1, + (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all + CPUs but one are used. + None is a marker for 'unset' that will be interpreted as n_jobs=1 + (sequential execution) unless the call is performed under a + parallel_backend context manager that sets another value for + n_jobs. + backend: str, ParallelBackendBase instance or None, default: 'loky' + Specify the parallelization backend implementation. + Supported backends are: + + - "loky" used by default, can induce some + communication and memory overhead when exchanging input and + output data with the worker Python processes. + - "multiprocessing" previous process-based backend based on + `multiprocessing.Pool`. Less robust than `loky`. + - "threading" is a very low-overhead backend but it suffers + from the Python Global Interpreter Lock if the called function + relies a lot on Python objects. 
"threading" is mostly useful + when the execution bottleneck is a compiled extension that + explicitly releases the GIL (for instance a Cython loop wrapped + in a "with nogil" block or an expensive call to a library such + as NumPy). + - finally, you can register backends by calling + register_parallel_backend. This will allow you to implement + a backend of your liking. + + It is not recommended to hard-code the backend name in a call to + Parallel in a library. Instead it is recommended to set soft hints + (prefer) or hard constraints (require) so as to make it possible + for library users to change the backend from the outside using the + parallel_backend context manager. + prefer: str in {'processes', 'threads'} or None, default: None + Soft hint to choose the default backend if no specific backend + was selected with the parallel_backend context manager. The + default process-based backend is 'loky' and the default + thread-based backend is 'threading'. Ignored if the ``backend`` + parameter is specified. + require: 'sharedmem' or None, default None + Hard constraint to select the backend. If set to 'sharedmem', + the selected backend will be single-host and thread-based even + if the user asked for a non-thread based backend with + parallel_backend. + verbose: int, optional + The verbosity level: if non zero, progress messages are + printed. Above 50, the output is sent to stdout. + The frequency of the messages increases with the verbosity level. + If it more than 10, all iterations are reported. + timeout: float, optional + Timeout limit for each task to complete. If any task takes longer + a TimeOutError will be raised. Only applied when n_jobs != 1 + pre_dispatch: {'all', integer, or expression, as in '3*n_jobs'} + The number of batches (of tasks) to be pre-dispatched. + Default is '2*n_jobs'. When batch_size="auto" this is reasonable + default and the workers should never starve. 
+ batch_size: int or 'auto', default: 'auto' + The number of atomic tasks to dispatch at once to each + worker. When individual evaluations are very fast, dispatching + calls to workers can be slower than sequential computation because + of the overhead. Batching fast computations together can mitigate + this. + The ``'auto'`` strategy keeps track of the time it takes for a batch + to complete, and dynamically adjusts the batch size to keep the time + on the order of half a second, using a heuristic. The initial batch + size is 1. + ``batch_size="auto"`` with ``backend="threading"`` will dispatch + batches of a single task at a time as the threading backend has + very little overhead and using larger batch size has not proved to + bring any gain in that case. + temp_folder: str, optional + Folder to be used by the pool for memmapping large arrays + for sharing memory with worker processes. If None, this will try in + order: + + - a folder pointed by the JOBLIB_TEMP_FOLDER environment + variable, + - /dev/shm if the folder exists and is writable: this is a + RAM disk filesystem available by default on modern Linux + distributions, + - the default system temporary folder that can be + overridden with TMP, TMPDIR or TEMP environment + variables, typically /tmp under Unix operating systems. + + Only active when backend="loky" or "multiprocessing". + max_nbytes int, str, or None, optional, 1M by default + Threshold on the size of arrays passed to the workers that + triggers automated memory mapping in temp_folder. Can be an int + in Bytes, or a human-readable string, e.g., '1M' for 1 megabyte. + Use None to disable memmapping of large arrays. + Only active when backend="loky" or "multiprocessing". + mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, default: 'r' + Memmapping mode for numpy arrays passed to workers. 
None will + disable memmapping, other modes defined in the numpy.memmap doc: + https://numpy.org/doc/stable/reference/generated/numpy.memmap.html + Also, see 'max_nbytes' parameter documentation for more details. + + Notes + ----- + + This object uses workers to compute in parallel the application of a + function to many different arguments. The main functionality it brings + in addition to using the raw multiprocessing or concurrent.futures API + are (see examples for details): + + * More readable code, in particular since it avoids + constructing list of arguments. + + * Easier debugging: + - informative tracebacks even when the error happens on + the client side + - using 'n_jobs=1' enables to turn off parallel computing + for debugging without changing the codepath + - early capture of pickling errors + + * An optional progress meter. + + * Interruption of multiprocesses jobs with 'Ctrl-C' + + * Flexible pickling control for the communication to and from + the worker processes. + + * Ability to use shared memory efficiently with worker + processes for large numpy-based datastructures. + + Examples + -------- + + A simple example: + + >>> from math import sqrt + >>> from joblib import Parallel, delayed + >>> Parallel(n_jobs=1)(delayed(sqrt)(i**2) for i in range(10)) + [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0] + + Reshaping the output when the function has several return + values: + + >>> from math import modf + >>> from joblib import Parallel, delayed + >>> r = Parallel(n_jobs=1)(delayed(modf)(i/2.) 
for i in range(10)) + >>> res, i = zip(*r) + >>> res + (0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5) + >>> i + (0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0) + + The progress meter: the higher the value of `verbose`, the more + messages: + + >>> from time import sleep + >>> from joblib import Parallel, delayed + >>> r = Parallel(n_jobs=2, verbose=10)(delayed(sleep)(.2) for _ in range(10)) #doctest: +SKIP + [Parallel(n_jobs=2)]: Done 1 tasks | elapsed: 0.6s + [Parallel(n_jobs=2)]: Done 4 tasks | elapsed: 0.8s + [Parallel(n_jobs=2)]: Done 10 out of 10 | elapsed: 1.4s finished + + Traceback example, note how the line of the error is indicated + as well as the values of the parameter passed to the function that + triggered the exception, even though the traceback happens in the + child process: + + >>> from heapq import nlargest + >>> from joblib import Parallel, delayed + >>> Parallel(n_jobs=2)(delayed(nlargest)(2, n) for n in (range(4), 'abcde', 3)) #doctest: +SKIP + #... + --------------------------------------------------------------------------- + Sub-process traceback: + --------------------------------------------------------------------------- + TypeError Mon Nov 12 11:37:46 2012 + PID: 12934 Python 2.7.3: /usr/bin/python + ........................................................................... + /usr/lib/python2.7/heapq.pyc in nlargest(n=2, iterable=3, key=None) + 419 if n >= size: + 420 return sorted(iterable, key=key, reverse=True)[:n] + 421 + 422 # When key is none, use simpler decoration + 423 if key is None: + --> 424 it = izip(iterable, count(0,-1)) # decorate + 425 result = _nlargest(n, it) + 426 return map(itemgetter(0), result) # undecorate + 427 + 428 # General case, slowest method + TypeError: izip argument #1 must support iteration + ___________________________________________________________________________ + + + Using pre_dispatch in a producer/consumer situation, where the + data is generated on the fly. 
Note how the producer is first + called 3 times before the parallel loop is initiated, and then + called to generate new data on the fly: + + >>> from math import sqrt + >>> from joblib import Parallel, delayed + >>> def producer(): + ... for i in range(6): + ... print('Produced %s' % i) + ... yield i + >>> out = Parallel(n_jobs=2, verbose=100, pre_dispatch='1.5*n_jobs')( + ... delayed(sqrt)(i) for i in producer()) #doctest: +SKIP + Produced 0 + Produced 1 + Produced 2 + [Parallel(n_jobs=2)]: Done 1 jobs | elapsed: 0.0s + Produced 3 + [Parallel(n_jobs=2)]: Done 2 jobs | elapsed: 0.0s + Produced 4 + [Parallel(n_jobs=2)]: Done 3 jobs | elapsed: 0.0s + Produced 5 + [Parallel(n_jobs=2)]: Done 4 jobs | elapsed: 0.0s + [Parallel(n_jobs=2)]: Done 6 out of 6 | elapsed: 0.0s remaining: 0.0s + [Parallel(n_jobs=2)]: Done 6 out of 6 | elapsed: 0.0s finished + + ''' + def __init__(self, n_jobs=None, backend=None, verbose=0, timeout=None, + pre_dispatch='2 * n_jobs', batch_size='auto', + temp_folder=None, max_nbytes='1M', mmap_mode='r', + prefer=None, require=None): + active_backend, context_n_jobs = get_active_backend( + prefer=prefer, require=require, verbose=verbose) + nesting_level = active_backend.nesting_level + if backend is None and n_jobs is None: + # If we are under a parallel_backend context manager, look up + # the default number of jobs and use that instead: + n_jobs = context_n_jobs + if n_jobs is None: + # No specific context override and no specific value request: + # default to 1. 
+ n_jobs = 1 + self.n_jobs = n_jobs + self.verbose = verbose + self.timeout = timeout + self.pre_dispatch = pre_dispatch + self._ready_batches = queue.Queue() + self._id = uuid4().hex + self._reducer_callback = None + + if isinstance(max_nbytes, str): + max_nbytes = memstr_to_bytes(max_nbytes) + + self._backend_args = dict( + max_nbytes=max_nbytes, + mmap_mode=mmap_mode, + temp_folder=temp_folder, + prefer=prefer, + require=require, + verbose=max(0, self.verbose - 50), + ) + if DEFAULT_MP_CONTEXT is not None: + self._backend_args['context'] = DEFAULT_MP_CONTEXT + elif hasattr(mp, "get_context"): + self._backend_args['context'] = mp.get_context() + + if backend is None: + backend = active_backend + + elif isinstance(backend, ParallelBackendBase): + # Use provided backend as is, with the current nesting_level if it + # is not set yet. + if backend.nesting_level is None: + backend.nesting_level = nesting_level + + elif hasattr(backend, 'Pool') and hasattr(backend, 'Lock'): + # Make it possible to pass a custom multiprocessing context as + # backend to change the start method to forkserver or spawn or + # preload modules on the forkserver helper process. 
+ self._backend_args['context'] = backend + backend = MultiprocessingBackend(nesting_level=nesting_level) + else: + try: + backend_factory = BACKENDS[backend] + except KeyError as e: + raise ValueError("Invalid backend: %s, expected one of %r" + % (backend, sorted(BACKENDS.keys()))) from e + backend = backend_factory(nesting_level=nesting_level) + + if (require == 'sharedmem' and + not getattr(backend, 'supports_sharedmem', False)): + raise ValueError("Backend %s does not support shared memory" + % backend) + + if (batch_size == 'auto' or isinstance(batch_size, Integral) and + batch_size > 0): + self.batch_size = batch_size + else: + raise ValueError( + "batch_size must be 'auto' or a positive integer, got: %r" + % batch_size) + + self._backend = backend + self._output = None + self._jobs = list() + self._managed_backend = False + + # This lock is used coordinate the main thread of this process with + # the async callback thread of our the pool. + self._lock = threading.RLock() + + def __enter__(self): + self._managed_backend = True + self._initialize_backend() + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._terminate_backend() + self._managed_backend = False + + def _initialize_backend(self): + """Build a process or thread pool and return the number of workers""" + try: + n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self, + **self._backend_args) + if self.timeout is not None and not self._backend.supports_timeout: + warnings.warn( + 'The backend class {!r} does not support timeout. ' + "You have set 'timeout={}' in Parallel but " + "the 'timeout' parameter will not be used.".format( + self._backend.__class__.__name__, + self.timeout)) + + except FallbackToBackend as e: + # Recursively initialize the backend in case of requested fallback. 
+ self._backend = e.backend + n_jobs = self._initialize_backend() + + return n_jobs + + def _effective_n_jobs(self): + if self._backend: + return self._backend.effective_n_jobs(self.n_jobs) + return 1 + + def _terminate_backend(self): + if self._backend is not None: + self._backend.terminate() + + def _dispatch(self, batch): + """Queue the batch for computing, with or without multiprocessing + + WARNING: this method is not thread-safe: it should be only called + indirectly via dispatch_one_batch. + + """ + # If job.get() catches an exception, it closes the queue: + if self._aborting: + return + + self.n_dispatched_tasks += len(batch) + self.n_dispatched_batches += 1 + + dispatch_timestamp = time.time() + cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self) + with self._lock: + job_idx = len(self._jobs) + job = self._backend.apply_async(batch, callback=cb) + # A job can complete so quickly than its callback is + # called before we get here, causing self._jobs to + # grow. To ensure correct results ordering, .insert is + # used (rather than .append) in the following line + self._jobs.insert(job_idx, job) + + def dispatch_next(self): + """Dispatch more data for parallel processing + + This method is meant to be called concurrently by the multiprocessing + callback. We rely on the thread-safety of dispatch_one_batch to protect + against concurrent consumption of the unprotected iterator. + + """ + if not self.dispatch_one_batch(self._original_iterator): + self._iterating = False + self._original_iterator = None + + def dispatch_one_batch(self, iterator): + """Prefetch the tasks for the next batch and dispatch them. + + The effective size of the batch is computed here. + If there are no more jobs to dispatch, return False, else return True. + + The iterator consumption and dispatching is protected by the same + lock so calling this function should be thread safe. 
+ + """ + if self.batch_size == 'auto': + batch_size = self._backend.compute_batch_size() + else: + # Fixed batch size strategy + batch_size = self.batch_size + + with self._lock: + # to ensure an even distribution of the workolad between workers, + # we look ahead in the original iterators more than batch_size + # tasks - However, we keep consuming only one batch at each + # dispatch_one_batch call. The extra tasks are stored in a local + # queue, _ready_batches, that is looked-up prior to re-consuming + # tasks from the origal iterator. + try: + tasks = self._ready_batches.get(block=False) + except queue.Empty: + # slice the iterator n_jobs * batchsize items at a time. If the + # slice returns less than that, then the current batchsize puts + # too much weight on a subset of workers, while other may end + # up starving. So in this case, re-scale the batch size + # accordingly to distribute evenly the last items between all + # workers. + n_jobs = self._cached_effective_n_jobs + big_batch_size = batch_size * n_jobs + + islice = list(itertools.islice(iterator, big_batch_size)) + if len(islice) == 0: + return False + elif (iterator is self._original_iterator + and len(islice) < big_batch_size): + # We reached the end of the original iterator (unless + # iterator is the ``pre_dispatch``-long initial slice of + # the original iterator) -- decrease the batch size to + # account for potential variance in the batches running + # time. + final_batch_size = max(1, len(islice) // (10 * n_jobs)) + else: + final_batch_size = max(1, len(islice) // n_jobs) + + # enqueue n_jobs batches in a local queue + for i in range(0, len(islice), final_batch_size): + tasks = BatchedCalls(islice[i:i + final_batch_size], + self._backend.get_nested_backend(), + self._reducer_callback, + self._pickle_cache) + self._ready_batches.put(tasks) + + # finally, get one task. 
+ tasks = self._ready_batches.get(block=False) + if len(tasks) == 0: + # No more tasks available in the iterator: tell caller to stop. + return False + else: + self._dispatch(tasks) + return True + + def _print(self, msg, msg_args): + """Display the message on stout or stderr depending on verbosity""" + # XXX: Not using the logger framework: need to + # learn to use logger better. + if not self.verbose: + return + if self.verbose < 50: + writer = sys.stderr.write + else: + writer = sys.stdout.write + msg = msg % msg_args + writer('[%s]: %s\n' % (self, msg)) + + def print_progress(self): + """Display the process of the parallel execution only a fraction + of time, controlled by self.verbose. + """ + if not self.verbose: + return + elapsed_time = time.time() - self._start_time + + # Original job iterator becomes None once it has been fully + # consumed : at this point we know the total number of jobs and we are + # able to display an estimation of the remaining time based on already + # completed jobs. Otherwise, we simply display the number of completed + # tasks. 
+ if self._original_iterator is not None: + if _verbosity_filter(self.n_dispatched_batches, self.verbose): + return + self._print('Done %3i tasks | elapsed: %s', + (self.n_completed_tasks, + short_format_time(elapsed_time), )) + else: + index = self.n_completed_tasks + # We are finished dispatching + total_tasks = self.n_dispatched_tasks + # We always display the first loop + if not index == 0: + # Display depending on the number of remaining items + # A message as soon as we finish dispatching, cursor is 0 + cursor = (total_tasks - index + 1 - + self._pre_dispatch_amount) + frequency = (total_tasks // self.verbose) + 1 + is_last_item = (index + 1 == total_tasks) + if (is_last_item or cursor % frequency): + return + remaining_time = (elapsed_time / index) * \ + (self.n_dispatched_tasks - index * 1.0) + # only display status if remaining time is greater or equal to 0 + self._print('Done %3i out of %3i | elapsed: %s remaining: %s', + (index, + total_tasks, + short_format_time(elapsed_time), + short_format_time(remaining_time), + )) + + def retrieve(self): + self._output = list() + while self._iterating or len(self._jobs) > 0: + if len(self._jobs) == 0: + # Wait for an async callback to dispatch new jobs + time.sleep(0.01) + continue + # We need to be careful: the job list can be filling up as + # we empty it and Python list are not thread-safe by default hence + # the use of the lock + with self._lock: + job = self._jobs.pop(0) + + try: + if getattr(self._backend, 'supports_timeout', False): + self._output.extend(job.get(timeout=self.timeout)) + else: + self._output.extend(job.get()) + + except BaseException as exception: + # Note: we catch any BaseException instead of just Exception + # instances to also include KeyboardInterrupt. 
+ + # Stop dispatching any new job in the async callback thread + self._aborting = True + + # If the backend allows it, cancel or kill remaining running + # tasks without waiting for the results as we will raise + # the exception we got back to the caller instead of returning + # any result. + backend = self._backend + if (backend is not None and + hasattr(backend, 'abort_everything')): + # If the backend is managed externally we need to make sure + # to leave it in a working state to allow for future jobs + # scheduling. + ensure_ready = self._managed_backend + backend.abort_everything(ensure_ready=ensure_ready) + raise + + def __call__(self, iterable): + if self._jobs: + raise ValueError('This Parallel instance is already running') + # A flag used to abort the dispatching of jobs in case an + # exception is found + self._aborting = False + + if not self._managed_backend: + n_jobs = self._initialize_backend() + else: + n_jobs = self._effective_n_jobs() + + if isinstance(self._backend, LokyBackend): + # For the loky backend, we add a callback executed when reducing + # BatchCalls, that makes the loky executor use a temporary folder + # specific to this Parallel object when pickling temporary memmaps. + # This callback is necessary to ensure that several Parallel + # objects using the same resuable executor don't use the same + # temporary resources. + + def _batched_calls_reducer_callback(): + # Relevant implementation detail: the following lines, called + # when reducing BatchedCalls, are called in a thread-safe + # situation, meaning that the context of the temporary folder + # manager will not be changed in between the callback execution + # and the end of the BatchedCalls pickling. The reason is that + # pickling (the only place where set_current_context is used) + # is done from a single thread (the queue_feeder_thread). 
+ self._backend._workers._temp_folder_manager.set_current_context( # noqa + self._id + ) + self._reducer_callback = _batched_calls_reducer_callback + + # self._effective_n_jobs should be called in the Parallel.__call__ + # thread only -- store its value in an attribute for further queries. + self._cached_effective_n_jobs = n_jobs + + backend_name = self._backend.__class__.__name__ + if n_jobs == 0: + raise RuntimeError("%s has no active worker." % backend_name) + + self._print("Using backend %s with %d concurrent workers.", + (backend_name, n_jobs)) + if hasattr(self._backend, 'start_call'): + self._backend.start_call() + iterator = iter(iterable) + pre_dispatch = self.pre_dispatch + + if pre_dispatch == 'all' or n_jobs == 1: + # prevent further dispatch via multiprocessing callback thread + self._original_iterator = None + self._pre_dispatch_amount = 0 + else: + self._original_iterator = iterator + if hasattr(pre_dispatch, 'endswith'): + pre_dispatch = eval(pre_dispatch) + self._pre_dispatch_amount = pre_dispatch = int(pre_dispatch) + + # The main thread will consume the first pre_dispatch items and + # the remaining items will later be lazily dispatched by async + # callbacks upon task completions. + + # TODO: this iterator should be batch_size * n_jobs + iterator = itertools.islice(iterator, self._pre_dispatch_amount) + + self._start_time = time.time() + self.n_dispatched_batches = 0 + self.n_dispatched_tasks = 0 + self.n_completed_tasks = 0 + # Use a caching dict for callables that are pickled with cloudpickle to + # improve performances. This cache is used only in the case of + # functions that are defined in the __main__ module, functions that are + # defined locally (inside another function) and lambda expressions. + self._pickle_cache = dict() + try: + # Only set self._iterating to True if at least a batch + # was dispatched. In particular this covers the edge + # case of Parallel used with an exhausted iterator. 
If + # self._original_iterator is None, then this means either + # that pre_dispatch == "all", n_jobs == 1 or that the first batch + # was very quick and its callback already dispatched all the + # remaining jobs. + self._iterating = False + if self.dispatch_one_batch(iterator): + self._iterating = self._original_iterator is not None + + while self.dispatch_one_batch(iterator): + pass + + if pre_dispatch == "all" or n_jobs == 1: + # The iterable was consumed all at once by the above for loop. + # No need to wait for async callbacks to trigger to + # consumption. + self._iterating = False + + with self._backend.retrieval_context(): + self.retrieve() + # Make sure that we get a last message telling us we are done + elapsed_time = time.time() - self._start_time + self._print('Done %3i out of %3i | elapsed: %s finished', + (len(self._output), len(self._output), + short_format_time(elapsed_time))) + finally: + if hasattr(self._backend, 'stop_call'): + self._backend.stop_call() + if not self._managed_backend: + self._terminate_backend() + self._jobs = list() + self._pickle_cache = None + output = self._output + self._output = None + return output + + def __repr__(self): + return '%s(n_jobs=%s)' % (self.__class__.__name__, self.n_jobs) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/pool.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/pool.py new file mode 100644 index 0000000000000000000000000000000000000000..8443899339e45e3d9d971e9fc4ab656aeb5b8e64 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/pool.py @@ -0,0 +1,352 @@ +"""Custom implementation of multiprocessing.Pool with custom pickler. + +This module provides efficient ways of working with data stored in +shared memory with numpy.memmap arrays without inducing any memory +copy between the parent and child processes. 
+ +This module should not be imported if multiprocessing is not +available as it implements subclasses of multiprocessing Pool +that uses a custom alternative to SimpleQueue. + +""" +# Author: Olivier Grisel +# Copyright: 2012, Olivier Grisel +# License: BSD 3 clause + +import copyreg +import sys +import warnings +from time import sleep + +try: + WindowsError +except NameError: + WindowsError = type(None) + +from pickle import Pickler + +from pickle import HIGHEST_PROTOCOL +from io import BytesIO + +from ._memmapping_reducer import get_memmapping_reducers +from ._memmapping_reducer import TemporaryResourcesManager +from ._multiprocessing_helpers import mp, assert_spawning + +# We need the class definition to derive from it, not the multiprocessing.Pool +# factory function +from multiprocessing.pool import Pool + +try: + import numpy as np +except ImportError: + np = None + + +############################################################################### +# Enable custom pickling in Pool queues + +class CustomizablePickler(Pickler): + """Pickler that accepts custom reducers. + + TODO python2_drop : can this be simplified ? + + HIGHEST_PROTOCOL is selected by default as this pickler is used + to pickle ephemeral datastructures for interprocess communication + hence no backward compatibility is required. + + `reducers` is expected to be a dictionary with key/values + being `(type, callable)` pairs where `callable` is a function that + give an instance of `type` will return a tuple `(constructor, + tuple_of_objects)` to rebuild an instance out of the pickled + `tuple_of_objects` as would return a `__reduce__` method. See the + standard library documentation on pickling for more details. + + """ + + # We override the pure Python pickler as its the only way to be able to + # customize the dispatch table without side effects in Python 2.7 + # to 3.2. 
For Python 3.3+ leverage the new dispatch_table + # feature from https://bugs.python.org/issue14166 that makes it possible + # to use the C implementation of the Pickler which is faster. + + def __init__(self, writer, reducers=None, protocol=HIGHEST_PROTOCOL): + Pickler.__init__(self, writer, protocol=protocol) + if reducers is None: + reducers = {} + if hasattr(Pickler, 'dispatch'): + # Make the dispatch registry an instance level attribute instead of + # a reference to the class dictionary under Python 2 + self.dispatch = Pickler.dispatch.copy() + else: + # Under Python 3 initialize the dispatch table with a copy of the + # default registry + self.dispatch_table = copyreg.dispatch_table.copy() + for type, reduce_func in reducers.items(): + self.register(type, reduce_func) + + def register(self, type, reduce_func): + """Attach a reducer function to a given type in the dispatch table.""" + if hasattr(Pickler, 'dispatch'): + # Python 2 pickler dispatching is not explicitly customizable. + # Let us use a closure to workaround this limitation. + def dispatcher(self, obj): + reduced = reduce_func(obj) + self.save_reduce(obj=obj, *reduced) + self.dispatch[type] = dispatcher + else: + self.dispatch_table[type] = reduce_func + + +class CustomizablePicklingQueue(object): + """Locked Pipe implementation that uses a customizable pickler. + + This class is an alternative to the multiprocessing implementation + of SimpleQueue in order to make it possible to pass custom + pickling reducers, for instance to avoid memory copy when passing + memory mapped datastructures. + + `reducers` is expected to be a dict with key / values being + `(type, callable)` pairs where `callable` is a function that, given an + instance of `type`, will return a tuple `(constructor, tuple_of_objects)` + to rebuild an instance out of the pickled `tuple_of_objects` as would + return a `__reduce__` method. + + See the standard library documentation on pickling for more details. 
+ """ + + def __init__(self, context, reducers=None): + self._reducers = reducers + self._reader, self._writer = context.Pipe(duplex=False) + self._rlock = context.Lock() + if sys.platform == 'win32': + self._wlock = None + else: + self._wlock = context.Lock() + self._make_methods() + + def __getstate__(self): + assert_spawning(self) + return (self._reader, self._writer, self._rlock, self._wlock, + self._reducers) + + def __setstate__(self, state): + (self._reader, self._writer, self._rlock, self._wlock, + self._reducers) = state + self._make_methods() + + def empty(self): + return not self._reader.poll() + + def _make_methods(self): + self._recv = recv = self._reader.recv + racquire, rrelease = self._rlock.acquire, self._rlock.release + + def get(): + racquire() + try: + return recv() + finally: + rrelease() + + self.get = get + + if self._reducers: + def send(obj): + buffer = BytesIO() + CustomizablePickler(buffer, self._reducers).dump(obj) + self._writer.send_bytes(buffer.getvalue()) + self._send = send + else: + self._send = send = self._writer.send + if self._wlock is None: + # writes to a message oriented win32 pipe are atomic + self.put = send + else: + wlock_acquire, wlock_release = ( + self._wlock.acquire, self._wlock.release) + + def put(obj): + wlock_acquire() + try: + return send(obj) + finally: + wlock_release() + + self.put = put + + +class PicklingPool(Pool): + """Pool implementation with customizable pickling reducers. + + This is useful to control how data is shipped between processes + and makes it possible to use shared memory without useless + copies induces by the default pickling methods of the original + objects passed as arguments to dispatch. 
+ + `forward_reducers` and `backward_reducers` are expected to be + dictionaries with key/values being `(type, callable)` pairs where + `callable` is a function that, given an instance of `type`, will return a + tuple `(constructor, tuple_of_objects)` to rebuild an instance out of the + pickled `tuple_of_objects` as would return a `__reduce__` method. + See the standard library documentation about pickling for more details. + + """ + + def __init__(self, processes=None, forward_reducers=None, + backward_reducers=None, **kwargs): + if forward_reducers is None: + forward_reducers = dict() + if backward_reducers is None: + backward_reducers = dict() + self._forward_reducers = forward_reducers + self._backward_reducers = backward_reducers + poolargs = dict(processes=processes) + poolargs.update(kwargs) + super(PicklingPool, self).__init__(**poolargs) + + def _setup_queues(self): + context = getattr(self, '_ctx', mp) + self._inqueue = CustomizablePicklingQueue(context, + self._forward_reducers) + self._outqueue = CustomizablePicklingQueue(context, + self._backward_reducers) + self._quick_put = self._inqueue._send + self._quick_get = self._outqueue._recv + + +class MemmappingPool(PicklingPool): + """Process pool that shares large arrays to avoid memory copy. + + This drop-in replacement for `multiprocessing.pool.Pool` makes + it possible to work efficiently with shared memory in a numpy + context. + + Existing instances of numpy.memmap are preserved: the child + suprocesses will have access to the same shared memory in the + original mode except for the 'w+' mode that is automatically + transformed as 'r+' to avoid zeroing the original data upon + instantiation. + + Furthermore large arrays from the parent process are automatically + dumped to a temporary folder on the filesystem such as child + processes to access their content via memmapping (file system + backed shared memory). 
+ + Note: it is important to call the terminate method to collect + the temporary folder used by the pool. + + Parameters + ---------- + processes: int, optional + Number of worker processes running concurrently in the pool. + initializer: callable, optional + Callable executed on worker process creation. + initargs: tuple, optional + Arguments passed to the initializer callable. + temp_folder: (str, callable), optional + If str: + Folder to be used by the pool for memmapping large arrays + for sharing memory with worker processes. If None, this will try in + order: + - a folder pointed by the JOBLIB_TEMP_FOLDER environment variable, + - /dev/shm if the folder exists and is writable: this is a RAMdisk + filesystem available by default on modern Linux distributions, + - the default system temporary folder that can be overridden + with TMP, TMPDIR or TEMP environment variables, typically /tmp + under Unix operating systems. + if callable: + A callable in charge of dynamically resolving a temporary folder + for memmapping large arrays. + max_nbytes: int or None, optional, 1e6 by default + Threshold on the size of arrays passed to the workers that + triggers automated memory mapping in temp_folder. + Use None to disable memmapping of large arrays. + mmap_mode: {'r+', 'r', 'w+', 'c'} + Memmapping mode for numpy arrays passed to workers. + See 'max_nbytes' parameter documentation for more details. + forward_reducers: dictionary, optional + Reducers used to pickle objects passed from master to worker + processes: see below. + backward_reducers: dictionary, optional + Reducers used to pickle return values from workers back to the + master process. + verbose: int, optional + Make it possible to monitor how the communication of numpy arrays + with the subprocess is handled (pickling or memmapping) + prewarm: bool or str, optional, "auto" by default. + If True, force a read on newly memmapped array to make sure that OS + pre-cache it in memory.
This can be useful to avoid concurrent disk + access when the same data array is passed to different worker + processes. If "auto" (by default), prewarm is set to True, unless the + Linux shared memory partition /dev/shm is available and used as temp + folder. + + `forward_reducers` and `backward_reducers` are expected to be + dictionaries with key/values being `(type, callable)` pairs where + `callable` is a function that give an instance of `type` will return + a tuple `(constructor, tuple_of_objects)` to rebuild an instance out + of the pickled `tuple_of_objects` as would return a `__reduce__` + method. See the standard library documentation on pickling for more + details. + + """ + + def __init__(self, processes=None, temp_folder=None, max_nbytes=1e6, + mmap_mode='r', forward_reducers=None, backward_reducers=None, + verbose=0, context_id=None, prewarm=False, **kwargs): + + if context_id is not None: + warnings.warn('context_id is deprecated and ignored in joblib' + ' 0.9.4 and will be removed in 0.11', + DeprecationWarning) + + manager = TemporaryResourcesManager(temp_folder) + self._temp_folder_manager = manager + + # The usage of a temp_folder_resolver over a simple temp_folder is + # superfluous for multiprocessing pools, as they don't get reused, see + # get_memmapping_executor for more details. We still use it for code + # simplicity. 
+ forward_reducers, backward_reducers = \ + get_memmapping_reducers( + temp_folder_resolver=manager.resolve_temp_folder_name, + max_nbytes=max_nbytes, mmap_mode=mmap_mode, + forward_reducers=forward_reducers, + backward_reducers=backward_reducers, verbose=verbose, + unlink_on_gc_collect=False, prewarm=prewarm) + + poolargs = dict( + processes=processes, + forward_reducers=forward_reducers, + backward_reducers=backward_reducers) + poolargs.update(kwargs) + super(MemmappingPool, self).__init__(**poolargs) + + def terminate(self): + n_retries = 10 + for i in range(n_retries): + try: + super(MemmappingPool, self).terminate() + break + except OSError as e: + if isinstance(e, WindowsError): + # Workaround occasional "[Error 5] Access is denied" issue + # when trying to terminate a process under windows. + sleep(0.1) + if i + 1 == n_retries: + warnings.warn("Failed to terminate worker processes in" + " multiprocessing pool: %r" % e) + self._temp_folder_manager._unlink_temporary_resources() + + @property + def _temp_folder(self): + # Legacy property in tests. could be removed if we refactored the + # memmapping tests. SHOULD ONLY BE USED IN TESTS! + # We cache this property because it is called late in the tests - at + # this point, all context have been unregistered, and + # resolve_temp_folder_name raises an error. 
+ if getattr(self, '_cached_temp_folder', None) is not None: + return self._cached_temp_folder + else: + self._cached_temp_folder = self._temp_folder_manager.resolve_temp_folder_name() # noqa + return self._cached_temp_folder diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/testing.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/testing.py new file mode 100644 index 0000000000000000000000000000000000000000..28f79311c971bccd5def7b8b78156ef68a303254 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/joblib/testing.py @@ -0,0 +1,77 @@ +""" +Helper for testing. +""" + +import sys +import warnings +import os.path +import re +import subprocess +import threading + +import pytest +import _pytest + + +raises = pytest.raises +warns = pytest.warns +SkipTest = _pytest.runner.Skipped +skipif = pytest.mark.skipif +fixture = pytest.fixture +parametrize = pytest.mark.parametrize +timeout = pytest.mark.timeout +xfail = pytest.mark.xfail +param = pytest.param + + +def warnings_to_stdout(): + """ Redirect all warnings to stdout. + """ + showwarning_orig = warnings.showwarning + + def showwarning(msg, cat, fname, lno, file=None, line=0): + showwarning_orig(msg, cat, os.path.basename(fname), line, sys.stdout) + + warnings.showwarning = showwarning + # warnings.simplefilter('always') + + +def check_subprocess_call(cmd, timeout=5, stdout_regex=None, + stderr_regex=None): + """Runs a command in a subprocess with timeout in seconds. + + Also checks returncode is zero, stdout if stdout_regex is set, and + stderr if stderr_regex is set. 
+ """ + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + def kill_process(): + warnings.warn("Timeout running {}".format(cmd)) + proc.kill() + + timer = threading.Timer(timeout, kill_process) + try: + timer.start() + stdout, stderr = proc.communicate() + stdout, stderr = stdout.decode(), stderr.decode() + if proc.returncode != 0: + message = ( + 'Non-zero return code: {}.\nStdout:\n{}\n' + 'Stderr:\n{}').format( + proc.returncode, stdout, stderr) + raise ValueError(message) + + if (stdout_regex is not None and + not re.search(stdout_regex, stdout)): + raise ValueError( + "Unexpected stdout: {!r} does not match:\n{!r}".format( + stdout_regex, stdout)) + if (stderr_regex is not None and + not re.search(stderr_regex, stderr)): + raise ValueError( + "Unexpected stderr: {!r} does not match:\n{!r}".format( + stderr_regex, stderr)) + + finally: + timer.cancel() diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/nilearn/__init__.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/nilearn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ccf6e63ae43dfab338e622ffd020c8503a40f3fe --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/nilearn/__init__.py @@ -0,0 +1,92 @@ +""" +Machine Learning module for NeuroImaging in python +-------------------------------------------------- + +Documentation is available in the docstrings and online at +http://nilearn.github.io. + +Contents +-------- +Nilearn aims at simplifying the use of the scikit-learn package in the context of +neuroimaging. It provides specific input/output functions, algorithms and +visualization tools. 
+ +Submodules +--------- +datasets --- Utilities to download NeuroImaging datasets +decoding --- Decoding tools and algorithms +decomposition --- Includes a subject level variant of the ICA + algorithm called Canonical ICA +connectome --- Set of tools for computing functional connectivity matrices + and for sparse multi-subjects learning of Gaussian graphical models +image --- Set of functions defining mathematical operations + working on Niimg-like objects +maskers --- Includes scikit-learn transformers. +masking --- Utilities to compute and operate on brain masks +interfaces --- Includes tools to preprocess neuro-imaging data + from various common interfaces like fMRIPrep. +mass_univariate --- Defines a Massively Univariate Linear Model + estimated with OLS and permutation test +plotting --- Plotting code for nilearn +region --- Set of functions for extracting region-defined + signals, clustering methods, connected regions extraction +signal --- Set of preprocessing functions for time series +""" + +import gzip +import os +import sys +import pkg_resources +import warnings + +from .version import ( + _check_module_dependencies, __version__, _compare_version +) + +# Workaround issue discovered in intel-openmp 2019.5: +# https://github.com/ContinuumIO/anaconda-issues/issues/11294 +# +# see also https://github.com/scikit-learn/scikit-learn/pull/15020 +os.environ.setdefault("KMP_INIT_AT_FORK", "FALSE") + + +def _py36_deprecation_warning(): + py36_warning = ("Python 3.6 support is deprecated and will be removed in " + "release 0.10 of Nilearn. 
Consider switching to " + "Python 3.8 or 3.9.") + warnings.filterwarnings('once', message=py36_warning) + warnings.warn(message=py36_warning, + category=FutureWarning, + stacklevel=3) + + +def _python_deprecation_warnings(): + if sys.version_info.major == 3 and sys.version_info.minor == 6: + _py36_deprecation_warning() + + +_check_module_dependencies() +_python_deprecation_warnings() + + +# Monkey-patch gzip to have faster reads on large gzip files +if hasattr(gzip.GzipFile, 'max_read_chunk'): + gzip.GzipFile.max_read_chunk = 100 * 1024 * 1024 # 100Mb + +# Boolean controlling the default globbing technique when using check_niimg +# and the os.path.expanduser usage in CacheMixin. +# Default value it True, set it to False to completely deactivate this +# behavior. +EXPAND_PATH_WILDCARDS = True + +# Boolean controlling whether the joblib caches should be +# flushed if the version of certain modules changes (eg nibabel, as it +# does not respect the backward compatibility in some of its internal +# structures +# This is used in nilearn._utils.cache_mixin +CHECK_CACHE_VERSION = True + +# list all submodules available in nilearn and version +__all__ = ['datasets', 'decoding', 'decomposition', 'connectome', + 'image', 'maskers', 'masking', 'interfaces', 'mass_univariate', + 'plotting', 'regions', 'signal', 'surface', '__version__'] diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/nilearn/conftest.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/nilearn/conftest.py new file mode 100644 index 0000000000000000000000000000000000000000..10fd7529116b3bed1be38c440fa98a80acddee0f --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/nilearn/conftest.py @@ -0,0 +1,51 @@ + +import numpy as np +import pytest + +from _pytest.doctest import DoctestItem + +# we need to import these fixtures even if not used in this module +from nilearn.datasets._testing import request_mocker # noqa: F401 +from 
nilearn.datasets._testing import temp_nilearn_data_dir # noqa: F401 +from nilearn.version import _compare_version + + +collect_ignore = ["datasets/data/convert_templates.py"] + + +try: + import matplotlib # noqa: F401 +except ImportError: + collect_ignore.extend(['plotting', 'reporting']) + matplotlib = None + + +def pytest_configure(config): + """Use Agg so that no figures pop up.""" + if matplotlib is not None: + matplotlib.use('Agg', force=True) + + +@pytest.fixture(autouse=True) +def close_all(): + """Close all matplotlib figures.""" + yield + if matplotlib is not None: + import matplotlib.pyplot as plt + plt.close('all') # takes < 1 us so just always do it + + +def pytest_collection_modifyitems(items): + # numpy changed the str/repr formatting of numpy arrays in 1.14. + # We want to run doctests only for numpy >= 1.14.Adapted from scikit-learn + if _compare_version(np.__version__, '<', '1.14'): + reason = 'doctests are only run for numpy >= 1.14' + skip_doctests = True + else: + skip_doctests = False + + if skip_doctests: + skip_marker = pytest.mark.skip(reason=reason) + for item in items: + if isinstance(item, DoctestItem): + item.add_marker(skip_marker) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/nilearn/masking.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/nilearn/masking.py new file mode 100644 index 0000000000000000000000000000000000000000..60491ac0377cfe45e4bd21bc7274311c134ddfc8 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/nilearn/masking.py @@ -0,0 +1,940 @@ +""" +Utilities to compute and operate on brain masks +""" +# Authors: Gael Varoquaux, Alexandre Abraham, Philippe Gervais, Ana Luisa Pinho +# License: simplified BSD +import warnings +import numbers + +import numpy as np +from scipy import ndimage +from joblib import Parallel, delayed + +from sklearn.utils import deprecated +from . 
import _utils +from .image import get_data, new_img_like, resampling +from ._utils import fill_doc +from ._utils.cache_mixin import cache +from ._utils.ndimage import largest_connected_component, get_border_data +from ._utils.niimg import _safe_get_data +from .datasets import (load_mni152_template, load_mni152_gm_template, + load_mni152_wm_template) + + +class MaskWarning(UserWarning): + "A class to always raise warnings" + + +warnings.simplefilter("always", MaskWarning) + + +def _load_mask_img(mask_img, allow_empty=False): + """Check that a mask is valid, ie with two values including 0 and load it. + + Parameters + ---------- + mask_img : Niimg-like object + See https://nilearn.github.io/stable/manipulating_images/input_output.html # noqa:E501 + The mask to check. + + allow_empty : :obj:`bool`, optional + Allow loading an empty mask (full of 0 values). + Default=False. + + Returns + ------- + mask : :class:`numpy.ndarray` + Boolean version of the mask. + + mask_affine: None or (4,4) array-like + Affine of the mask. + """ + mask_img = _utils.check_niimg_3d(mask_img) + mask = _safe_get_data(mask_img, ensure_finite=True) + values = np.unique(mask) + + if len(values) == 1: + # We accept a single value if it is not 0 (full true mask). + if values[0] == 0 and not allow_empty: + raise ValueError( + 'The mask is invalid as it is empty: it masks all data.') + elif len(values) == 2: + # If there are 2 different values, one of them must be 0 (background) + if 0 not in values: + raise ValueError('Background of the mask must be represented with' + '0. Given mask contains: %s.' % values) + elif len(values) != 2: + # If there are more than 2 values, the mask is invalid + raise ValueError('Given mask is not made of 2 values: %s' + '. 
Cannot interpret as true or false' + % values) + + mask = _utils.as_ndarray(mask, dtype=bool) + return mask, mask_img.affine + + +def _extrapolate_out_mask(data, mask, iterations=1): + """Extrapolate values outside of the mask.""" + if iterations > 1: + data, mask = _extrapolate_out_mask(data, mask, + iterations=iterations - 1) + new_mask = ndimage.binary_dilation(mask) + larger_mask = np.zeros(np.array(mask.shape) + 2, dtype=bool) + larger_mask[1:-1, 1:-1, 1:-1] = mask + # Use nans as missing value: ugly + masked_data = np.zeros(larger_mask.shape + data.shape[3:]) + masked_data[1:-1, 1:-1, 1:-1] = data.copy() + masked_data[np.logical_not(larger_mask)] = np.nan + outer_shell = larger_mask.copy() + outer_shell[1:-1, 1:-1, 1:-1] = np.logical_xor(new_mask, mask) + outer_shell_x, outer_shell_y, outer_shell_z = np.where(outer_shell) + extrapolation = list() + for i, j, k in [(1, 0, 0), (-1, 0, 0), + (0, 1, 0), (0, -1, 0), + (0, 0, 1), (0, 0, -1)]: + this_x = outer_shell_x + i + this_y = outer_shell_y + j + this_z = outer_shell_z + k + extrapolation.append(masked_data[this_x, this_y, this_z]) + + extrapolation = np.array(extrapolation) + extrapolation = (np.nansum(extrapolation, axis=0) / + np.sum(np.isfinite(extrapolation), axis=0)) + extrapolation[np.logical_not(np.isfinite(extrapolation))] = 0 + new_data = np.zeros_like(masked_data) + new_data[outer_shell] = extrapolation + new_data[larger_mask] = masked_data[larger_mask] + return new_data[1:-1, 1:-1, 1:-1], new_mask + + +# +# Utilities to compute masks +# +@_utils.fill_doc +def intersect_masks(mask_imgs, threshold=0.5, connected=True): + """Compute intersection of several masks. + + Given a list of input mask images, generate the output image which + is the threshold-level intersection of the inputs. + + Parameters + ---------- + mask_imgs : :obj:`list` of Niimg-like objects + See https://nilearn.github.io/stable/manipulating_images/input_output.html # noqa:E501 + 3D individual masks with same shape and affine. 
+ + threshold : :obj:`float`, optional + Gives the level of the intersection, must be within [0, 1]. + threshold=1 corresponds to keeping the intersection of all + masks, whereas threshold=0 is the union of all masks. + Default=0.5. + %(connected)s + Default=True. + + Returns + ------- + grp_mask : 3D :class:`nibabel.nifti1.Nifti1Image` + Intersection of all masks. + """ + if len(mask_imgs) == 0: + raise ValueError('No mask provided for intersection') + grp_mask = None + first_mask, ref_affine = _load_mask_img(mask_imgs[0], allow_empty=True) + ref_shape = first_mask.shape + if threshold > 1: + raise ValueError('The threshold should be smaller than 1') + if threshold < 0: + raise ValueError('The threshold should be greater than 0') + threshold = min(threshold, 1 - 1.e-7) + + for this_mask in mask_imgs: + mask, affine = _load_mask_img(this_mask, allow_empty=True) + if np.any(affine != ref_affine): + raise ValueError("All masks should have the same affine") + if np.any(mask.shape != ref_shape): + raise ValueError("All masks should have the same shape") + + if grp_mask is None: + # We use int here because there may be a lot of masks to merge + grp_mask = _utils.as_ndarray(mask, dtype=int) + else: + # If this_mask is floating point and grp_mask is integer, numpy 2 + # casting rules raise an error for in-place addition. Hence we do + # it long-hand. + # XXX should the masks be coerced to int before addition? + grp_mask += mask + + grp_mask = grp_mask > (threshold * len(list(mask_imgs))) + + if np.any(grp_mask > 0) and connected: + grp_mask = largest_connected_component(grp_mask) + grp_mask = _utils.as_ndarray(grp_mask, dtype=np.int8) + return new_img_like(_utils.check_niimg_3d(mask_imgs[0]), grp_mask, + ref_affine) + + +def _post_process_mask(mask, affine, opening=2, connected=True, + warning_msg=""): + """Helper function for mask computing functions. + + Performs opening and keep only largest connected component is + ``connected=True``. 
+ """ + if opening: + opening = int(opening) + mask = ndimage.binary_erosion(mask, iterations=opening) + mask_any = mask.any() + if not mask_any: + warnings.warn("Computed an empty mask. %s" % warning_msg, + MaskWarning, stacklevel=2) + if connected and mask_any: + mask = largest_connected_component(mask) + if opening: + mask = ndimage.binary_dilation(mask, iterations=2 * opening) + mask = ndimage.binary_erosion(mask, iterations=opening) + return mask, affine + + +@_utils.fill_doc +def compute_epi_mask(epi_img, lower_cutoff=0.2, upper_cutoff=0.85, + connected=True, opening=2, exclude_zeros=False, + ensure_finite=True, + target_affine=None, target_shape=None, + memory=None, verbose=0,): + """Compute a brain mask from :term:`fMRI` data in 3D or + 4D :class:`numpy.ndarray`. + + This is based on an heuristic proposed by T.Nichols: + find the least dense point of the histogram, between fractions + ``lower_cutoff`` and ``upper_cutoff`` of the total image histogram. + + .. note:: + + In case of failure, it is usually advisable to + increase ``lower_cutoff``. + + Parameters + ---------- + epi_img : Niimg-like object + See https://nilearn.github.io/stable/manipulating_images/input_output.html # noqa:E501 + :term:`EPI` image, used to compute the mask. + 3D and 4D images are accepted. + + .. note:: + If a 3D image is given, we suggest to use the mean image. + + %(lower_cutoff)s + Default=0.2. + %(upper_cutoff)s + Default=0.85. + %(connected)s + Default=True. + %(opening)s + Default=2. + ensure_finite : :obj:`bool` + If ensure_finite is True, the non-finite values (NaNs and infs) + found in the images will be replaced by zeros + Default=True. + + exclude_zeros : :obj:`bool`, optional + Consider zeros as missing values for the computation of the + threshold. This option is useful if the images have been + resliced with a large padding of zeros. + Default=False. + %(target_affine)s + + .. note:: + This parameter is passed to :func:`nilearn.image.resample_img`. 
+ + %(target_shape)s + + .. note:: + This parameter is passed to :func:`nilearn.image.resample_img`. + + %(memory)s + %(verbose0)s + + Returns + ------- + mask : :class:`nibabel.nifti1.Nifti1Image` + The brain mask (3D image). + """ + if verbose > 0: + print("EPI mask computation") + + # Delayed import to avoid circular imports + from .image.image import _compute_mean + mean_epi, affine = \ + cache(_compute_mean, memory)(epi_img, target_affine=target_affine, + target_shape=target_shape, + smooth=(1 if opening else False)) + + if ensure_finite: + # Get rid of memmapping + mean_epi = _utils.as_ndarray(mean_epi) + # SPM tends to put NaNs in the data outside the brain + mean_epi[np.logical_not(np.isfinite(mean_epi))] = 0 + sorted_input = np.sort(np.ravel(mean_epi)) + if exclude_zeros: + sorted_input = sorted_input[sorted_input != 0] + lower_cutoff = int(np.floor(lower_cutoff * len(sorted_input))) + upper_cutoff = min(int(np.floor(upper_cutoff * len(sorted_input))), + len(sorted_input) - 1) + + delta = sorted_input[lower_cutoff + 1:upper_cutoff + 1] \ + - sorted_input[lower_cutoff:upper_cutoff] + ia = delta.argmax() + threshold = 0.5 * (sorted_input[ia + lower_cutoff] + + sorted_input[ia + lower_cutoff + 1]) + + mask = mean_epi >= threshold + + mask, affine = _post_process_mask(mask, affine, opening=opening, + connected=connected, + warning_msg="Are you sure that input " + "data are EPI images not detrended. ") + return new_img_like(epi_img, mask, affine) + + +@_utils.fill_doc +def compute_multi_epi_mask(epi_imgs, lower_cutoff=0.2, upper_cutoff=0.85, + connected=True, opening=2, threshold=0.5, + target_affine=None, target_shape=None, + exclude_zeros=False, n_jobs=1, + memory=None, verbose=0): + """Compute a common mask for several sessions or subjects + of :term:`fMRI` data. + + Uses the mask-finding algorithms to extract masks for each session + or subject, and then keep only the main connected component of the + a given fraction of the intersection of all the masks. 
+ + Parameters + ---------- + epi_imgs : :obj:`list` of Niimg-like objects + See https://nilearn.github.io/stable/manipulating_images/input_output.html # noqa:E501 + A list of images, each item being a subject or a session. + 3D and 4D images are accepted. + + .. note:: + + If 3D images are given, we suggest using the mean image + of each session. + + threshold : :obj:`float`, optional + The inter-session threshold: the fraction of the + total number of sessions for which a :term:`voxel` must be + in the mask to be kept in the common mask. + threshold=1 corresponds to keeping the intersection of all + masks, whereas threshold=0 is the union of all masks. + %(lower_cutoff)s + Default=0.2. + %(upper_cutoff)s + Default=0.85. + %(connected)s + Default=True. + exclude_zeros : :obj:`bool`, optional + Consider zeros as missing values for the computation of the + threshold. This option is useful if the images have been + resliced with a large padding of zeros. + Default=False. + %(target_affine)s + + .. note:: + This parameter is passed to :func:`nilearn.image.resample_img`. + + %(target_shape)s + + .. note:: + This parameter is passed to :func:`nilearn.image.resample_img`. + + %(memory)s + %(n_jobs)s + + Returns + ------- + mask : 3D :class:`nibabel.nifti1.Nifti1Image` + The brain mask.
+ """ + if len(epi_imgs) == 0: + raise TypeError('An empty object - %r - was passed instead of an ' + 'image or a list of images' % epi_imgs) + masks = Parallel(n_jobs=n_jobs, verbose=verbose)( + delayed(compute_epi_mask)(epi_img, + lower_cutoff=lower_cutoff, + upper_cutoff=upper_cutoff, + connected=connected, + opening=opening, + exclude_zeros=exclude_zeros, + target_affine=target_affine, + target_shape=target_shape, + memory=memory) + for epi_img in epi_imgs) + + mask = intersect_masks(masks, connected=connected, threshold=threshold) + return mask + + +@_utils.fill_doc +def compute_background_mask(data_imgs, border_size=2, + connected=False, opening=False, + target_affine=None, target_shape=None, + memory=None, verbose=0): + """Compute a brain mask for the images by guessing the value of the + background from the border of the image. + + Parameters + ---------- + data_imgs : Niimg-like object + See https://nilearn.github.io/stable/manipulating_images/input_output.html # noqa:E501 + Images used to compute the mask. 3D and 4D images are accepted. + + .. note:: + + If a 3D image is given, we suggest to use the mean image. + + %(border_size)s + Default=2. + %(connected)s + Default=False. + %(opening)s + Default=False. + %(target_affine)s + + .. note:: + This parameter is passed to :func:`nilearn.image.resample_img`. + + %(target_shape)s + + .. note:: + This parameter is passed to :func:`nilearn.image.resample_img`. + + %(memory)s + %(verbose0)s + + Returns + ------- + mask : :class:`nibabel.nifti1.Nifti1Image` + The brain mask (3D image). 
+ """ + if verbose > 0: + print("Background mask computation") + + data_imgs = _utils.check_niimg(data_imgs) + + # Delayed import to avoid circular imports + from .image.image import _compute_mean + data, affine = cache(_compute_mean, memory)(data_imgs, + target_affine=target_affine, + target_shape=target_shape, + smooth=False) + + if np.isnan(get_border_data(data, border_size)).any(): + # We absolutely need to catter for NaNs as a background: + # SPM does that by default + mask = np.logical_not(np.isnan(data)) + else: + background = np.median(get_border_data(data, border_size)) + mask = data != background + + mask, affine = _post_process_mask(mask, affine, opening=opening, + connected=connected, + warning_msg="Are you sure that input " + "images have a homogeneous background.") + return new_img_like(data_imgs, mask, affine) + + +@_utils.fill_doc +def compute_multi_background_mask(data_imgs, border_size=2, upper_cutoff=0.85, + connected=True, opening=2, threshold=0.5, + target_affine=None, target_shape=None, + exclude_zeros=False, n_jobs=1, + memory=None, verbose=0): + """Compute a common mask for several sessions or subjects of data. + + Uses the mask-finding algorithms to extract masks for each session + or subject, and then keep only the main connected component of the + a given fraction of the intersection of all the masks. + + Parameters + ---------- + data_imgs : :obj:`list` of Niimg-like objects + See https://nilearn.github.io/stable/manipulating_images/input_output.html # noqa:E501 + A list of arrays, each item being a subject or a session. + 3D and 4D images are accepted. + + .. note:: + If 3D images are given, we suggest to use the mean image + of each session. + + threshold : :obj:`float`, optional + The inter-session threshold: the fraction of the + total number of session in for which a :term:`voxel` must be + in the mask to be kept in the common mask. 
+ threshold=1 corresponds to keeping the intersection of all + masks, whereas threshold=0 is the union of all masks. + %(border_size)s + Default=2. + %(connected)s + Default=True. + %(target_affine)s + + .. note:: + This parameter is passed to :func:`nilearn.image.resample_img`. + + %(target_shape)s + + .. note:: + This parameter is passed to :func:`nilearn.image.resample_img`. + + %(memory)s + %(n_jobs)s + + Returns + ------- + mask : 3D :class:`nibabel.nifti1.Nifti1Image` + The brain mask. + """ + if len(data_imgs) == 0: + raise TypeError('An empty object - %r - was passed instead of an ' + 'image or a list of images' % data_imgs) + masks = Parallel(n_jobs=n_jobs, verbose=verbose)( + delayed(compute_background_mask)(img, + border_size=border_size, + connected=connected, + opening=opening, + target_affine=target_affine, + target_shape=target_shape, + memory=memory) + for img in data_imgs) + + mask = intersect_masks(masks, connected=connected, threshold=threshold) + return mask + + +@_utils.fill_doc +def compute_brain_mask(target_img, threshold=.5, connected=True, opening=2, + memory=None, verbose=0, mask_type='whole-brain'): + """Compute the whole-brain, grey-matter or white-matter mask. + This mask is calculated using MNI152 1mm-resolution template mask onto the + target image. + + Parameters + ---------- + target_img : Niimg-like object + See https://nilearn.github.io/stable/manipulating_images/input_output.html # noqa:E501 + Images used to compute the mask. 3D and 4D images are accepted. + Only the shape and affine of ``target_img`` will be used here. + + threshold : :obj:`float`, optional + The value under which the :term:`MNI` template is cut off. + Default=0.5 + %(connected)s + Default=True. + %(opening)s + Default=2. + %(memory)s + %(verbose0)s + %(mask_type)s + + .. versionadded:: 0.8.1 + + Returns + ------- + mask : :class:`nibabel.nifti1.Nifti1Image` + The whole-brain mask (3D image). 
+ """ + if verbose > 0: + print("Template", mask_type, "mask computation") + + target_img = _utils.check_niimg(target_img) + + if mask_type == 'whole-brain': + template = load_mni152_template(resolution=1) + elif mask_type == 'gm': + template = load_mni152_gm_template(resolution=1) + elif mask_type == 'wm': + template = load_mni152_wm_template(resolution=1) + else: + raise ValueError(f"Unknown mask type {mask_type}. " + "Only 'whole-brain', 'gm' or 'wm' are accepted.") + + resampled_template = cache(resampling.resample_to_img, memory)( + template, target_img) + + mask = (get_data(resampled_template) >= threshold).astype("int8") + + warning_message = (f"{mask_type} mask is empty, " + "lower the threshold or check your input FOV") + mask, affine = _post_process_mask(mask, target_img.affine, opening=opening, + connected=connected, + warning_msg=warning_message) + + return new_img_like(target_img, mask, affine) + + +@deprecated("Function 'compute_multi_gray_matter_mask' has been renamed to " + "'compute_multi_brain_mask' and 'compute_multi_gray_matter_mask' " + "will be removed in release 0.10.0") +@_utils.fill_doc +def compute_multi_gray_matter_mask(target_imgs, threshold=.5, + connected=True, opening=2, + memory=None, verbose=0, n_jobs=1, **kwargs): + """Compute a mask corresponding to the gray matter part of the brain for + a list of images. + + The gray matter part is calculated through the resampling of MNI152 + template gray matter mask onto the target image + + Parameters + ---------- + target_imgs : :obj:`list` of Niimg-like object + See https://nilearn.github.io/stable/manipulating_images/input_output.html # noqa:E501 + Images used to compute the mask. 3D and 4D images are accepted. + + .. note:: + The images in this list must be of same shape and affine. + The mask is calculated with the first element of the list + for only the shape/affine of the image is used for this + masking strategy. 
+ + threshold : :obj:`float`, optional + The value under which the :term:`MNI` template is cut off. + Default=0.5. + %(connected)s + Default=True. + %(opening)s + Default=2. + %(memory)s + %(verbose0)s + %(n_jobs)s + + .. note:: + Argument not used but kept to fit the API. + + **kwargs : optional arguments + arguments such as 'target_affine' are used in the call of other + masking strategies, which then would raise an error for this function + which does not need such arguments. + + Returns + ------- + mask : :class:`nibabel.nifti1.Nifti1Image` + The brain mask (3D image). + + See also + -------- + nilearn.masking.compute_brain_mask + """ + return compute_multi_brain_mask(target_imgs=target_imgs, + threshold=threshold, connected=connected, + opening=opening, memory=memory, + verbose=verbose, n_jobs=n_jobs, + mask_type='whole-brain', **kwargs) + + +@_utils.fill_doc +def compute_multi_brain_mask(target_imgs, threshold=.5, connected=True, + opening=2, memory=None, verbose=0, n_jobs=1, + mask_type='whole-brain', **kwargs): + """Compute the whole-brain, grey-matter or white-matter mask for a list of + images. The mask is calculated through the resampling of the corresponding + MNI152 template mask onto the target image. + + .. versionadded:: 0.8.1 + + Parameters + ---------- + target_imgs : :obj:`list` of Niimg-like object + See https://nilearn.github.io/stable/manipulating_images/input_output.html # noqa:E501 + Images used to compute the mask. 3D and 4D images are accepted. + + .. note:: + The images in this list must be of same shape and affine. + The mask is calculated with the first element of the list + for only the shape/affine of the image is used for this + masking strategy. + + threshold : :obj:`float`, optional + The value under which the :term:`MNI` template is cut off. + Default=0.5. + %(connected)s + Default=True. + %(opening)s + Default=2. + %(mask_type)s + %(memory)s + %(verbose0)s + %(n_jobs)s + + .. 
note:: + Argument not used but kept to fit the API + + **kwargs : optional arguments + Arguments such as 'target_affine' are used in the call of other + masking strategies, which then would raise an error for this function + which does not need such arguments. + + Returns + ------- + mask : :class:`nibabel.nifti1.Nifti1Image` + The brain mask (3D image). + + See also + -------- + nilearn.masking.compute_brain_mask + """ + if len(target_imgs) == 0: + raise TypeError('An empty object - %r - was passed instead of an ' + 'image or a list of images' % target_imgs) + + # Check images in the list have the same FOV without loading them in memory + imgs_generator = _utils.check_niimg(target_imgs, return_iterator=True) + for _ in imgs_generator: + pass + + mask = compute_brain_mask(target_imgs[0], threshold=threshold, + connected=connected, opening=opening, + memory=memory, verbose=verbose, + mask_type=mask_type) + return mask + + +# +# Time series extraction +# + +@fill_doc +def apply_mask(imgs, mask_img, dtype='f', + smoothing_fwhm=None, ensure_finite=True): + """Extract signals from images using specified mask. + + Read the time series from the given Niimg-like object, using the mask. + + Parameters + ----------- + imgs : :obj:`list` of 4D Niimg-like objects + See https://nilearn.github.io/stable/manipulating_images/input_output.html # noqa:E501 + Images to be masked. list of lists of 3D images are also accepted. + + mask_img : Niimg-like object + See https://nilearn.github.io/stable/manipulating_images/input_output.html # noqa:E501 + 3D mask array: True where a :term:`voxel` should be used. + + dtype: numpy dtype or 'f' + The dtype of the output, if 'f', any float output is acceptable + and if the data is stored on the disk as floats the data type + will not be changed. + %(smoothing_fwhm)s + + .. note:: + + Implies ensure_finite=True. 
+ + ensure_finite : :obj:`bool` + If ensure_finite is True, the non-finite values (NaNs and + infs) found in the images will be replaced by zeros. + Default=True. + + Returns + -------- + session_series : :class:`numpy.ndarray` + 2D array of series with shape (image number, :term:`voxel` number) + + Notes + ----- + When using smoothing, ``ensure_finite`` is set to True, as non-finite + values would spread across the image. + """ + mask_img = _utils.check_niimg_3d(mask_img) + mask, mask_affine = _load_mask_img(mask_img) + mask_img = new_img_like(mask_img, mask, mask_affine) + return _apply_mask_fmri(imgs, mask_img, dtype=dtype, + smoothing_fwhm=smoothing_fwhm, + ensure_finite=ensure_finite) + + +def _apply_mask_fmri(imgs, mask_img, dtype='f', + smoothing_fwhm=None, ensure_finite=True): + """Same as :func:`nilearn.masking.apply_mask`. + + The only difference with :func:`nilearn.masking.apply_mask` is that + some costly checks on ``mask_img`` are not performed: ``mask_img`` is + assumed to contain only two different values (this is checked for in + :func:`nilearn.masking.apply_mask`, not in this function). + """ + mask_img = _utils.check_niimg_3d(mask_img) + mask_affine = mask_img.affine + mask_data = _utils.as_ndarray(get_data(mask_img), + dtype=bool) + + if smoothing_fwhm is not None: + ensure_finite = True + + imgs_img = _utils.check_niimg(imgs) + affine = imgs_img.affine[:3, :3] + + if not np.allclose(mask_affine, imgs_img.affine): + raise ValueError('Mask affine: \n%s\n is different from img affine:' + '\n%s' % (str(mask_affine), + str(imgs_img.affine))) + + if not mask_data.shape == imgs_img.shape[:3]: + raise ValueError('Mask shape: %s is different from img shape:%s' + % (str(mask_data.shape), str(imgs_img.shape[:3]))) + + # All the following has been optimized for C order. + # Time that may be lost in conversion here is regained multiple times + # afterward, especially if smoothing is applied. 
+ series = _safe_get_data(imgs_img) + + if dtype == 'f': + if series.dtype.kind == 'f': + dtype = series.dtype + else: + dtype = np.float32 + series = _utils.as_ndarray(series, dtype=dtype, order="C", + copy=True) + del imgs_img # frees a lot of memory + + # Delayed import to avoid circular imports + from .image.image import _smooth_array + _smooth_array(series, affine, fwhm=smoothing_fwhm, + ensure_finite=ensure_finite, copy=False) + return series[mask_data].T + + +def _unmask_3d(X, mask, order="C"): + """Take masked data and bring them back to 3D (space only). + + Parameters + ---------- + X : :class:`numpy.ndarray` + Masked data. shape: (features,) + + mask : Niimg-like object + See https://nilearn.github.io/stable/manipulating_images/input_output.html # noqa:E501 + Mask. mask.ndim must be equal to 3, and dtype *must* be bool. + """ + if mask.dtype != bool: + raise TypeError("mask must be a boolean array") + if X.ndim != 1: + raise TypeError("X must be a 1-dimensional array") + n_features = mask.sum() + if X.shape[0] != n_features: + raise TypeError('X must be of shape (samples, %d).' % n_features) + + data = np.zeros( + (mask.shape[0], mask.shape[1], mask.shape[2]), + dtype=X.dtype, order=order) + data[mask] = X + return data + + +def _unmask_4d(X, mask, order="C"): + """Take masked data and bring them back to 4D. + + Parameters + ---------- + X : :class:`numpy.ndarray` + Masked data. shape: (samples, features) + + mask : :class:`numpy.ndarray` + Mask. mask.ndim must be equal to 4, and dtype *must* be bool. + + Returns + ------- + data : :class:`numpy.ndarray` + Unmasked data. + Shape: (mask.shape[0], mask.shape[1], mask.shape[2], X.shape[0]) + """ + if mask.dtype != bool: + raise TypeError("mask must be a boolean array") + if X.ndim != 2: + raise TypeError("X must be a 2-dimensional array") + n_features = mask.sum() + if X.shape[1] != n_features: + raise TypeError('X must be of shape (samples, %d).' 
% n_features) + + data = np.zeros(mask.shape + (X.shape[0],), dtype=X.dtype, order=order) + data[mask, :] = X.T + return data + + +def unmask(X, mask_img, order="F"): + """Take masked data and bring them back into 3D/4D. + + This function can be applied to a list of masked data. + + Parameters + ---------- + X : :class:`numpy.ndarray` (or :obj:`list` of) + Masked data. shape: (samples #, features #). + If X is one-dimensional, it is assumed that samples# == 1. + + mask_img : Niimg-like object + See https://nilearn.github.io/stable/manipulating_images/input_output.html # noqa:E501 + Must be 3-dimensional. + + Returns + ------- + data : :class:`nibabel.nifti1.Nifti1Image` + Unmasked data. Depending on the shape of X, data can have + different shapes: + + - X.ndim == 2: + Shape: (mask.shape[0], mask.shape[1], mask.shape[2], X.shape[0]) + - X.ndim == 1: + Shape: (mask.shape[0], mask.shape[1], mask.shape[2]) + """ + # Handle lists. This can be a list of other lists / arrays, or a list or + # numbers. In the latter case skip. + if isinstance(X, list) and not isinstance(X[0], numbers.Number): + ret = [] + for x in X: + ret.append(unmask(x, mask_img, order=order)) # 1-level recursion + return ret + + # The code after this block assumes that X is an ndarray; ensure this + X = np.asanyarray(X) + + mask_img = _utils.check_niimg_3d(mask_img) + mask, affine = _load_mask_img(mask_img) + + if np.ndim(X) == 2: + unmasked = _unmask_4d(X, mask, order=order) + elif np.ndim(X) == 1: + unmasked = _unmask_3d(X, mask, order=order) + else: + raise TypeError("Masked data X must be 2D or 1D array; " + "got shape: %s" % str(X.shape)) + + return new_img_like(mask_img, unmasked, affine) + + +def _unmask_from_to_3d_array(w, mask): + """Unmask an image into whole brain, with off-mask :term:`voxels` + set to 0. + + Used as a stand-alone function in low-level decoding (SpaceNet) and + clustering (ReNA) functions. 
+ + Parameters + ---------- + w : :class:`numpy.ndarray`, shape (n_features,) + The image to be unmasked. + + mask : :class:`numpy.ndarray`, shape (nx, ny, nz) + The mask used in the unmasking operation. It is required that + ``mask.sum() == n_features``. + + Returns + ------- + out : 3D :class:`numpy.ndarray` (same shape as `mask`) + The unmasked version of `w`. + """ + if mask.sum() != len(w): + raise ValueError("Expecting mask.sum() == len(w).") + out = np.zeros(mask.shape, dtype=w.dtype) + out[mask] = w + return out diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/nilearn/signal.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/nilearn/signal.py new file mode 100644 index 0000000000000000000000000000000000000000..b5b4b8f6f86be90e6acc8d402d48ad8937d68d72 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/nilearn/signal.py @@ -0,0 +1,870 @@ +""" +Preprocessing functions for time series. + +All functions in this module should take X matrices with samples x +features +""" +# Authors: Alexandre Abraham, Gael Varoquaux, Philippe Gervais +# License: simplified BSD + +import warnings + +import numpy as np +import pandas as pd +from scipy import linalg, signal as sp_signal +from sklearn.utils import gen_even_slices, as_float_array + +from ._utils.numpy_conversions import csv_to_array, as_ndarray +from ._utils import fill_doc + + +availiable_filters = ['butterworth', + 'cosine' + ] + + +def _standardize(signals, detrend=False, standardize='zscore'): + """Center and standardize a given signal (time is along first axis). + + Parameters + ---------- + signals : :class:`numpy.ndarray` + Timeseries to standardize. + + detrend : :obj:`bool`, optional + If detrending of timeseries is requested. + Default=False. + + standardize : {'zscore', 'psc', True, False}, optional + Strategy to standardize the signal: + + - 'zscore': The signal is z-scored. 
Timeseries are shifted + to zero mean and scaled to unit variance. + - 'psc': Timeseries are shifted to zero mean value and scaled + to percent signal change (as compared to original mean signal). + - True: The signal is z-scored (same as option `zscore`). + Timeseries are shifted to zero mean and scaled to unit variance. + - False: Do not standardize the data. + + Default='zscore'. + + Returns + ------- + std_signals : :class:`numpy.ndarray` + Copy of signals, standardized. + """ + if standardize not in [True, False, 'psc', 'zscore']: + raise ValueError('{} is no valid standardize strategy.' + .format(standardize)) + + if detrend: + signals = _detrend(signals, inplace=False) + else: + signals = signals.copy() + + if standardize: + if signals.shape[0] == 1: + warnings.warn('Standardization of 3D signal has been requested but ' + 'would lead to zero values. Skipping.') + return signals + + elif (standardize == 'zscore') or (standardize is True): + if not detrend: + # remove mean if not already detrended + signals = signals - signals.mean(axis=0) + + std = signals.std(axis=0) + std[std < np.finfo(np.float64).eps] = 1. # avoid numerical problems + signals /= std + + elif standardize == 'psc': + mean_signal = signals.mean(axis=0) + invalid_ix = np.absolute(mean_signal) < np.finfo(np.float64).eps + signals = (signals - mean_signal) / np.absolute(mean_signal) + signals *= 100 + + if np.any(invalid_ix): + warnings.warn('psc standardization strategy is meaningless ' + 'for features that have a mean of 0. ' + 'These time series are set to 0.') + signals[:, invalid_ix] = 0 + + return signals + + +def _mean_of_squares(signals, n_batches=20): + """Compute mean of squares for each signal. + + This function is equivalent to: + + .. code-block:: python + + var = np.copy(signals) + var **= 2 + var = var.mean(axis=0) + + but uses a lot less memory. 
+ + Parameters + ---------- + signals : :class:`numpy.ndarray`, shape (n_samples, n_features) + Signal whose mean of squares must be computed. + + n_batches : :obj:`int`, optional + Number of batches to use in the computation. + + .. note:: + Tweaking this value can lead to variation of memory usage + and computation time. The higher the value, the lower the + memory consumption. + + Default=20. + + Returns + ------- + var : :class:`numpy.ndarray` + 1D array holding the mean of squares. + """ + # No batching for small arrays + if signals.shape[1] < 500: + n_batches = 1 + + # Fastest for C order + var = np.empty(signals.shape[1]) + for batch in gen_even_slices(signals.shape[1], n_batches): + tvar = np.copy(signals[:, batch]) + tvar **= 2 + var[batch] = tvar.mean(axis=0) + + return var + + +def _row_sum_of_squares(signals, n_batches=20): + """Compute sum of squares for each signal. + + This function is equivalent to: + + .. code-block:: python + + signals **= 2 + signals = signals.sum(axis=0) + + but uses a lot less memory. + + Parameters + ---------- + signals : :class:`numpy.ndarray`, shape (n_samples, n_features) + Signal whose sum of squares must be computed. + + n_batches : :obj:`int`, optional + Number of batches to use in the computation. + + .. note:: + Tweaking this value can lead to variation of memory usage + and computation time. The higher the value, the lower the + memory consumption. + + Default=20. + + Returns + ------- + var : :class:`numpy.ndarray` + 1D array holding the sum of squares. + """ + # No batching for small arrays + if signals.shape[1] < 500: + n_batches = 1 + + # Fastest for C order + var = np.empty(signals.shape[1]) + for batch in gen_even_slices(signals.shape[1], n_batches): + var[batch] = np.sum(signals[:, batch] ** 2, 0) + + return var + + +def _detrend(signals, inplace=False, type="linear", n_batches=10): + """Detrend columns of input array. + + Signals are supposed to be columns of `signals`. 
+ This function is significantly faster than :func:`scipy.signal.detrend` + on this case and uses a lot less memory. + + Parameters + ---------- + signals : :class:`numpy.ndarray` + This parameter must be two-dimensional. + Signals to detrend. A signal is a column. + + inplace : :obj:`bool`, optional + Tells if the computation must be made inplace or not. + Default=False. + + type : {"linear", "constant"}, optional + Detrending type, either "linear" or "constant". + See also :func:`scipy.signal.detrend`. + Default="linear". + + n_batches : :obj:`int`, optional + Number of batches to use in the computation. + + .. note:: + Tweaking this value can lead to variation of memory usage + and computation time. The higher the value, the lower the + memory consumption. + + Returns + ------- + detrended_signals : :class:`numpy.ndarray` + Detrended signals. The shape is that of ``signals``. + + Notes + ----- + If a signal of length 1 is given, it is returned unchanged. + """ + signals = as_float_array(signals, copy=not inplace) + if signals.shape[0] == 1: + warnings.warn('Detrending of 3D signal has been requested but ' + 'would lead to zero values. Skipping.') + return signals + + signals -= np.mean(signals, axis=0) + if type == "linear": + # Keeping "signals" dtype avoids some type conversion further down, + # and can save a lot of memory if dtype is single-precision. + regressor = np.arange(signals.shape[0], dtype=signals.dtype) + regressor -= regressor.mean() + std = np.sqrt((regressor ** 2).sum()) + # avoid numerical problems + if not std < np.finfo(np.float64).eps: + regressor /= std + regressor = regressor[:, np.newaxis] + + # No batching for small arrays + if signals.shape[1] < 500: + n_batches = 1 + + # This is fastest for C order. 
+ for batch in gen_even_slices(signals.shape[1], n_batches): + signals[:, batch] -= np.dot(regressor[:, 0], signals[:, batch] + ) * regressor + return signals + + +def _check_wn(btype, freq, nyq): + wn = freq / float(nyq) + if wn >= 1.: + # results looked unstable when the critical frequencies are + # exactly at the Nyquist frequency. See issue at SciPy + # https://github.com/scipy/scipy/issues/6265. Before, SciPy 1.0.0 ("wn + # should be btw 0 and 1"). But, after ("0 < wn < 1"). Due to unstable + # results as pointed in the issue above. Hence, we forced the + # critical frequencies to be slightly less than 1. but not 1. + wn = 1 - 10 * np.finfo(1.).eps + warnings.warn( + 'The frequency specified for the %s pass filter is ' + 'too high to be handled by a digital filter (superior to ' + 'nyquist frequency). It has been lowered to %.2f (nyquist ' + 'frequency).' % (btype, wn)) + + if wn < 0.0: # equal to 0.0 is okay + wn = np.finfo(1.).eps + warnings.warn( + 'The frequency specified for the %s pass filter is ' + 'too low to be handled by a digital filter (must be non-negative).' + ' It has been set to eps: %.5e' % (btype, wn)) + + return wn + + +@fill_doc +def butterworth(signals, sampling_rate, low_pass=None, high_pass=None, + order=5, copy=False): + """Apply a low-pass, high-pass or band-pass + `Butterworth filter `_. + + Apply a filter to remove signal below the `low` frequency and above the + `high` frequency. + + Parameters + ---------- + signals : :class:`numpy.ndarray` (1D sequence or n_samples x n_sources) + Signals to be filtered. A signal is assumed to be a column + of `signals`. + + sampling_rate : :obj:`float` + Number of samples per time unit (sample frequency). + %(low_pass)s + %(high_pass)s + order : :obj:`int`, optional + Order of the `Butterworth filter + `_. + When filtering signals, the filter has a decay to avoid ringing. + Increasing the order sharpens this decay. Be aware that very high + orders can lead to numerical instability. + Default=5. 
+ + copy : :obj:`bool`, optional + If False, `signals` is modified inplace, and memory consumption is + lower than for ``copy=True``, though computation time is higher. + + Returns + ------- + filtered_signals : :class:`numpy.ndarray` + Signals filtered according to the given parameters. + """ + if low_pass is None and high_pass is None: + if copy: + return signals.copy() + else: + return signals + + if low_pass is not None and high_pass is not None \ + and high_pass >= low_pass: + raise ValueError( + "High pass cutoff frequency (%f) is greater or equal" + "to low pass filter frequency (%f). This case is not handled " + "by this function." + % (high_pass, low_pass)) + + nyq = sampling_rate * 0.5 + + critical_freq = [] + if high_pass is not None: + btype = 'high' + critical_freq.append(_check_wn(btype, high_pass, nyq)) + + if low_pass is not None: + btype = 'low' + critical_freq.append(_check_wn(btype, low_pass, nyq)) + + if len(critical_freq) == 2: + btype = 'band' + else: + critical_freq = critical_freq[0] + + b, a = sp_signal.butter(order, critical_freq, btype=btype, output='ba') + if signals.ndim == 1: + # 1D case + output = sp_signal.filtfilt(b, a, signals) + if copy: # filtfilt does a copy in all cases. + signals = output + else: + signals[...] = output + else: + if copy: + # No way to save memory when a copy has been requested, + # because filtfilt does out-of-place processing + signals = sp_signal.filtfilt(b, a, signals, axis=0) + else: + # Lesser memory consumption, slower. + for timeseries in signals.T: + timeseries[:] = sp_signal.filtfilt(b, a, timeseries) + + # results returned in-place + + return signals + + +@fill_doc +def high_variance_confounds(series, n_confounds=5, percentile=2., + detrend=True): + """Return confounds time series extracted from series with highest + variance. + + Parameters + ---------- + series : :class:`numpy.ndarray` + Timeseries. A timeseries is a column in the "series" array. 
+ shape (sample number, feature number) + + n_confounds : :obj:`int`, optional + Number of confounds to return. Default=5. + + percentile : :obj:`float`, optional + Highest-variance series percentile to keep before computing the + singular value decomposition, 0. <= `percentile` <= 100. + ``series.shape[0] * percentile / 100`` must be greater + than ``n_confounds``. Default=2.0. + %(detrend)s + Default=True. + + Returns + ------- + v : :class:`numpy.ndarray` + Highest variance confounds. Shape: (samples, n_confounds) + + Notes + ----- + This method is related to what has been published in the literature + as 'CompCor' :footcite:`BEHZADI200790`. + + The implemented algorithm does the following: + + - compute sum of squares for each time series (no mean removal) + - keep a given percentile of series with highest variances (percentile) + - compute an svd of the extracted series + - return a given number (n_confounds) of series from the svd with + highest singular values. + + References + ---------- + .. footbibliography:: + + See also + -------- + nilearn.image.high_variance_confounds + """ + if detrend: + series = _detrend(series) # copy + + # Retrieve the voxels|features with highest variance + + # Compute variance without mean removal. + var = _mean_of_squares(series) + var_thr = np.nanpercentile(var, 100. - percentile) + series = series[:, var > var_thr] # extract columns (i.e. 
features) + # Return the singular vectors with largest singular values + # We solve the symmetric eigenvalue problem here, increasing stability + s, u = linalg.eigh(series.dot(series.T) / series.shape[0]) + ix_ = np.argsort(s)[::-1] + u = u[:, ix_[:n_confounds]].copy() + return u + + +def _ensure_float(data): + "Make sure that data is a float type" + if not data.dtype.kind == 'f': + if data.dtype.itemsize == '8': + data = data.astype(np.float64) + else: + data = data.astype(np.float32) + return data + + +@fill_doc +def clean(signals, runs=None, detrend=True, standardize='zscore', + sample_mask=None, confounds=None, standardize_confounds=True, + filter='butterworth', low_pass=None, high_pass=None, t_r=2.5, + ensure_finite=False): + """Improve :term:`SNR` on masked :term:`fMRI` signals. + + This function can do several things on the input signals, in + the following order: + + - detrend + - low- and high-pass filter + - remove confounds + - standardize + + Low-pass filtering improves specificity. + + High-pass filtering should be kept small, to keep some sensitivity. + + Filtering is only meaningful on evenly-sampled signals. + + According to :footcite:`Lindquist407676`, removal of confounds will be done + orthogonally to temporal filters (low- and/or high-pass filters), if both + are specified. + + Parameters + ---------- + signals : :class:`numpy.ndarray` + Timeseries. Must have shape (instant number, features number). + This array is not modified. + + runs : :class:`numpy.ndarray`, optional + Add a run level to the cleaning process. Each run will be + cleaned independently. Must be a 1D array of n_samples elements. + Default is None. + + confounds : :class:`numpy.ndarray`, :obj:`str`,\ + :class:`pandas.DataFrame` or :obj:`list` of + Confounds timeseries. Shape must be + (instant number, confound number), or just (instant number,) + The number of time instants in ``signals`` and ``confounds`` must be + identical (i.e. ``signals.shape[0] == confounds.shape[0]``). 
+ If a string is provided, it is assumed to be the name of a csv file + containing signals as columns, with an optional one-line header. + If a list is provided, all confounds are removed from the input + signal, as if all were in the same array. + Default is None. + + sample_mask : None, :class:`numpy.ndarray`, :obj:`list`,\ + :obj:`tuple`, or :obj:`list` of + shape: (number of scans - number of volumes removed, ) + Masks the niimgs along time/fourth dimension to perform scrubbing + (remove volumes with high motion) and/or non-steady-state volumes. + This masking step is applied before signal cleaning. When supplying run + information, sample_mask must be a list containing sets of indexes for + each run. + + .. versionadded:: 0.8.0 + + Default is None. + %(t_r)s + Default=2.5. + filter : {'butterworth', 'cosine', False}, optional + Filtering methods: + + - 'butterworth': perform butterworth filtering. + - 'cosine': generate discrete cosine transformation drift terms. + - False: Do not perform filtering. + + Default='butterworth'. + %(low_pass)s + + .. note:: + `low_pass` is not implemented for filter='cosine'. + + %(high_pass)s + %(detrend)s + standardize : {'zscore', 'psc', False}, optional + Strategy to standardize the signal: + + - 'zscore': The signal is z-scored. Timeseries are shifted + to zero mean and scaled to unit variance. + - 'psc': Timeseries are shifted to zero mean value and scaled + to percent signal change (as compared to original mean signal). + - True: The signal is z-scored (same as option `zscore`). + Timeseries are shifted to zero mean and scaled to unit variance. + - False: Do not standardize the data. + + Default="zscore". + %(standardize_confounds)s + %(ensure_finite)s + Default=False. + + Returns + ------- + cleaned_signals : :class:`numpy.ndarray` + Input signals, cleaned. Same shape as `signals`. + + Notes + ----- + Confounds removal is based on a projection on the orthogonal + of the signal space. See :footcite:`Friston1994`. 
+ + Orthogonalization between temporal filters and confound removal is based on + suggestions in :footcite:`Lindquist407676`. + + References + ---------- + .. footbibliography:: + + See Also + -------- + nilearn.image.clean_img + """ + # Raise warning for some parameter combinations when confounds present + if confounds is not None: + _check_signal_parameters(detrend, standardize_confounds) + + # Read confounds and signals + signals, runs, confounds = _sanitize_inputs( + signals, runs, confounds, sample_mask, ensure_finite + ) + use_filter = _check_filter_parameters(filter, low_pass, high_pass, t_r) + # Restrict the signal to the orthogonal of the confounds + if runs is not None: + signals = _process_runs(signals, runs, detrend, standardize, + confounds, low_pass, high_pass, t_r) + + # Detrend + # Detrend and filtering should apply to confounds, if confound presents + # keep filters orthogonal (according to Lindquist et al. (2018)) + if detrend: + mean_signals = signals.mean(axis=0) + signals = _standardize(signals, standardize=False, detrend=detrend) + if confounds is not None: + confounds = _standardize(confounds, standardize=False, + detrend=detrend) + if use_filter: + # check if filter parameters are satisfied and filter according to the strategy + signals, confounds = _filter_signal(signals, confounds, filter, + low_pass, high_pass, t_r) + + # Remove confounds + if confounds is not None: + confounds = _standardize(confounds, standardize=standardize_confounds, + detrend=False) + if not standardize_confounds: + # Improve numerical stability by controlling the range of + # confounds. We don't rely on _standardize as it removes any + # constant contribution to confounds. 
+ confound_max = np.max(np.abs(confounds), axis=0) + confound_max[confound_max == 0] = 1 + confounds /= confound_max + + # Pivoting in qr decomposition was added in scipy 0.10 + Q, R, _ = linalg.qr(confounds, mode='economic', pivoting=True) + Q = Q[:, np.abs(np.diag(R)) > np.finfo(np.float64).eps * 100.] + signals -= Q.dot(Q.T).dot(signals) + + # Standardize + if detrend and (standardize == 'psc'): + # If the signal is detrended, we have to know the original mean + # signal to calculate the psc. + signals = _standardize(signals + mean_signals, standardize=standardize, + detrend=False) + else: + signals = _standardize(signals, standardize=standardize, + detrend=False) + + return signals + + +def _filter_signal(signals, confounds, filter, low_pass, high_pass, t_r): + '''Filter signal based on provided strategy.''' + if filter == 'butterworth': + signals = butterworth(signals, sampling_rate=1. / t_r, + low_pass=low_pass, high_pass=high_pass) + if confounds is not None: + # Apply low- and high-pass filters to keep filters orthogonal + # (according to Lindquist et al. (2018)) + confounds = butterworth(confounds, sampling_rate=1. 
/ t_r, + low_pass=low_pass, high_pass=high_pass) + elif filter == 'cosine': + from .glm.first_level.design_matrix import _cosine_drift + frame_times = np.arange(signals.shape[0]) * t_r + cosine_drift = _cosine_drift(high_pass, frame_times) + if confounds is None: + confounds = cosine_drift.copy() + else: + confounds = np.hstack((confounds, cosine_drift)) + return signals, confounds + + +def _process_runs(signals, runs, detrend, standardize, confounds, + low_pass, high_pass, t_r): + """Process each run independently.""" + if len(runs) != len(signals): + raise ValueError( + ( + 'The length of the run vector (%i) ' + 'does not match the length of the signals (%i)' + ) % (len(runs), len(signals)) + ) + for run in np.unique(runs): + run_confounds = None + if confounds is not None: + run_confounds = confounds[runs == run] + signals[runs == run, :] = \ + clean(signals[runs == run], + detrend=detrend, standardize=standardize, + confounds=run_confounds, low_pass=low_pass, + high_pass=high_pass, t_r=t_r) + return signals + + +def _sanitize_inputs(signals, runs, confounds, sample_mask, ensure_finite): + """Clean up signals and confounds before processing.""" + n_time = len(signals) # original length of the signal + n_runs, runs = _sanitize_runs(n_time, runs) + confounds = _sanitize_confounds(n_time, n_runs, confounds) + sample_mask = _sanitize_sample_mask(n_time, n_runs, runs, sample_mask) + signals = _sanitize_signals(signals, ensure_finite) + + if sample_mask is None: + return signals, runs, confounds + + if confounds is not None: + confounds = confounds[sample_mask, :] + if runs is not None: + runs = runs[sample_mask] + return signals[sample_mask, :], runs, confounds + + +def _sanitize_confounds(n_time, n_runs, confounds): + """Check confounds are the correct type. When passing multiple runs, ensure the + number of runs matches the sets of confound regressors. 
+ """ + if confounds is None: + return confounds + + if not isinstance(confounds, (list, tuple, str, np.ndarray, pd.DataFrame)): + raise TypeError( + "confounds keyword has an unhandled type: %s" % confounds.__class__ + ) + + if not isinstance(confounds, (list, tuple)): + confounds = (confounds,) + + all_confounds = [] + for confound in confounds: + confound = _sanitize_confound_dtype(n_time, confound) + all_confounds.append(confound) + confounds = np.hstack(all_confounds) + return _ensure_float(confounds) + + +def _sanitize_sample_mask(n_time, n_runs, runs, sample_mask): + """Check sample_mask is the right data type and matches the run index.""" + if sample_mask is None: + return sample_mask + if not isinstance(sample_mask, (list, tuple, np.ndarray)): + raise TypeError( + "sample_mask has an unhandled type: %s" % sample_mask.__class__ + ) + if not isinstance(sample_mask, (list, tuple)): + sample_mask = (sample_mask, ) + + if len(sample_mask) != n_runs: + raise ValueError( + "Number of sample_mask ({}) not matching " + "number of runs ({}).".format(len(sample_mask), n_runs) + ) + + if runs is None: + runs = np.zeros(n_time) + + # handle multiple runs + masks = [] + starting_index = 0 + for i, current_mask in enumerate(sample_mask): + _check_sample_mask_index(i, n_runs, runs, current_mask) + current_mask += starting_index + masks.append(current_mask) + starting_index = sum(i == runs) + sample_mask = np.hstack(masks) + return sample_mask + + +def _check_sample_mask_index(i, n_runs, runs, current_mask): + """Ensure the index in sample mask is valid.""" + len_run = sum(i == runs) + len_current_mask = len(current_mask) + # sample_mask longer than signal + if len_current_mask > len_run: + raise IndexError( + "sample_mask {} of {} is has more timepoints than the current " + "run ;sample_mask contains {} index but the run has {} " + "timepoints.".format( + (i + 1), n_runs, len_current_mask, len_run + ) + ) + # sample_mask index exceed signal timepoints + invalid_index = 
current_mask[current_mask > len_run] + if invalid_index.size > 0: + raise IndexError( + "sample_mask {} of {} contains invalid index {}; " + "The signal contains {} time points.".format( + (i + 1), n_runs, invalid_index, len_run + ) + ) + + +def _sanitize_runs(n_time, runs): + """Check runs are supplied in the correct format and detect the number of + unique runs. + """ + if runs is not None and len(runs) != n_time: + raise ValueError( + ( + "The length of the run vector (%i) " + "does not match the length of the signals (%i)" + ) + % (len(runs), n_time) + ) + n_runs = 1 if runs is None else len(np.unique(runs)) + return n_runs, runs + + +def _sanitize_confound_dtype(n_signal, confound): + """Check confound is the correct datatype.""" + if isinstance(confound, pd.DataFrame): + confound = confound.values + if isinstance(confound, str): + filename = confound + confound = csv_to_array(filename) + if np.isnan(confound.flat[0]): + # There may be a header + confound = csv_to_array(filename, skip_header=1) + if confound.shape[0] != n_signal: + raise ValueError( + "Confound signal has an incorrect length" + "Signal length: {0}; confound length: {1}".format( + n_signal, confound.shape[0]) + ) + elif isinstance(confound, np.ndarray): + if confound.ndim == 1: + confound = np.atleast_2d(confound).T + elif confound.ndim != 2: + raise ValueError("confound array has an incorrect number " + "of dimensions: %d" % confound.ndim) + if confound.shape[0] != n_signal: + raise ValueError( + "Confound signal has an incorrect length" + "Signal length: {0}; confound length: {1}".format( + n_signal, confound.shape[0]) + ) + + else: + raise TypeError("confound has an unhandled type: %s" + % confound.__class__) + return confound + + +def _check_filter_parameters(filter, low_pass, high_pass, t_r): + """Check all filter related parameters are set correctly.""" + if not filter: + if any(isinstance(item, float) for item in [low_pass, high_pass]): + warnings.warn( + "No filter type selected but 
cutoff frequency provided." + "Will not perform filtering." + ) + return False + elif filter in availiable_filters: + if filter == 'cosine' and not all(isinstance(item, float) + for item in [t_r, high_pass]): + raise ValueError( + "Repetition time (t_r) and low cutoff frequency " + "(high_pass) must be specified for cosine filtering." + "t_r='{0}', high_pass='{1}'".format(t_r, high_pass) + ) + if filter == 'butterworth': + if all(item is None for item in [low_pass, high_pass, t_r]): + # Butterworth was switched off by passing + # None to all these parameters + return False + if t_r is None: + raise ValueError("Repetition time (t_r) must be specified for " + "butterworth filtering.") + if any(isinstance(item, bool) for item in [low_pass, high_pass]): + raise TypeError( + "high/low pass must be float or None but you provided " + "high_pass='{0}', low_pass='{1}'" + .format(high_pass, low_pass) + ) + return True + else: + raise ValueError("Filter method {} not implemented.".format(filter)) + + +def _sanitize_signals(signals, ensure_finite): + """Ensure signals are in the correct state.""" + if not isinstance(ensure_finite, bool): + raise ValueError("'ensure_finite' must be boolean type True or False " + "but you provided ensure_finite={0}" + .format(ensure_finite)) + signals = signals.copy() + if not isinstance(signals, np.ndarray): + signals = as_ndarray(signals) + if ensure_finite: + mask = np.logical_not(np.isfinite(signals)) + if mask.any(): + signals[mask] = 0 + return _ensure_float(signals) + + +def _check_signal_parameters(detrend, standardize_confounds): + """Raise warning if the combination is illogical""" + if not detrend and not standardize_confounds: + warnings.warn("When confounds are provided, one must perform detrend " + "and/or standardize confounds. You provided " + "detrend={0}, standardize_confounds={1}. If confounds " + "were not standardized or demeaned before passing to " + "signal.clean signal will not be correctly " + "cleaned. 
".format( + detrend, standardize_confounds) + ) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/nilearn/version.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/nilearn/version.py new file mode 100644 index 0000000000000000000000000000000000000000..66cf1163c23c932bd7763497a8eb015eab97b686 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/nilearn/version.py @@ -0,0 +1,183 @@ +# *- encoding: utf-8 -*- +""" +nilearn version, required package versions, and utilities for checking +""" +# Author: Loic Esteve, Ben Cipollini +# License: simplified BSD + +# PEP0440 compatible formatted version, see: +# https://www.python.org/dev/peps/pep-0440/ +# +# Generic release markers: +# X.Y +# X.Y.Z # For bugfix releases +# +# Admissible pre-release markers: +# X.YaN # Alpha release +# X.YbN # Beta release +# X.YrcN # Release Candidate +# X.Y # Final release +# +# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. +# 'X.Y.dev0' is the canonical version of 'X.Y.dev' +# +__version__ = '0.9.1' + +_NILEARN_INSTALL_MSG = 'See %s for installation information.' % ( + 'http://nilearn.github.io/introduction.html#installation') + +import operator + +# This is a tuple to preserve order, so that dependencies are checked +# in some meaningful order (more => less 'core'). 
+REQUIRED_MODULE_METADATA = ( + ('numpy', { + 'min_version': '1.16', + 'required_at_installation': True, + 'install_info': _NILEARN_INSTALL_MSG}), + ('scipy', { + 'min_version': '1.2', + 'required_at_installation': True, + 'install_info': _NILEARN_INSTALL_MSG}), + ('sklearn', { + 'min_version': '0.21', + 'required_at_installation': True, + 'install_info': _NILEARN_INSTALL_MSG}), + ('joblib', { + 'min_version': '0.12', + 'required_at_installation': True, + 'install_info': _NILEARN_INSTALL_MSG}), + ('nibabel', { + 'min_version': '2.5', + 'required_at_installation': False}), + ('pandas', { + 'min_version': '0.24.0', + 'required_at_installation': True, + 'install_info': _NILEARN_INSTALL_MSG}), + ("requests", { + "min_version": "2", + "required_at_installation": False + }) +) + +OPTIONAL_MATPLOTLIB_MIN_VERSION = '2.0' + + +def _import_module_with_version_check( + module_name, + minimum_version, + install_info=None): + """Check that module is installed with a recent enough version.""" + try: + module = __import__(module_name) + except ImportError as exc: + user_friendly_info = ('Module "{0}" could not be found. {1}').format( + module_name, + install_info or 'Please install it properly to use nilearn.') + exc.args += (user_friendly_info,) + # Necessary for Python 3 because the repr/str of ImportError + # objects was changed in Python 3 + if hasattr(exc, 'msg'): + exc.msg += '. ' + user_friendly_info + raise + + # Avoid choking on modules with no __version__ attribute + module_version = getattr(module, '__version__', '0.0.0') + + version_too_old = ( + not _compare_version(module_version, '>=', minimum_version) + ) + + if version_too_old: + message = ( + 'A {module_name} version of at least {minimum_version} ' + 'is required to use nilearn. {module_version} was found. 
' + 'Please upgrade {module_name}').format( + module_name=module_name, + minimum_version=minimum_version, + module_version=module_version) + + raise ImportError(message) + + return module + + +VERSION_OPERATORS = { + "==": operator.eq, + "!=": operator.ne, + ">": operator.gt, + ">=": operator.ge, + "<": operator.lt, + "<=": operator.le, +} + + +def _compare_version(version_a, operator, version_b): + """Compare two version strings via a user-specified operator. + + ``distutils`` has been deprecated since Python 3.10 and is scheduled + for removal from the standard library with the release of Python 3.12. + For version comparisons, we use setuptools's `parse_version` if available. + + Note: This function is inspired from MNE-Python. + See https://github.com/mne-tools/mne-python/blob/main/mne/fixes.py + + Parameters + ---------- + version_a : :obj:`str` + First version string. + + operator : {'==', '!=','>', '<', '>=', '<='} + Operator to compare ``version_a`` and ``version_b`` in the form of + ``version_a operator version_b``. + + version_b : :obj:`str` + Second version string. + + Returns + ------- + result : :obj:`bool` + The result of the version comparison. + + """ + # TODO: + # The setuptools doc encourages the use of importlib.metadata instead + # of pkg_resources. However, importlib.metadata is only part of the stdlib + # for Python >= 3.8. 
When Nilearn will only support Python >= 3.8, + # please consider changing the following line to: + # from importlib.metadata import version as parse + try: + from pkg_resources import parse_version as parse # noqa:F401 + except ImportError: + from distutils.version import LooseVersion as parse # noqa:F401 + if operator not in VERSION_OPERATORS: + raise ValueError( + "'_compare_version' received an unexpected " + "operator {0}.".format(operator) + ) + return VERSION_OPERATORS[operator](parse(version_a), parse(version_b)) + + +def _check_module_dependencies(is_nilearn_installing=False): + """Throw an exception if nilearn dependencies are not installed. + + Parameters + ---------- + is_nilearn_installing: boolean + if True, only error on missing packages that cannot be auto-installed. + if False, error on any missing package. + + Throws + ------- + ImportError + """ + + for (module_name, module_metadata) in REQUIRED_MODULE_METADATA: + if not (is_nilearn_installing and + not module_metadata['required_at_installation']): + # Skip check only when installing and it's a module that + # will be auto-installed. 
+ _import_module_with_version_check( + module_name=module_name, + minimum_version=module_metadata['min_version'], + install_info=module_metadata.get('install_info')) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/INSTALLER b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/LICENSE b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..02c61aad24e92db5eedf41d71bd61dced3122212 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2020-2021, Jiri Borovec +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/METADATA b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..aa0a352ca7b7fc687933633060a9a93dc52bd3a5 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/METADATA @@ -0,0 +1,335 @@ +Metadata-Version: 2.1 +Name: pyDeprecate +Version: 0.3.1 +Summary: Deprecation tooling +Home-page: https://borda.github.io/pyDeprecate +Author: Jiri Borovec +Author-email: jiri.borovec@fel.cvut.cz +License: MIT +Project-URL: Source Code, https://github.com/Borda/pyDeprecate +Keywords: python,development,deprecation +Platform: UNKNOWN +Classifier: Environment :: Console +Classifier: Natural Language :: English +Classifier: Development Status :: 3 - Alpha +Classifier: Intended Audience :: Developers +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 
3.8 +Classifier: Programming Language :: Python :: 3.9 +Requires-Python: >=3.6 +Description-Content-Type: text/markdown +License-File: LICENSE + +# pyDeprecate + +**Simple tooling for marking deprecated functions or classes and re-routing to the new successors' instance.** + +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pyDeprecate)](https://pypi.org/project/pyDeprecate/) +[![PyPI Status](https://badge.fury.io/py/pyDeprecate.svg)](https://badge.fury.io/py/pyDeprecate) +[![PyPI Status](https://pepy.tech/badge/pyDeprecate)](https://pepy.tech/project/pyDeprecate) +[![Conda](https://img.shields.io/conda/v/conda-forge/pyDeprecate?label=conda&color=success)](https://anaconda.org/conda-forge/pyDeprecate) +![Conda](https://img.shields.io/conda/dn/conda-forge/pyDeprecate) +[![license](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/Borda/pyDeprecate/blob/master/LICENSE) + +[![CI testing](https://github.com/Borda/pyDeprecate/actions/workflows/ci_testing.yml/badge.svg?tag=0.3.1)](https://github.com/Borda/pyDeprecate/actions/workflows/ci_testing.yml) +[![Code formatting](https://github.com/Borda/pyDeprecate/actions/workflows/code-format.yml/badge.svg?tag=0.3.1)](https://github.com/Borda/pyDeprecate/actions/workflows/code-format.yml) +[![codecov](https://codecov.io/gh/Borda/pyDeprecate/release/0.3.1/graph/badge.svg?token=BG7RQ86UJA)](https://codecov.io/gh/Borda/pyDeprecate) +[![CodeFactor](https://www.codefactor.io/repository/github/borda/pydeprecate/badge)](https://www.codefactor.io/repository/github/borda/pydeprecate) +[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/Borda/pyDeprecate/main.svg)](https://results.pre-commit.ci/latest/github/Borda/pyDeprecate/main) + + + +--- + +The common use-case is moving your functions across codebase or outsourcing some functionalities to new packages. 
+For most of these cases, you want to keep some compatibility, so you cannot simply remove the old function; for some time you also want to warn users that the functionality they have been using has moved and is now deprecated in favor of another function (which shall be used instead), and that it will soon be removed completely. + +Another good practice is not to overwhelm the user with too many warnings, so per function/class, this warning is raised only N times in the preferred stream (warning, logger, etc.). + +## Installation + +Simple installation from PyPI: +```bash +pip install pyDeprecate +``` + +
+ Other installations + + Simply install with pip from source: + ```bash + pip install https://github.com/Borda/pyDeprecate/archive/main.zip + ``` + +
+ +## Use-cases + +The functionality is kept simple and all defaults should be reasonable, but you can still do extra customization, such as: + +* define user warning message and preferable stream +* extended argument mapping to target function/method +* define deprecation logic for self arguments +* specify warning count per: + - called function (for func deprecation) + - used arguments (for argument deprecation) +* define conditional skip (e.g. depending on some package version) + +In particular the target values (cases): + +- _None_ - raise only warning message (ignore all argument mapping) +- _True_ - deprecate some argument of the function itself (argument mapping shall be specified) +- _Callable_ - forward call to new methods (optionally also argument mapping or extras) + +### Simple function forwarding + +It is very straightforward: you forward your function call to the new function and all arguments are mapped: + +```python +def base_sum(a: int = 0, b: int = 3) -> int: + """My new function anywhere in codebase or even other package.""" + return a + b + +# --------------------------- + +from deprecate import deprecated + +@deprecated(target=base_sum, deprecated_in="0.1", remove_in="0.5") +def depr_sum(a: int, b: int = 5) -> int: + """ + My deprecated function which now has empty body + as all calls are routed to the new function. + """ + pass # or you can just place docstring as one above + +# call this function will raise deprecation warning: +# The `depr_sum` was deprecated since v0.1 in favor of `__main__.base_sum`. +# It will be removed in v0.5. +print(depr_sum(1, 2)) +``` +
+ sample output: + ``` + 3 + ``` +
+ +### Advanced target argument mapping + +Another, more complex example uses argument mapping: + + +
+ Advanced example + + ```python + import logging + from sklearn.metrics import accuracy_score + from deprecate import deprecated, void + + @deprecated( + # use standard sklearn accuracy implementation + target=accuracy_score, + # custom warning stream + stream=logging.warning, + # number or warnings per lifetime (with -1 for always_ + num_warns=5, + # custom message template + template_mgs="`%(source_name)s` was deprecated, use `%(target_path)s`", + # as target args are different, define mapping from source to target func + args_mapping={'preds': 'y_pred', 'target': 'y_true', 'blabla': None} + ) + def depr_accuracy(preds: list, target: list, blabla: float) -> float: + """My deprecated function which is mapping to sklearn accuracy.""" + # to stop complain your IDE about unused argument you can use void/empty function + return void(preds, target, blabla) + + # call this function will raise deprecation warning: + # WARNING:root:`depr_accuracy` was deprecated, use `sklearn.metrics.accuracy_score` + print(depr_accuracy([1, 0, 1, 2], [0, 1, 1, 2], 1.23)) + ``` + sample output: + ``` + 0.5 + ``` + +
+ + +### Deprecation warning only + +The basic use-case, with no forwarding, just raises a warning: + +```python +from deprecate import deprecated + +@deprecated(target=None, deprecated_in="0.1", remove_in="0.5") +def my_sum(a: int, b: int = 5) -> int: + """My deprecated function which still has to have implementation.""" + return a + b + +# call this function will raise deprecation warning: +# The `my_sum` was deprecated since v0.1. It will be removed in v0.5. +print(my_sum(1, 2)) +``` +
+ sample output: + ``` + 3 + ``` +
+ +### Self argument mapping + +We also support deprecation and argument mapping for the function itself: + +```python +from deprecate import deprecated + +@deprecated( + # define as depreaction some self argument - mapping + target=True, args_mapping={'coef': 'new_coef'}, + # common version info + deprecated_in="0.2", remove_in="0.4", +) +def any_pow(base: float, coef: float = 0, new_coef: float = 0) -> float: + """My function with deprecated argument `coef` mapped to `new_coef`.""" + return base ** new_coef + +# call this function will raise deprecation warning: +# The `any_pow` uses deprecated arguments: `coef` -> `new_coef`. +# They were deprecated since v0.2 and will be removed in v0.4. +print(any_pow(2, 3)) +``` +
+ sample output: + ``` + 8 + ``` +
+ +### Multiple deprecation levels + +You can also set multiple deprecation levels by chaining argument deprecations, as each argument could be deprecated in a different version: + +
+ Multiple deprecation levels + + ```python + from deprecate import deprecated + + @deprecated( + True, "0.3", "0.6", args_mapping=dict(c1='nc1'), + template_mgs="Depr: v%(deprecated_in)s rm v%(remove_in)s for args: %(argument_map)s." + ) + @deprecated( + True, "0.4", "0.7", args_mapping=dict(nc1='nc2'), + template_mgs="Depr: v%(deprecated_in)s rm v%(remove_in)s for args: %(argument_map)s." + ) + def any_pow(base, c1: float = 0, nc1: float = 0, nc2: float = 2) -> float: + return base**nc2 + + # call this function will raise deprecation warning: + # DeprecationWarning('Depr: v0.3 rm v0.6 for args: `c1` -> `nc1`.') + # DeprecationWarning('Depr: v0.4 rm v0.7 for args: `nc1` -> `nc2`.') + print(any_pow(2, 3)) + ``` + sample output: + ``` + 8 + ``` + +
+ +### Conditional skip + +A conditional skip can be used for mapping between different target functions depending on additional input, such as a package version: + +```python +from deprecate import deprecated + +FAKE_VERSION = 1 + +def version_greater_1(): + return FAKE_VERSION > 1 + +@deprecated( + True, "0.3", "0.6", args_mapping=dict(c1='nc1'), skip_if=version_greater_1 +) +def skip_pow(base, c1: float = 1, nc1: float = 1) -> float: + return base**(c1 - nc1) + +# call this function will raise deprecation warning +print(skip_pow(2, 3)) + +# change the fake versions +FAKE_VERSION = 2 + +# Will not raise any warning +print(skip_pow(2, 3)) +``` +
+ sample output: + ``` + 0.25 + 4 + ``` +
+ +This can be beneficial with multiple deprecation levels shown above... + +### Class deprecation + +This case can be quite complex as you may deprecate just some methods, here we show full class deprecation: + +```python +class NewCls: + """My new class anywhere in the codebase or other package.""" + + def __init__(self, c: float, d: str = "abc"): + self.my_c = c + self.my_d = d + +# --------------------------- + +from deprecate import deprecated, void + +class PastCls(NewCls): + """ + The deprecated class shall be inherited from the successor class + to hold all methods. + """ + + @deprecated(target=NewCls, deprecated_in="0.2", remove_in="0.4") + def __init__(self, c: int, d: str = "efg"): + """ + You place the decorator around __init__ as you want + to warn user just at the time of creating object. + """ + return void(c, d) + +# call this function will raise deprecation warning: +# The `PastCls` was deprecated since v0.2 in favor of `__main__.NewCls`. +# It will be removed in v0.4. +inst = PastCls(7) +print(inst.my_c) # returns: 7 +print(inst.my_d) # returns: "efg" +``` +
+ sample output: + ``` + 7 + efg + ``` +
+ +## Contribution + +Have you faced this in past or even now, do you have good ideas for improvement, all is welcome! + + diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/RECORD b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..3106b144fd4fc5edf2e3e1543fbe2f4643c7731a --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/RECORD @@ -0,0 +1,13 @@ +deprecate/__init__.py,sha256=37Y-nppyEVah9PsRW7D7MpOK3tcD0De-skCst_ajOdo,546 +deprecate/__pycache__/__init__.cpython-38.pyc,, +deprecate/__pycache__/deprecation.cpython-38.pyc,, +deprecate/__pycache__/utils.cpython-38.pyc,, +deprecate/deprecation.py,sha256=FtZNg7SjB-ZTP2cs6NChzR6joIvfB8VWUWNdvDDicTk,12622 +deprecate/utils.py,sha256=5nyZjgm8rxgpaQUwLWTebTt4cE9HE4nx2HpQDkIFmTU,1869 +pyDeprecate-0.3.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +pyDeprecate-0.3.1.dist-info/LICENSE,sha256=5Ekt3qiROL3RpcpWxvJ20J4roFW1qjyfBndPq1rDmEw,1488 +pyDeprecate-0.3.1.dist-info/METADATA,sha256=ET6d0pHoG_SezgJi1RLzqZDoXi8QLOvnvBBk_bjzo1U,10837 +pyDeprecate-0.3.1.dist-info/RECORD,, +pyDeprecate-0.3.1.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pyDeprecate-0.3.1.dist-info/WHEEL,sha256=OqRkF0eY5GHssMorFjlbTIq072vpHpF60fIQA6lS9xA,92 +pyDeprecate-0.3.1.dist-info/top_level.txt,sha256=JoS3qR1D0-NtCnKrbT4roM65b8LE7ETaLj3AOdlTU1E,10 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/REQUESTED b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/REQUESTED new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/WHEEL b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..385faab0525ccdbfd1070a8bebcca3ac8617236e --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.36.2) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/top_level.txt b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..3ba432ae78bb35d213f3ea3a5a07079487fd66ae --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/pyDeprecate-0.3.1.dist-info/top_level.txt @@ -0,0 +1 @@ +deprecate diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/regex/__init__.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/regex/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eb06564ab033a2b0b501f7f41efb169dacd1f801 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/regex/__init__.py @@ -0,0 +1,3 @@ +from .regex import * +from . 
import regex +__all__ = regex.__all__ diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/regex/_regex_core.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/regex/_regex_core.py new file mode 100644 index 0000000000000000000000000000000000000000..0c77984a0cec8c5bc803a06595af3fd09037a298 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/regex/_regex_core.py @@ -0,0 +1,4498 @@ +# +# Secret Labs' Regular Expression Engine core module +# +# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. +# +# This version of the SRE library can be redistributed under CNRI's +# Python 1.6 license. For any other use, please contact Secret Labs +# AB (info@pythonware.com). +# +# Portions of this engine have been developed in cooperation with +# CNRI. Hewlett-Packard provided funding for 1.6 integration and +# other compatibility work. +# +# 2010-01-16 mrab Python front-end re-written and extended + +import enum +import string +import unicodedata +from collections import defaultdict + +import regex._regex as _regex + +__all__ = ["A", "ASCII", "B", "BESTMATCH", "D", "DEBUG", "E", "ENHANCEMATCH", + "F", "FULLCASE", "I", "IGNORECASE", "L", "LOCALE", "M", "MULTILINE", "P", + "POSIX", "R", "REVERSE", "S", "DOTALL", "T", "TEMPLATE", "U", "UNICODE", + "V0", "VERSION0", "V1", "VERSION1", "W", "WORD", "X", "VERBOSE", "error", + "Scanner", "RegexFlag"] + +# The regex exception. +class error(Exception): + """Exception raised for invalid regular expressions. 
+ + Attributes: + + msg: The unformatted error message + pattern: The regular expression pattern + pos: The position in the pattern where compilation failed, or None + lineno: The line number where compilation failed, unless pos is None + colno: The column number where compilation failed, unless pos is None + """ + + def __init__(self, message, pattern=None, pos=None): + newline = '\n' if isinstance(pattern, str) else b'\n' + self.msg = message + self.pattern = pattern + self.pos = pos + if pattern is not None and pos is not None: + self.lineno = pattern.count(newline, 0, pos) + 1 + self.colno = pos - pattern.rfind(newline, 0, pos) + + message = "{} at position {}".format(message, pos) + + if newline in pattern: + message += " (line {}, column {})".format(self.lineno, + self.colno) + + Exception.__init__(self, message) + +# The exception for when a positional flag has been turned on in the old +# behaviour. +class _UnscopedFlagSet(Exception): + pass + +# The exception for when parsing fails and we want to try something else. +class ParseError(Exception): + pass + +# The exception for when there isn't a valid first set. +class _FirstSetError(Exception): + pass + +# Flags. +class RegexFlag(enum.IntFlag): + A = ASCII = 0x80 # Assume ASCII locale. + B = BESTMATCH = 0x1000 # Best fuzzy match. + D = DEBUG = 0x200 # Print parsed pattern. + E = ENHANCEMATCH = 0x8000 # Attempt to improve the fit after finding the first + # fuzzy match. + F = FULLCASE = 0x4000 # Unicode full case-folding. + I = IGNORECASE = 0x2 # Ignore case. + L = LOCALE = 0x4 # Assume current 8-bit locale. + M = MULTILINE = 0x8 # Make anchors look for newline. + P = POSIX = 0x10000 # POSIX-style matching (leftmost longest). + R = REVERSE = 0x400 # Search backwards. + S = DOTALL = 0x10 # Make dot match newline. + U = UNICODE = 0x20 # Assume Unicode locale. + V0 = VERSION0 = 0x2000 # Old legacy behaviour. + V1 = VERSION1 = 0x100 # New enhanced behaviour. + W = WORD = 0x800 # Default Unicode word breaks. 
+ X = VERBOSE = 0x40 # Ignore whitespace and comments. + T = TEMPLATE = 0x1 # Template (present because re module has it). + + def __repr__(self): + if self._name_ is not None: + return 'regex.%s' % self._name_ + + value = self._value_ + members = [] + negative = value < 0 + + if negative: + value = ~value + + for m in self.__class__: + if value & m._value_: + value &= ~m._value_ + members.append('regex.%s' % m._name_) + + if value: + members.append(hex(value)) + + res = '|'.join(members) + + if negative: + if len(members) > 1: + res = '~(%s)' % res + else: + res = '~%s' % res + + return res + + __str__ = object.__str__ + +globals().update(RegexFlag.__members__) + +DEFAULT_VERSION = VERSION1 + +_ALL_VERSIONS = VERSION0 | VERSION1 +_ALL_ENCODINGS = ASCII | LOCALE | UNICODE + +# The default flags for the various versions. +DEFAULT_FLAGS = {VERSION0: 0, VERSION1: FULLCASE} + +# The mask for the flags. +GLOBAL_FLAGS = (_ALL_VERSIONS | BESTMATCH | DEBUG | ENHANCEMATCH | POSIX | + REVERSE) +SCOPED_FLAGS = (FULLCASE | IGNORECASE | MULTILINE | DOTALL | WORD | VERBOSE | + _ALL_ENCODINGS) + +ALPHA = frozenset(string.ascii_letters) +DIGITS = frozenset(string.digits) +ALNUM = ALPHA | DIGITS +OCT_DIGITS = frozenset(string.octdigits) +HEX_DIGITS = frozenset(string.hexdigits) +SPECIAL_CHARS = frozenset("()|?*+{^$.[\\#") | frozenset([""]) +NAMED_CHAR_PART = ALNUM | frozenset(" -") +PROPERTY_NAME_PART = ALNUM | frozenset(" &_-.") +SET_OPS = ("||", "~~", "&&", "--") + +# The width of the code words inside the regex engine. +BYTES_PER_CODE = _regex.get_code_size() +BITS_PER_CODE = BYTES_PER_CODE * 8 + +# The repeat count which represents infinity. +UNLIMITED = (1 << BITS_PER_CODE) - 1 + +# The regular expression flags. +REGEX_FLAGS = {"a": ASCII, "b": BESTMATCH, "e": ENHANCEMATCH, "f": FULLCASE, + "i": IGNORECASE, "L": LOCALE, "m": MULTILINE, "p": POSIX, "r": REVERSE, + "s": DOTALL, "u": UNICODE, "V0": VERSION0, "V1": VERSION1, "w": WORD, "x": + VERBOSE} + +# The case flags. 
+CASE_FLAGS = FULLCASE | IGNORECASE +NOCASE = 0 +FULLIGNORECASE = FULLCASE | IGNORECASE + +FULL_CASE_FOLDING = UNICODE | FULLIGNORECASE + +CASE_FLAGS_COMBINATIONS = {0: 0, FULLCASE: 0, IGNORECASE: IGNORECASE, + FULLIGNORECASE: FULLIGNORECASE} + +# The number of digits in hexadecimal escapes. +HEX_ESCAPES = {"x": 2, "u": 4, "U": 8} + +# The names of the opcodes. +OPCODES = """ +FAILURE +SUCCESS +ANY +ANY_ALL +ANY_ALL_REV +ANY_REV +ANY_U +ANY_U_REV +ATOMIC +BOUNDARY +BRANCH +CALL_REF +CHARACTER +CHARACTER_IGN +CHARACTER_IGN_REV +CHARACTER_REV +CONDITIONAL +DEFAULT_BOUNDARY +DEFAULT_END_OF_WORD +DEFAULT_START_OF_WORD +END +END_OF_LINE +END_OF_LINE_U +END_OF_STRING +END_OF_STRING_LINE +END_OF_STRING_LINE_U +END_OF_WORD +FUZZY +GRAPHEME_BOUNDARY +GREEDY_REPEAT +GROUP +GROUP_CALL +GROUP_EXISTS +KEEP +LAZY_REPEAT +LOOKAROUND +NEXT +PROPERTY +PROPERTY_IGN +PROPERTY_IGN_REV +PROPERTY_REV +PRUNE +RANGE +RANGE_IGN +RANGE_IGN_REV +RANGE_REV +REF_GROUP +REF_GROUP_FLD +REF_GROUP_FLD_REV +REF_GROUP_IGN +REF_GROUP_IGN_REV +REF_GROUP_REV +SEARCH_ANCHOR +SET_DIFF +SET_DIFF_IGN +SET_DIFF_IGN_REV +SET_DIFF_REV +SET_INTER +SET_INTER_IGN +SET_INTER_IGN_REV +SET_INTER_REV +SET_SYM_DIFF +SET_SYM_DIFF_IGN +SET_SYM_DIFF_IGN_REV +SET_SYM_DIFF_REV +SET_UNION +SET_UNION_IGN +SET_UNION_IGN_REV +SET_UNION_REV +SKIP +START_OF_LINE +START_OF_LINE_U +START_OF_STRING +START_OF_WORD +STRING +STRING_FLD +STRING_FLD_REV +STRING_IGN +STRING_IGN_REV +STRING_REV +FUZZY_EXT +""" + +# Define the opcodes in a namespace. +class Namespace: + pass + +OP = Namespace() +for i, op in enumerate(OPCODES.split()): + setattr(OP, op, i) + +def _shrink_cache(cache_dict, args_dict, locale_sensitive, max_length, divisor=5): + """Make room in the given cache. + + Args: + cache_dict: The cache dictionary to modify. + args_dict: The dictionary of named list args used by patterns. + max_length: Maximum # of entries in cache_dict before it is shrunk. + divisor: Cache will shrink to max_length - 1/divisor*max_length items. 
+ """ + # Toss out a fraction of the entries at random to make room for new ones. + # A random algorithm was chosen as opposed to simply cache_dict.popitem() + # as popitem could penalize the same regular expression repeatedly based + # on its internal hash value. Being random should spread the cache miss + # love around. + cache_keys = tuple(cache_dict.keys()) + overage = len(cache_keys) - max_length + if overage < 0: + # Cache is already within limits. Normally this should not happen + # but it could due to multithreading. + return + + number_to_toss = max_length // divisor + overage + + # The import is done here to avoid a circular dependency. + import random + if not hasattr(random, 'sample'): + # Do nothing while resolving the circular dependency: + # re->random->warnings->tokenize->string->re + return + + for doomed_key in random.sample(cache_keys, number_to_toss): + try: + del cache_dict[doomed_key] + except KeyError: + # Ignore problems if the cache changed from another thread. + pass + + # Rebuild the arguments and locale-sensitivity dictionaries. + args_dict.clear() + sensitivity_dict = {} + for pattern, pattern_type, flags, args, default_version, locale in tuple(cache_dict): + args_dict[pattern, pattern_type, flags, default_version, locale] = args + try: + sensitivity_dict[pattern_type, pattern] = locale_sensitive[pattern_type, pattern] + except KeyError: + pass + + locale_sensitive.clear() + locale_sensitive.update(sensitivity_dict) + +def _fold_case(info, string): + "Folds the case of a string." + flags = info.flags + if (flags & _ALL_ENCODINGS) == 0: + flags |= info.guess_encoding + + return _regex.fold_case(flags, string) + +def is_cased_i(info, char): + "Checks whether a character is cased." + return len(_regex.get_all_cases(info.flags, char)) > 1 + +def is_cased_f(flags, char): + "Checks whether a character is cased." + return len(_regex.get_all_cases(flags, char)) > 1 + +def _compile_firstset(info, fs): + "Compiles the firstset for the pattern." 
+ reverse = bool(info.flags & REVERSE) + fs = _check_firstset(info, reverse, fs) + if not fs: + return [] + + # Compile the firstset. + return fs.compile(reverse) + +def _check_firstset(info, reverse, fs): + "Checks the firstset for the pattern." + if not fs or None in fs: + return None + + # If we ignore the case, for simplicity we won't build a firstset. + members = set() + case_flags = NOCASE + for i in fs: + if isinstance(i, Character) and not i.positive: + return None + +# if i.case_flags: +# if isinstance(i, Character): +# if is_cased_i(info, i.value): +# return [] +# elif isinstance(i, SetBase): +# return [] + case_flags |= i.case_flags + members.add(i.with_flags(case_flags=NOCASE)) + + if case_flags == (FULLCASE | IGNORECASE): + return None + + # Build the firstset. + fs = SetUnion(info, list(members), case_flags=case_flags & ~FULLCASE, + zerowidth=True) + fs = fs.optimise(info, reverse, in_set=True) + + return fs + +def _flatten_code(code): + "Flattens the code from a list of tuples." + flat_code = [] + for c in code: + flat_code.extend(c) + + return flat_code + +def make_case_flags(info): + "Makes the case flags." + flags = info.flags & CASE_FLAGS + + # Turn off FULLCASE if ASCII is turned on. + if info.flags & ASCII: + flags &= ~FULLCASE + + return flags + +def make_character(info, value, in_set=False): + "Makes a character literal." + if in_set: + # A character set is built case-sensitively. + return Character(value) + + return Character(value, case_flags=make_case_flags(info)) + +def make_ref_group(info, name, position): + "Makes a group reference." + return RefGroup(info, name, position, case_flags=make_case_flags(info)) + +def make_string_set(info, name): + "Makes a string set." + return StringSet(info, name, case_flags=make_case_flags(info)) + +def make_property(info, prop, in_set): + "Makes a property." 
+ if in_set: + return prop + + return prop.with_flags(case_flags=make_case_flags(info)) + +def _parse_pattern(source, info): + "Parses a pattern, eg. 'a|b|c'." + branches = [parse_sequence(source, info)] + while source.match("|"): + branches.append(parse_sequence(source, info)) + + if len(branches) == 1: + return branches[0] + return Branch(branches) + +def parse_sequence(source, info): + "Parses a sequence, eg. 'abc'." + sequence = [None] + case_flags = make_case_flags(info) + while True: + saved_pos = source.pos + ch = source.get() + if ch in SPECIAL_CHARS: + if ch in ")|": + # The end of a sequence. At the end of the pattern ch is "". + source.pos = saved_pos + break + elif ch == "\\": + # An escape sequence outside a set. + sequence.append(parse_escape(source, info, False)) + elif ch == "(": + # A parenthesised subpattern or a flag. + element = parse_paren(source, info) + if element is None: + case_flags = make_case_flags(info) + else: + sequence.append(element) + elif ch == ".": + # Any character. + if info.flags & DOTALL: + sequence.append(AnyAll()) + elif info.flags & WORD: + sequence.append(AnyU()) + else: + sequence.append(Any()) + elif ch == "[": + # A character set. + sequence.append(parse_set(source, info)) + elif ch == "^": + # The start of a line or the string. + if info.flags & MULTILINE: + if info.flags & WORD: + sequence.append(StartOfLineU()) + else: + sequence.append(StartOfLine()) + else: + sequence.append(StartOfString()) + elif ch == "$": + # The end of a line or the string. + if info.flags & MULTILINE: + if info.flags & WORD: + sequence.append(EndOfLineU()) + else: + sequence.append(EndOfLine()) + else: + if info.flags & WORD: + sequence.append(EndOfStringLineU()) + else: + sequence.append(EndOfStringLine()) + elif ch in "?*+{": + # Looks like a quantifier. + counts = parse_quantifier(source, info, ch) + if counts: + # It _is_ a quantifier. 
+ apply_quantifier(source, info, counts, case_flags, ch, + saved_pos, sequence) + sequence.append(None) + else: + # It's not a quantifier. Maybe it's a fuzzy constraint. + constraints = parse_fuzzy(source, info, ch, case_flags) + if constraints: + # It _is_ a fuzzy constraint. + apply_constraint(source, info, constraints, case_flags, + saved_pos, sequence) + sequence.append(None) + else: + # The element was just a literal. + sequence.append(Character(ord(ch), + case_flags=case_flags)) + else: + # A literal. + sequence.append(Character(ord(ch), case_flags=case_flags)) + else: + # A literal. + sequence.append(Character(ord(ch), case_flags=case_flags)) + + sequence = [item for item in sequence if item is not None] + return Sequence(sequence) + +def apply_quantifier(source, info, counts, case_flags, ch, saved_pos, + sequence): + element = sequence.pop() + if element is None: + if sequence: + raise error("multiple repeat", source.string, saved_pos) + raise error("nothing to repeat", source.string, saved_pos) + + if isinstance(element, (GreedyRepeat, LazyRepeat, PossessiveRepeat)): + raise error("multiple repeat", source.string, saved_pos) + + min_count, max_count = counts + saved_pos = source.pos + ch = source.get() + if ch == "?": + # The "?" suffix that means it's a lazy repeat. + repeated = LazyRepeat + elif ch == "+": + # The "+" suffix that means it's a possessive repeat. + repeated = PossessiveRepeat + else: + # No suffix means that it's a greedy repeat. + source.pos = saved_pos + repeated = GreedyRepeat + + # Ignore the quantifier if it applies to a zero-width item or the number of + # repeats is fixed at 1. 
+ if not element.is_empty() and (min_count != 1 or max_count != 1): + element = repeated(element, min_count, max_count) + + sequence.append(element) + +def apply_constraint(source, info, constraints, case_flags, saved_pos, + sequence): + element = sequence.pop() + if element is None: + raise error("nothing for fuzzy constraint", source.string, saved_pos) + + # If a group is marked as fuzzy then put all of the fuzzy part in the + # group. + if isinstance(element, Group): + element.subpattern = Fuzzy(element.subpattern, constraints) + sequence.append(element) + else: + sequence.append(Fuzzy(element, constraints)) + +_QUANTIFIERS = {"?": (0, 1), "*": (0, None), "+": (1, None)} + +def parse_quantifier(source, info, ch): + "Parses a quantifier." + q = _QUANTIFIERS.get(ch) + if q: + # It's a quantifier. + return q + + if ch == "{": + # Looks like a limited repeated element, eg. 'a{2,3}'. + counts = parse_limited_quantifier(source) + if counts: + return counts + + return None + +def is_above_limit(count): + "Checks whether a count is above the maximum." + return count is not None and count >= UNLIMITED + +def parse_limited_quantifier(source): + "Parses a limited quantifier." + saved_pos = source.pos + min_count = parse_count(source) + if source.match(","): + max_count = parse_count(source) + + # No minimum means 0 and no maximum means unlimited. 
+ min_count = int(min_count or 0) + max_count = int(max_count) if max_count else None + else: + if not min_count: + source.pos = saved_pos + return None + + min_count = max_count = int(min_count) + + if not source.match ("}"): + source.pos = saved_pos + return None + + if is_above_limit(min_count) or is_above_limit(max_count): + raise error("repeat count too big", source.string, saved_pos) + + if max_count is not None and min_count > max_count: + raise error("min repeat greater than max repeat", source.string, + saved_pos) + + return min_count, max_count + +def parse_fuzzy(source, info, ch, case_flags): + "Parses a fuzzy setting, if present." + saved_pos = source.pos + + if ch != "{": + return None + + constraints = {} + try: + parse_fuzzy_item(source, constraints) + while source.match(","): + parse_fuzzy_item(source, constraints) + except ParseError: + source.pos = saved_pos + return None + + if source.match(":"): + constraints["test"] = parse_fuzzy_test(source, info, case_flags) + + if not source.match("}"): + raise error("expected }", source.string, source.pos) + + return constraints + +def parse_fuzzy_item(source, constraints): + "Parses a fuzzy setting item." + saved_pos = source.pos + try: + parse_cost_constraint(source, constraints) + except ParseError: + source.pos = saved_pos + + parse_cost_equation(source, constraints) + +def parse_cost_constraint(source, constraints): + "Parses a cost constraint." + saved_pos = source.pos + ch = source.get() + if ch in ALPHA: + # Syntax: constraint [("<=" | "<") cost] + constraint = parse_constraint(source, constraints, ch) + + max_inc = parse_fuzzy_compare(source) + + if max_inc is None: + # No maximum cost. + constraints[constraint] = 0, None + else: + # There's a maximum cost. + cost_pos = source.pos + max_cost = parse_cost_limit(source) + + # Inclusive or exclusive limit? 
+ if not max_inc: + max_cost -= 1 + + if max_cost < 0: + raise error("bad fuzzy cost limit", source.string, cost_pos) + + constraints[constraint] = 0, max_cost + elif ch in DIGITS: + # Syntax: cost ("<=" | "<") constraint ("<=" | "<") cost + source.pos = saved_pos + + # Minimum cost. + cost_pos = source.pos + min_cost = parse_cost_limit(source) + + min_inc = parse_fuzzy_compare(source) + if min_inc is None: + raise ParseError() + + constraint = parse_constraint(source, constraints, source.get()) + + max_inc = parse_fuzzy_compare(source) + if max_inc is None: + raise ParseError() + + # Maximum cost. + cost_pos = source.pos + max_cost = parse_cost_limit(source) + + # Inclusive or exclusive limits? + if not min_inc: + min_cost += 1 + if not max_inc: + max_cost -= 1 + + if not 0 <= min_cost <= max_cost: + raise error("bad fuzzy cost limit", source.string, cost_pos) + + constraints[constraint] = min_cost, max_cost + else: + raise ParseError() + +def parse_cost_limit(source): + "Parses a cost limit." + cost_pos = source.pos + digits = parse_count(source) + + try: + return int(digits) + except ValueError: + pass + + raise error("bad fuzzy cost limit", source.string, cost_pos) + +def parse_constraint(source, constraints, ch): + "Parses a constraint." + if ch not in "deis": + raise ParseError() + + if ch in constraints: + raise ParseError() + + return ch + +def parse_fuzzy_compare(source): + "Parses a cost comparator." + if source.match("<="): + return True + elif source.match("<"): + return False + else: + return None + +def parse_cost_equation(source, constraints): + "Parses a cost equation." 
+ if "cost" in constraints: + raise error("more than one cost equation", source.string, source.pos) + + cost = {} + + parse_cost_term(source, cost) + while source.match("+"): + parse_cost_term(source, cost) + + max_inc = parse_fuzzy_compare(source) + if max_inc is None: + raise ParseError() + + max_cost = int(parse_count(source)) + + if not max_inc: + max_cost -= 1 + + if max_cost < 0: + raise error("bad fuzzy cost limit", source.string, source.pos) + + cost["max"] = max_cost + + constraints["cost"] = cost + +def parse_cost_term(source, cost): + "Parses a cost equation term." + coeff = parse_count(source) + ch = source.get() + if ch not in "dis": + raise ParseError() + + if ch in cost: + raise error("repeated fuzzy cost", source.string, source.pos) + + cost[ch] = int(coeff or 1) + +def parse_fuzzy_test(source, info, case_flags): + saved_pos = source.pos + ch = source.get() + if ch in SPECIAL_CHARS: + if ch == "\\": + # An escape sequence outside a set. + return parse_escape(source, info, False) + elif ch == ".": + # Any character. + if info.flags & DOTALL: + return AnyAll() + elif info.flags & WORD: + return AnyU() + else: + return Any() + elif ch == "[": + # A character set. + return parse_set(source, info) + else: + raise error("expected character set", source.string, saved_pos) + elif ch: + # A literal. + return Character(ord(ch), case_flags=case_flags) + else: + raise error("expected character set", source.string, saved_pos) + +def parse_count(source): + "Parses a quantifier's count, which can be empty." + return source.get_while(DIGITS) + +def parse_paren(source, info): + """Parses a parenthesised subpattern or a flag. Returns FLAGS if it's an + inline flag. + """ + saved_pos = source.pos + ch = source.get(True) + if ch == "?": + # (?... + saved_pos_2 = source.pos + ch = source.get(True) + if ch == "<": + # (?<... + saved_pos_3 = source.pos + ch = source.get() + if ch in ("=", "!"): + # (?<=... 
or (?") + saved_flags = info.flags + try: + subpattern = _parse_pattern(source, info) + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + info.close_group() + return Group(info, group, subpattern) + if ch in ("=", "!"): + # (?=... or (?!...: lookahead. + return parse_lookaround(source, info, False, ch == "=") + if ch == "P": + # (?P...: a Python extension. + return parse_extension(source, info) + if ch == "#": + # (?#...: a comment. + return parse_comment(source) + if ch == "(": + # (?(...: a conditional subpattern. + return parse_conditional(source, info) + if ch == ">": + # (?>...: an atomic subpattern. + return parse_atomic(source, info) + if ch == "|": + # (?|...: a common/reset groups branch. + return parse_common(source, info) + if ch == "R" or "0" <= ch <= "9": + # (?R...: probably a call to a group. + return parse_call_group(source, info, ch, saved_pos_2) + if ch == "&": + # (?&...: a call to a named group. + return parse_call_named_group(source, info, saved_pos_2) + + # (?...: probably a flags subpattern. + source.pos = saved_pos_2 + return parse_flags_subpattern(source, info) + + if ch == "*": + # (*... + saved_pos_2 = source.pos + word = source.get_while(set(")>"), include=False) + if word[ : 1].isalpha(): + verb = VERBS.get(word) + if not verb: + raise error("unknown verb", source.string, saved_pos_2) + + source.expect(")") + + return verb + + # (...: an unnamed capture group. + source.pos = saved_pos + group = info.open_group() + saved_flags = info.flags + try: + subpattern = _parse_pattern(source, info) + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + info.close_group() + + return Group(info, group, subpattern) + +def parse_extension(source, info): + "Parses a Python extension." + saved_pos = source.pos + ch = source.get() + if ch == "<": + # (?P<...: a named capture group. 
+ name = parse_name(source) + group = info.open_group(name) + source.expect(">") + saved_flags = info.flags + try: + subpattern = _parse_pattern(source, info) + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + info.close_group() + + return Group(info, group, subpattern) + if ch == "=": + # (?P=...: a named group reference. + name = parse_name(source, allow_numeric=True) + source.expect(")") + if info.is_open_group(name): + raise error("cannot refer to an open group", source.string, + saved_pos) + + return make_ref_group(info, name, saved_pos) + if ch == ">" or ch == "&": + # (?P>...: a call to a group. + return parse_call_named_group(source, info, saved_pos) + + source.pos = saved_pos + raise error("unknown extension", source.string, saved_pos) + +def parse_comment(source): + "Parses a comment." + while True: + saved_pos = source.pos + c = source.get(True) + + if not c or c == ")": + break + + if c == "\\": + c = source.get(True) + + source.pos = saved_pos + source.expect(")") + + return None + +def parse_lookaround(source, info, behind, positive): + "Parses a lookaround." + saved_flags = info.flags + try: + subpattern = _parse_pattern(source, info) + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + return LookAround(behind, positive, subpattern) + +def parse_conditional(source, info): + "Parses a conditional subpattern." + saved_flags = info.flags + saved_pos = source.pos + ch = source.get() + if ch == "?": + # (?(?... + ch = source.get() + if ch in ("=", "!"): + # (?(?=... or (?(?!...: lookahead conditional. + return parse_lookaround_conditional(source, info, False, ch == "=") + if ch == "<": + # (?(?<... + ch = source.get() + if ch in ("=", "!"): + # (?(?<=... 
or (?(?"), include=False) + + if not name: + raise error("missing group name", source.string, source.pos) + + if name.isdigit(): + min_group = 0 if allow_group_0 else 1 + if not allow_numeric or int(name) < min_group: + raise error("bad character in group name", source.string, + source.pos) + else: + if not name.isidentifier(): + raise error("bad character in group name", source.string, + source.pos) + + return name + +def is_octal(string): + "Checks whether a string is octal." + return all(ch in OCT_DIGITS for ch in string) + +def is_decimal(string): + "Checks whether a string is decimal." + return all(ch in DIGITS for ch in string) + +def is_hexadecimal(string): + "Checks whether a string is hexadecimal." + return all(ch in HEX_DIGITS for ch in string) + +def parse_escape(source, info, in_set): + "Parses an escape sequence." + saved_ignore = source.ignore_space + source.ignore_space = False + ch = source.get() + source.ignore_space = saved_ignore + if not ch: + # A backslash at the end of the pattern. + raise error("bad escape (end of pattern)", source.string, source.pos) + if ch in HEX_ESCAPES: + # A hexadecimal escape sequence. + return parse_hex_escape(source, info, ch, HEX_ESCAPES[ch], in_set, ch) + elif ch == "g" and not in_set: + # A group reference. + saved_pos = source.pos + try: + return parse_group_ref(source, info) + except error: + # Invalid as a group reference, so assume it's a literal. + source.pos = saved_pos + + return make_character(info, ord(ch), in_set) + elif ch == "G" and not in_set: + # A search anchor. + return SearchAnchor() + elif ch == "L" and not in_set: + # A string set. + return parse_string_set(source, info) + elif ch == "N": + # A named codepoint. + return parse_named_char(source, info, in_set) + elif ch in "pP": + # A Unicode property, positive or negative. + return parse_property(source, info, ch == "p", in_set) + elif ch == "X" and not in_set: + # A grapheme cluster. 
+ return Grapheme() + elif ch in ALPHA: + # An alphabetic escape sequence. + # Positional escapes aren't allowed inside a character set. + if not in_set: + if info.flags & WORD: + value = WORD_POSITION_ESCAPES.get(ch) + else: + value = POSITION_ESCAPES.get(ch) + + if value: + return value + + value = CHARSET_ESCAPES.get(ch) + if value: + return value + + value = CHARACTER_ESCAPES.get(ch) + if value: + return Character(ord(value)) + + raise error("bad escape \\%s" % ch, source.string, source.pos) + elif ch in DIGITS: + # A numeric escape sequence. + return parse_numeric_escape(source, info, ch, in_set) + else: + # A literal. + return make_character(info, ord(ch), in_set) + +def parse_numeric_escape(source, info, ch, in_set): + "Parses a numeric escape sequence." + if in_set or ch == "0": + # Octal escape sequence, max 3 digits. + return parse_octal_escape(source, info, [ch], in_set) + + # At least 1 digit, so either octal escape or group. + digits = ch + saved_pos = source.pos + ch = source.get() + if ch in DIGITS: + # At least 2 digits, so either octal escape or group. + digits += ch + saved_pos = source.pos + ch = source.get() + if is_octal(digits) and ch in OCT_DIGITS: + # 3 octal digits, so octal escape sequence. + encoding = info.flags & _ALL_ENCODINGS + if encoding == ASCII or encoding == LOCALE: + octal_mask = 0xFF + else: + octal_mask = 0x1FF + + value = int(digits + ch, 8) & octal_mask + return make_character(info, value) + + # Group reference. + source.pos = saved_pos + if info.is_open_group(digits): + raise error("cannot refer to an open group", source.string, source.pos) + + return make_ref_group(info, digits, source.pos) + +def parse_octal_escape(source, info, digits, in_set): + "Parses an octal escape sequence." 
+ saved_pos = source.pos + ch = source.get() + while len(digits) < 3 and ch in OCT_DIGITS: + digits.append(ch) + saved_pos = source.pos + ch = source.get() + + source.pos = saved_pos + try: + value = int("".join(digits), 8) + return make_character(info, value, in_set) + except ValueError: + if digits[0] in OCT_DIGITS: + raise error("incomplete escape \\%s" % ''.join(digits), + source.string, source.pos) + else: + raise error("bad escape \\%s" % digits[0], source.string, + source.pos) + +def parse_hex_escape(source, info, esc, expected_len, in_set, type): + "Parses a hex escape sequence." + saved_pos = source.pos + digits = [] + for i in range(expected_len): + ch = source.get() + if ch not in HEX_DIGITS: + raise error("incomplete escape \\%s%s" % (type, ''.join(digits)), + source.string, saved_pos) + digits.append(ch) + + try: + value = int("".join(digits), 16) + except ValueError: + pass + else: + if value < 0x110000: + return make_character(info, value, in_set) + + # Bad hex escape. + raise error("bad hex escape \\%s%s" % (esc, ''.join(digits)), + source.string, saved_pos) + +def parse_group_ref(source, info): + "Parses a group reference." + source.expect("<") + saved_pos = source.pos + name = parse_name(source, True) + source.expect(">") + if info.is_open_group(name): + raise error("cannot refer to an open group", source.string, source.pos) + + return make_ref_group(info, name, saved_pos) + +def parse_string_set(source, info): + "Parses a string set reference." + source.expect("<") + name = parse_name(source, True) + source.expect(">") + if name is None or name not in info.kwargs: + raise error("undefined named list", source.string, source.pos) + + return make_string_set(info, name) + +def parse_named_char(source, info, in_set): + "Parses a named character." 
+ saved_pos = source.pos + if source.match("{"): + name = source.get_while(NAMED_CHAR_PART) + if source.match("}"): + try: + value = unicodedata.lookup(name) + return make_character(info, ord(value), in_set) + except KeyError: + raise error("undefined character name", source.string, + source.pos) + + source.pos = saved_pos + return make_character(info, ord("N"), in_set) + +def parse_property(source, info, positive, in_set): + "Parses a Unicode property." + saved_pos = source.pos + ch = source.get() + if ch == "{": + negate = source.match("^") + prop_name, name = parse_property_name(source) + if source.match("}"): + # It's correctly delimited. + prop = lookup_property(prop_name, name, positive != negate, source) + return make_property(info, prop, in_set) + elif ch and ch in "CLMNPSZ": + # An abbreviated property, eg \pL. + prop = lookup_property(None, ch, positive, source) + return make_property(info, prop, in_set) + + # Not a property, so treat as a literal "p" or "P". + source.pos = saved_pos + ch = "p" if positive else "P" + return make_character(info, ord(ch), in_set) + +def parse_property_name(source): + "Parses a property name, which may be qualified." + name = source.get_while(PROPERTY_NAME_PART) + saved_pos = source.pos + + ch = source.get() + if ch and ch in ":=": + prop_name = name + name = source.get_while(ALNUM | set(" &_-./")).strip() + + if name: + # Name after the ":" or "=", so it's a qualified name. + saved_pos = source.pos + else: + # No name after the ":" or "=", so assume it's an unqualified name. + prop_name, name = None, prop_name + else: + prop_name = None + + source.pos = saved_pos + return prop_name, name + +def parse_set(source, info): + "Parses a character set." + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + + saved_ignore = source.ignore_space + source.ignore_space = False + # Negative set? 
+ negate = source.match("^") + try: + if version == VERSION0: + item = parse_set_imp_union(source, info) + else: + item = parse_set_union(source, info) + + if not source.match("]"): + raise error("missing ]", source.string, source.pos) + finally: + source.ignore_space = saved_ignore + + if negate: + item = item.with_flags(positive=not item.positive) + + item = item.with_flags(case_flags=make_case_flags(info)) + + return item + +def parse_set_union(source, info): + "Parses a set union ([x||y])." + items = [parse_set_symm_diff(source, info)] + while source.match("||"): + items.append(parse_set_symm_diff(source, info)) + + if len(items) == 1: + return items[0] + return SetUnion(info, items) + +def parse_set_symm_diff(source, info): + "Parses a set symmetric difference ([x~~y])." + items = [parse_set_inter(source, info)] + while source.match("~~"): + items.append(parse_set_inter(source, info)) + + if len(items) == 1: + return items[0] + return SetSymDiff(info, items) + +def parse_set_inter(source, info): + "Parses a set intersection ([x&&y])." + items = [parse_set_diff(source, info)] + while source.match("&&"): + items.append(parse_set_diff(source, info)) + + if len(items) == 1: + return items[0] + return SetInter(info, items) + +def parse_set_diff(source, info): + "Parses a set difference ([x--y])." + items = [parse_set_imp_union(source, info)] + while source.match("--"): + items.append(parse_set_imp_union(source, info)) + + if len(items) == 1: + return items[0] + return SetDiff(info, items) + +def parse_set_imp_union(source, info): + "Parses a set implicit union ([xy])." + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + + items = [parse_set_member(source, info)] + while True: + saved_pos = source.pos + if source.match("]"): + # End of the set. + source.pos = saved_pos + break + + if version == VERSION1 and any(source.match(op) for op in SET_OPS): + # The new behaviour has set operators. 
+ source.pos = saved_pos + break + + items.append(parse_set_member(source, info)) + + if len(items) == 1: + return items[0] + return SetUnion(info, items) + +def parse_set_member(source, info): + "Parses a member in a character set." + # Parse a set item. + start = parse_set_item(source, info) + saved_pos1 = source.pos + if (not isinstance(start, Character) or not start.positive or not + source.match("-")): + # It's not the start of a range. + return start + + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + + # It looks like the start of a range of characters. + saved_pos2 = source.pos + if version == VERSION1 and source.match("-"): + # It's actually the set difference operator '--', so return the + # character. + source.pos = saved_pos1 + return start + + if source.match("]"): + # We've reached the end of the set, so return both the character and + # hyphen. + source.pos = saved_pos2 + return SetUnion(info, [start, Character(ord("-"))]) + + # Parse a set item. + end = parse_set_item(source, info) + if not isinstance(end, Character) or not end.positive: + # It's not a range, so return the character, hyphen and property. + return SetUnion(info, [start, Character(ord("-")), end]) + + # It _is_ a range. + if start.value > end.value: + raise error("bad character range", source.string, source.pos) + + if start.value == end.value: + return start + + return Range(start.value, end.value) + +def parse_set_item(source, info): + "Parses an item in a character set." + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + + if source.match("\\"): + # An escape sequence in a set. + return parse_escape(source, info, True) + + saved_pos = source.pos + if source.match("[:"): + # Looks like a POSIX character class. + try: + return parse_posix_class(source, info) + except ParseError: + # Not a POSIX character class. + source.pos = saved_pos + + if version == VERSION1 and source.match("["): + # It's the start of a nested set. + + # Negative set? 
+ negate = source.match("^") + item = parse_set_union(source, info) + + if not source.match("]"): + raise error("missing ]", source.string, source.pos) + + if negate: + item = item.with_flags(positive=not item.positive) + + return item + + ch = source.get() + if not ch: + raise error("unterminated character set", source.string, source.pos) + + return Character(ord(ch)) + +def parse_posix_class(source, info): + "Parses a POSIX character class." + negate = source.match("^") + prop_name, name = parse_property_name(source) + if not source.match(":]"): + raise ParseError() + + return lookup_property(prop_name, name, not negate, source, posix=True) + +def float_to_rational(flt): + "Converts a float to a rational pair." + int_part = int(flt) + error = flt - int_part + if abs(error) < 0.0001: + return int_part, 1 + + den, num = float_to_rational(1.0 / error) + + return int_part * den + num, den + +def numeric_to_rational(numeric): + "Converts a numeric string to a rational string, if possible." + if numeric[ : 1] == "-": + sign, numeric = numeric[0], numeric[1 : ] + else: + sign = "" + + parts = numeric.split("/") + if len(parts) == 2: + num, den = float_to_rational(float(parts[0]) / float(parts[1])) + elif len(parts) == 1: + num, den = float_to_rational(float(parts[0])) + else: + raise ValueError() + + result = "{}{}/{}".format(sign, num, den) + if result.endswith("/1"): + return result[ : -2] + + return result + +def standardise_name(name): + "Standardises a property or value name." + try: + return numeric_to_rational("".join(name)) + except (ValueError, ZeroDivisionError): + return "".join(ch for ch in name if ch not in "_- ").upper() + +_POSIX_CLASSES = set('ALNUM DIGIT PUNCT XDIGIT'.split()) + +_BINARY_VALUES = set('YES Y NO N TRUE T FALSE F'.split()) + +def lookup_property(property, value, positive, source=None, posix=False): + "Looks up a property." + # Normalise the names (which may still be lists). 
+ property = standardise_name(property) if property else None + value = standardise_name(value) + + if (property, value) == ("GENERALCATEGORY", "ASSIGNED"): + property, value, positive = "GENERALCATEGORY", "UNASSIGNED", not positive + + if posix and not property and value.upper() in _POSIX_CLASSES: + value = 'POSIX' + value + + if property: + # Both the property and the value are provided. + prop = PROPERTIES.get(property) + if not prop: + if not source: + raise error("unknown property") + + raise error("unknown property", source.string, source.pos) + + prop_id, value_dict = prop + val_id = value_dict.get(value) + if val_id is None: + if not source: + raise error("unknown property value") + + raise error("unknown property value", source.string, source.pos) + + return Property((prop_id << 16) | val_id, positive) + + # Only the value is provided. + # It might be the name of a GC, script or block value. + for property in ("GC", "SCRIPT", "BLOCK"): + prop_id, value_dict = PROPERTIES.get(property) + val_id = value_dict.get(value) + if val_id is not None: + return Property((prop_id << 16) | val_id, positive) + + # It might be the name of a binary property. + prop = PROPERTIES.get(value) + if prop: + prop_id, value_dict = prop + if set(value_dict) == _BINARY_VALUES: + return Property((prop_id << 16) | 1, positive) + + return Property(prop_id << 16, not positive) + + # It might be the name of a binary property starting with a prefix. + if value.startswith("IS"): + prop = PROPERTIES.get(value[2 : ]) + if prop: + prop_id, value_dict = prop + if "YES" in value_dict: + return Property((prop_id << 16) | 1, positive) + + # It might be the name of a script or block starting with a prefix. + for prefix, property in (("IS", "SCRIPT"), ("IN", "BLOCK")): + if value.startswith(prefix): + prop_id, value_dict = PROPERTIES.get(property) + val_id = value_dict.get(value[2 : ]) + if val_id is not None: + return Property((prop_id << 16) | val_id, positive) + + # Unknown property. 
+ if not source: + raise error("unknown property") + + raise error("unknown property", source.string, source.pos) + +def _compile_replacement(source, pattern, is_unicode): + "Compiles a replacement template escape sequence." + ch = source.get() + if ch in ALPHA: + # An alphabetic escape sequence. + value = CHARACTER_ESCAPES.get(ch) + if value: + return False, [ord(value)] + + if ch in HEX_ESCAPES and (ch == "x" or is_unicode): + # A hexadecimal escape sequence. + return False, [parse_repl_hex_escape(source, HEX_ESCAPES[ch], ch)] + + if ch == "g": + # A group preference. + return True, [compile_repl_group(source, pattern)] + + if ch == "N" and is_unicode: + # A named character. + value = parse_repl_named_char(source) + if value is not None: + return False, [value] + + raise error("bad escape \\%s" % ch, source.string, source.pos) + + if isinstance(source.sep, bytes): + octal_mask = 0xFF + else: + octal_mask = 0x1FF + + if ch == "0": + # An octal escape sequence. + digits = ch + while len(digits) < 3: + saved_pos = source.pos + ch = source.get() + if ch not in OCT_DIGITS: + source.pos = saved_pos + break + digits += ch + + return False, [int(digits, 8) & octal_mask] + + if ch in DIGITS: + # Either an octal escape sequence (3 digits) or a group reference (max + # 2 digits). + digits = ch + saved_pos = source.pos + ch = source.get() + if ch in DIGITS: + digits += ch + saved_pos = source.pos + ch = source.get() + if ch and is_octal(digits + ch): + # An octal escape sequence. + return False, [int(digits + ch, 8) & octal_mask] + + # A group reference. + source.pos = saved_pos + return True, [int(digits)] + + if ch == "\\": + # An escaped backslash is a backslash. + return False, [ord("\\")] + + if not ch: + # A trailing backslash. + raise error("bad escape (end of pattern)", source.string, source.pos) + + # An escaped non-backslash is a backslash followed by the literal. 
+ return False, [ord("\\"), ord(ch)] + +def parse_repl_hex_escape(source, expected_len, type): + "Parses a hex escape sequence in a replacement string." + digits = [] + for i in range(expected_len): + ch = source.get() + if ch not in HEX_DIGITS: + raise error("incomplete escape \\%s%s" % (type, ''.join(digits)), + source.string, source.pos) + digits.append(ch) + + return int("".join(digits), 16) + +def parse_repl_named_char(source): + "Parses a named character in a replacement string." + saved_pos = source.pos + if source.match("{"): + name = source.get_while(ALPHA | set(" ")) + + if source.match("}"): + try: + value = unicodedata.lookup(name) + return ord(value) + except KeyError: + raise error("undefined character name", source.string, + source.pos) + + source.pos = saved_pos + return None + +def compile_repl_group(source, pattern): + "Compiles a replacement template group reference." + source.expect("<") + name = parse_name(source, True, True) + + source.expect(">") + if name.isdigit(): + index = int(name) + if not 0 <= index <= pattern.groups: + raise error("invalid group reference", source.string, source.pos) + + return index + + try: + return pattern.groupindex[name] + except KeyError: + raise IndexError("unknown group") + +# The regular expression is parsed into a syntax tree. The different types of +# node are defined below. + +INDENT = " " +POSITIVE_OP = 0x1 +ZEROWIDTH_OP = 0x2 +FUZZY_OP = 0x4 +REVERSE_OP = 0x8 +REQUIRED_OP = 0x10 + +POS_TEXT = {False: "NON-MATCH", True: "MATCH"} +CASE_TEXT = {NOCASE: "", IGNORECASE: " SIMPLE_IGNORE_CASE", FULLCASE: "", + FULLIGNORECASE: " FULL_IGNORE_CASE"} + +def make_sequence(items): + if len(items) == 1: + return items[0] + return Sequence(items) + +# Common base class for all nodes. 
class RegexBase:
    """Common base class for all syntax-tree nodes.

    Supplies neutral defaults for the tree-walking hooks. This class does
    not itself define ``positive``, ``case_flags``, ``zerowidth``,
    ``rebuild``, ``_compile`` or ``max_width``; the methods below that use
    them rely on a subclass providing them.
    """

    def __init__(self):
        # The key drives hashing and equality; by default it is the class.
        self._key = self.__class__

    def with_flags(self, positive=None, case_flags=None, zerowidth=None):
        """Return a node carrying the requested flags.

        An argument left as None keeps the node's current value. When no
        flag ends up different, the node itself is returned; otherwise the
        result of ``self.rebuild(...)`` is returned.
        """
        new_positive = self.positive if positive is None else bool(positive)
        new_case_flags = (self.case_flags if case_flags is None else
          CASE_FLAGS_COMBINATIONS[case_flags & CASE_FLAGS])
        new_zerowidth = (self.zerowidth if zerowidth is None else
          bool(zerowidth))

        unchanged = (new_positive == self.positive and
          new_case_flags == self.case_flags and
          new_zerowidth == self.zerowidth)
        if not unchanged:
            return self.rebuild(new_positive, new_case_flags, new_zerowidth)

        return self

    def fix_groups(self, pattern, reverse, fuzzy):
        "Default: nothing to fix."
        return None

    def optimise(self, info, reverse):
        "Default: the node is returned as it is."
        return self

    def pack_characters(self, info):
        "Default: the node is returned as it is."
        return self

    def remove_captures(self):
        "Default: the node is returned as it is."
        return self

    def is_atomic(self):
        "Default: True."
        return True

    def can_be_affix(self):
        "Default: True."
        return True

    def contains_group(self):
        "Default: False."
        return False

    def get_firstset(self, reverse):
        "Default: no first set is available, so _FirstSetError is raised."
        raise _FirstSetError()

    def has_simple_start(self):
        "Default: False."
        return False

    def compile(self, reverse=False, fuzzy=False):
        "Delegates to the node's own _compile."
        return self._compile(reverse, fuzzy)

    def is_empty(self):
        "Default: False."
        return False

    def __hash__(self):
        return hash(self._key)

    def __eq__(self, other):
        # Nodes are equal only if they have exactly the same type and key.
        if type(self) is not type(other):
            return False
        return self._key == other._key

    def __ne__(self, other):
        return not self.__eq__(other)

    def get_required_string(self, reverse):
        "Default: a pair of the node's maximum width and None."
        return self.max_width(), None

# Base class for zero-width nodes.
+class ZeroWidthBase(RegexBase): + def __init__(self, positive=True): + RegexBase.__init__(self) + self.positive = bool(positive) + + self._key = self.__class__, self.positive + + def get_firstset(self, reverse): + return set([None]) + + def _compile(self, reverse, fuzzy): + flags = 0 + if self.positive: + flags |= POSITIVE_OP + if fuzzy: + flags |= FUZZY_OP + if reverse: + flags |= REVERSE_OP + return [(self._opcode, flags)] + + def dump(self, indent, reverse): + print("{}{} {}".format(INDENT * indent, self._op_name, + POS_TEXT[self.positive])) + + def max_width(self): + return 0 + +class Any(RegexBase): + _opcode = {False: OP.ANY, True: OP.ANY_REV} + _op_name = "ANY" + + def has_simple_start(self): + return True + + def _compile(self, reverse, fuzzy): + flags = 0 + if fuzzy: + flags |= FUZZY_OP + return [(self._opcode[reverse], flags)] + + def dump(self, indent, reverse): + print("{}{}".format(INDENT * indent, self._op_name)) + + def max_width(self): + return 1 + +class AnyAll(Any): + _opcode = {False: OP.ANY_ALL, True: OP.ANY_ALL_REV} + _op_name = "ANY_ALL" + +class AnyU(Any): + _opcode = {False: OP.ANY_U, True: OP.ANY_U_REV} + _op_name = "ANY_U" + +class Atomic(RegexBase): + def __init__(self, subpattern): + RegexBase.__init__(self) + self.subpattern = subpattern + + def fix_groups(self, pattern, reverse, fuzzy): + self.subpattern.fix_groups(pattern, reverse, fuzzy) + + def optimise(self, info, reverse): + self.subpattern = self.subpattern.optimise(info, reverse) + + if self.subpattern.is_empty(): + return self.subpattern + return self + + def pack_characters(self, info): + self.subpattern = self.subpattern.pack_characters(info) + return self + + def remove_captures(self): + self.subpattern = self.subpattern.remove_captures() + return self + + def can_be_affix(self): + return self.subpattern.can_be_affix() + + def contains_group(self): + return self.subpattern.contains_group() + + def get_firstset(self, reverse): + return self.subpattern.get_firstset(reverse) + 
+ def has_simple_start(self): + return self.subpattern.has_simple_start() + + def _compile(self, reverse, fuzzy): + return ([(OP.ATOMIC, )] + self.subpattern.compile(reverse, fuzzy) + + [(OP.END, )]) + + def dump(self, indent, reverse): + print("{}ATOMIC".format(INDENT * indent)) + self.subpattern.dump(indent + 1, reverse) + + def is_empty(self): + return self.subpattern.is_empty() + + def __eq__(self, other): + return (type(self) is type(other) and self.subpattern == + other.subpattern) + + def max_width(self): + return self.subpattern.max_width() + + def get_required_string(self, reverse): + return self.subpattern.get_required_string(reverse) + +class Boundary(ZeroWidthBase): + _opcode = OP.BOUNDARY + _op_name = "BOUNDARY" + +class Branch(RegexBase): + def __init__(self, branches): + RegexBase.__init__(self) + self.branches = branches + + def fix_groups(self, pattern, reverse, fuzzy): + for b in self.branches: + b.fix_groups(pattern, reverse, fuzzy) + + def optimise(self, info, reverse): + if not self.branches: + return Sequence([]) + + # Flatten branches within branches. + branches = Branch._flatten_branches(info, reverse, self.branches) + + # Move any common prefix or suffix out of the branches. + if reverse: + suffix, branches = Branch._split_common_suffix(info, branches) + prefix = [] + else: + prefix, branches = Branch._split_common_prefix(info, branches) + suffix = [] + + # Try to reduce adjacent single-character branches to sets. + branches = Branch._reduce_to_set(info, reverse, branches) + + if len(branches) > 1: + sequence = [Branch(branches)] + + if not prefix or not suffix: + # We might be able to add a quick precheck before the branches. 
+ firstset = self._add_precheck(info, reverse, branches) + + if firstset: + if reverse: + sequence.append(firstset) + else: + sequence.insert(0, firstset) + else: + sequence = branches + + return make_sequence(prefix + sequence + suffix) + + def _add_precheck(self, info, reverse, branches): + charset = set() + pos = -1 if reverse else 0 + + for branch in branches: + if type(branch) is Literal and branch.case_flags == NOCASE: + charset.add(branch.characters[pos]) + else: + return + + if not charset: + return None + + return _check_firstset(info, reverse, [Character(c) for c in charset]) + + def pack_characters(self, info): + self.branches = [b.pack_characters(info) for b in self.branches] + return self + + def remove_captures(self): + self.branches = [b.remove_captures() for b in self.branches] + return self + + def is_atomic(self): + return all(b.is_atomic() for b in self.branches) + + def can_be_affix(self): + return all(b.can_be_affix() for b in self.branches) + + def contains_group(self): + return any(b.contains_group() for b in self.branches) + + def get_firstset(self, reverse): + fs = set() + for b in self.branches: + fs |= b.get_firstset(reverse) + + return fs or set([None]) + + def _compile(self, reverse, fuzzy): + code = [(OP.BRANCH, )] + for b in self.branches: + code.extend(b.compile(reverse, fuzzy)) + code.append((OP.NEXT, )) + + code[-1] = (OP.END, ) + + return code + + def dump(self, indent, reverse): + print("{}BRANCH".format(INDENT * indent)) + self.branches[0].dump(indent + 1, reverse) + for b in self.branches[1 : ]: + print("{}OR".format(INDENT * indent)) + b.dump(indent + 1, reverse) + + @staticmethod + def _flatten_branches(info, reverse, branches): + # Flatten the branches so that there aren't branches of branches. 
+ new_branches = [] + for b in branches: + b = b.optimise(info, reverse) + if isinstance(b, Branch): + new_branches.extend(b.branches) + else: + new_branches.append(b) + + return new_branches + + @staticmethod + def _split_common_prefix(info, branches): + # Common leading items can be moved out of the branches. + # Get the items in the branches. + alternatives = [] + for b in branches: + if isinstance(b, Sequence): + alternatives.append(b.items) + else: + alternatives.append([b]) + + # What is the maximum possible length of the prefix? + max_count = min(len(a) for a in alternatives) + + # What is the longest common prefix? + prefix = alternatives[0] + pos = 0 + end_pos = max_count + while pos < end_pos and prefix[pos].can_be_affix() and all(a[pos] == + prefix[pos] for a in alternatives): + pos += 1 + count = pos + + if info.flags & UNICODE: + # We need to check that we're not splitting a sequence of + # characters which could form part of full case-folding. + count = pos + while count > 0 and not all(Branch._can_split(a, count) for a in + alternatives): + count -= 1 + + # No common prefix is possible. + if count == 0: + return [], branches + + # Rebuild the branches. + new_branches = [] + for a in alternatives: + new_branches.append(make_sequence(a[count : ])) + + return prefix[ : count], new_branches + + @staticmethod + def _split_common_suffix(info, branches): + # Common trailing items can be moved out of the branches. + # Get the items in the branches. + alternatives = [] + for b in branches: + if isinstance(b, Sequence): + alternatives.append(b.items) + else: + alternatives.append([b]) + + # What is the maximum possible length of the suffix? + max_count = min(len(a) for a in alternatives) + + # What is the longest common suffix? 
+ suffix = alternatives[0] + pos = -1 + end_pos = -1 - max_count + while pos > end_pos and suffix[pos].can_be_affix() and all(a[pos] == + suffix[pos] for a in alternatives): + pos -= 1 + count = -1 - pos + + if info.flags & UNICODE: + # We need to check that we're not splitting a sequence of + # characters which could form part of full case-folding. + while count > 0 and not all(Branch._can_split_rev(a, count) for a + in alternatives): + count -= 1 + + # No common suffix is possible. + if count == 0: + return [], branches + + # Rebuild the branches. + new_branches = [] + for a in alternatives: + new_branches.append(make_sequence(a[ : -count])) + + return suffix[-count : ], new_branches + + @staticmethod + def _can_split(items, count): + # Check the characters either side of the proposed split. + if not Branch._is_full_case(items, count - 1): + return True + + if not Branch._is_full_case(items, count): + return True + + # Check whether a 1-1 split would be OK. + if Branch._is_folded(items[count - 1 : count + 1]): + return False + + # Check whether a 1-2 split would be OK. + if (Branch._is_full_case(items, count + 2) and + Branch._is_folded(items[count - 1 : count + 2])): + return False + + # Check whether a 2-1 split would be OK. + if (Branch._is_full_case(items, count - 2) and + Branch._is_folded(items[count - 2 : count + 1])): + return False + + return True + + @staticmethod + def _can_split_rev(items, count): + end = len(items) + + # Check the characters either side of the proposed split. + if not Branch._is_full_case(items, end - count): + return True + + if not Branch._is_full_case(items, end - count - 1): + return True + + # Check whether a 1-1 split would be OK. + if Branch._is_folded(items[end - count - 1 : end - count + 1]): + return False + + # Check whether a 1-2 split would be OK. 
+ if (Branch._is_full_case(items, end - count + 2) and + Branch._is_folded(items[end - count - 1 : end - count + 2])): + return False + + # Check whether a 2-1 split would be OK. + if (Branch._is_full_case(items, end - count - 2) and + Branch._is_folded(items[end - count - 2 : end - count + 1])): + return False + + return True + + @staticmethod + def _merge_common_prefixes(info, reverse, branches): + # Branches with the same case-sensitive character prefix can be grouped + # together if they are separated only by other branches with a + # character prefix. + prefixed = defaultdict(list) + order = {} + new_branches = [] + for b in branches: + if Branch._is_simple_character(b): + # Branch starts with a simple character. + prefixed[b.value].append([b]) + order.setdefault(b.value, len(order)) + elif (isinstance(b, Sequence) and b.items and + Branch._is_simple_character(b.items[0])): + # Branch starts with a simple character. + prefixed[b.items[0].value].append(b.items) + order.setdefault(b.items[0].value, len(order)) + else: + Branch._flush_char_prefix(info, reverse, prefixed, order, + new_branches) + + new_branches.append(b) + + Branch._flush_char_prefix(info, prefixed, order, new_branches) + + return new_branches + + @staticmethod + def _is_simple_character(c): + return isinstance(c, Character) and c.positive and not c.case_flags + + @staticmethod + def _reduce_to_set(info, reverse, branches): + # Can the branches be reduced to a set? + new_branches = [] + items = set() + case_flags = NOCASE + for b in branches: + if isinstance(b, (Character, Property, SetBase)): + # Branch starts with a single character. + if b.case_flags != case_flags: + # Different case sensitivity, so flush. 
+ Branch._flush_set_members(info, reverse, items, case_flags, + new_branches) + + case_flags = b.case_flags + + items.add(b.with_flags(case_flags=NOCASE)) + else: + Branch._flush_set_members(info, reverse, items, case_flags, + new_branches) + + new_branches.append(b) + + Branch._flush_set_members(info, reverse, items, case_flags, + new_branches) + + return new_branches + + @staticmethod + def _flush_char_prefix(info, reverse, prefixed, order, new_branches): + # Flush the prefixed branches. + if not prefixed: + return + + for value, branches in sorted(prefixed.items(), key=lambda pair: + order[pair[0]]): + if len(branches) == 1: + new_branches.append(make_sequence(branches[0])) + else: + subbranches = [] + optional = False + for b in branches: + if len(b) > 1: + subbranches.append(make_sequence(b[1 : ])) + elif not optional: + subbranches.append(Sequence()) + optional = True + + sequence = Sequence([Character(value), Branch(subbranches)]) + new_branches.append(sequence.optimise(info, reverse)) + + prefixed.clear() + order.clear() + + @staticmethod + def _flush_set_members(info, reverse, items, case_flags, new_branches): + # Flush the set members. + if not items: + return + + if len(items) == 1: + item = list(items)[0] + else: + item = SetUnion(info, list(items)).optimise(info, reverse) + + new_branches.append(item.with_flags(case_flags=case_flags)) + + items.clear() + + @staticmethod + def _is_full_case(items, i): + if not 0 <= i < len(items): + return False + + item = items[i] + return (isinstance(item, Character) and item.positive and + (item.case_flags & FULLIGNORECASE) == FULLIGNORECASE) + + @staticmethod + def _is_folded(items): + if len(items) < 2: + return False + + for i in items: + if (not isinstance(i, Character) or not i.positive or not + i.case_flags): + return False + + folded = "".join(chr(i.value) for i in items) + folded = _regex.fold_case(FULL_CASE_FOLDING, folded) + + # Get the characters which expand to multiple codepoints on folding. 
+ expanding_chars = _regex.get_expand_on_folding() + + for c in expanding_chars: + if folded == _regex.fold_case(FULL_CASE_FOLDING, c): + return True + + return False + + def is_empty(self): + return all(b.is_empty() for b in self.branches) + + def __eq__(self, other): + return type(self) is type(other) and self.branches == other.branches + + def max_width(self): + return max(b.max_width() for b in self.branches) + +class CallGroup(RegexBase): + def __init__(self, info, group, position): + RegexBase.__init__(self) + self.info = info + self.group = group + self.position = position + + self._key = self.__class__, self.group + + def fix_groups(self, pattern, reverse, fuzzy): + try: + self.group = int(self.group) + except ValueError: + try: + self.group = self.info.group_index[self.group] + except KeyError: + raise error("invalid group reference", pattern, self.position) + + if not 0 <= self.group <= self.info.group_count: + raise error("unknown group", pattern, self.position) + + if self.group > 0 and self.info.open_group_count[self.group] > 1: + raise error("ambiguous group reference", pattern, self.position) + + self.info.group_calls.append((self, reverse, fuzzy)) + + self._key = self.__class__, self.group + + def remove_captures(self): + raise error("group reference not allowed", pattern, self.position) + + def _compile(self, reverse, fuzzy): + return [(OP.GROUP_CALL, self.call_ref)] + + def dump(self, indent, reverse): + print("{}GROUP_CALL {}".format(INDENT * indent, self.group)) + + def __eq__(self, other): + return type(self) is type(other) and self.group == other.group + + def max_width(self): + return UNLIMITED + + def __del__(self): + self.info = None + +class CallRef(RegexBase): + def __init__(self, ref, parsed): + self.ref = ref + self.parsed = parsed + + def _compile(self, reverse, fuzzy): + return ([(OP.CALL_REF, self.ref)] + self.parsed._compile(reverse, + fuzzy) + [(OP.END, )]) + +class Character(RegexBase): + _opcode = {(NOCASE, False): OP.CHARACTER, 
(IGNORECASE, False): + OP.CHARACTER_IGN, (FULLCASE, False): OP.CHARACTER, (FULLIGNORECASE, + False): OP.CHARACTER_IGN, (NOCASE, True): OP.CHARACTER_REV, (IGNORECASE, + True): OP.CHARACTER_IGN_REV, (FULLCASE, True): OP.CHARACTER_REV, + (FULLIGNORECASE, True): OP.CHARACTER_IGN_REV} + + def __init__(self, value, positive=True, case_flags=NOCASE, + zerowidth=False): + RegexBase.__init__(self) + self.value = value + self.positive = bool(positive) + self.case_flags = CASE_FLAGS_COMBINATIONS[case_flags] + self.zerowidth = bool(zerowidth) + + if (self.positive and (self.case_flags & FULLIGNORECASE) == + FULLIGNORECASE): + self.folded = _regex.fold_case(FULL_CASE_FOLDING, chr(self.value)) + else: + self.folded = chr(self.value) + + self._key = (self.__class__, self.value, self.positive, + self.case_flags, self.zerowidth) + + def rebuild(self, positive, case_flags, zerowidth): + return Character(self.value, positive, case_flags, zerowidth) + + def optimise(self, info, reverse, in_set=False): + return self + + def get_firstset(self, reverse): + return set([self]) + + def has_simple_start(self): + return True + + def _compile(self, reverse, fuzzy): + flags = 0 + if self.positive: + flags |= POSITIVE_OP + if self.zerowidth: + flags |= ZEROWIDTH_OP + if fuzzy: + flags |= FUZZY_OP + + code = PrecompiledCode([self._opcode[self.case_flags, reverse], flags, + self.value]) + + if len(self.folded) > 1: + # The character expands on full case-folding. 
+ code = Branch([code, String([ord(c) for c in self.folded], + case_flags=self.case_flags)]) + + return code.compile(reverse, fuzzy) + + def dump(self, indent, reverse): + display = ascii(chr(self.value)).lstrip("bu") + print("{}CHARACTER {} {}{}".format(INDENT * indent, + POS_TEXT[self.positive], display, CASE_TEXT[self.case_flags])) + + def matches(self, ch): + return (ch == self.value) == self.positive + + def max_width(self): + return len(self.folded) + + def get_required_string(self, reverse): + if not self.positive: + return 1, None + + self.folded_characters = tuple(ord(c) for c in self.folded) + + return 0, self + +class Conditional(RegexBase): + def __init__(self, info, group, yes_item, no_item, position): + RegexBase.__init__(self) + self.info = info + self.group = group + self.yes_item = yes_item + self.no_item = no_item + self.position = position + + def fix_groups(self, pattern, reverse, fuzzy): + try: + self.group = int(self.group) + except ValueError: + try: + self.group = self.info.group_index[self.group] + except KeyError: + if self.group == 'DEFINE': + # 'DEFINE' is a special name unless there's a group with + # that name. 
+ self.group = 0 + else: + raise error("unknown group", pattern, self.position) + + if not 0 <= self.group <= self.info.group_count: + raise error("invalid group reference", pattern, self.position) + + self.yes_item.fix_groups(pattern, reverse, fuzzy) + self.no_item.fix_groups(pattern, reverse, fuzzy) + + def optimise(self, info, reverse): + yes_item = self.yes_item.optimise(info, reverse) + no_item = self.no_item.optimise(info, reverse) + + return Conditional(info, self.group, yes_item, no_item, self.position) + + def pack_characters(self, info): + self.yes_item = self.yes_item.pack_characters(info) + self.no_item = self.no_item.pack_characters(info) + return self + + def remove_captures(self): + self.yes_item = self.yes_item.remove_captures() + self.no_item = self.no_item.remove_captures() + + def is_atomic(self): + return self.yes_item.is_atomic() and self.no_item.is_atomic() + + def can_be_affix(self): + return self.yes_item.can_be_affix() and self.no_item.can_be_affix() + + def contains_group(self): + return self.yes_item.contains_group() or self.no_item.contains_group() + + def get_firstset(self, reverse): + return (self.yes_item.get_firstset(reverse) | + self.no_item.get_firstset(reverse)) + + def _compile(self, reverse, fuzzy): + code = [(OP.GROUP_EXISTS, self.group)] + code.extend(self.yes_item.compile(reverse, fuzzy)) + add_code = self.no_item.compile(reverse, fuzzy) + if add_code: + code.append((OP.NEXT, )) + code.extend(add_code) + + code.append((OP.END, )) + + return code + + def dump(self, indent, reverse): + print("{}GROUP_EXISTS {}".format(INDENT * indent, self.group)) + self.yes_item.dump(indent + 1, reverse) + if not self.no_item.is_empty(): + print("{}OR".format(INDENT * indent)) + self.no_item.dump(indent + 1, reverse) + + def is_empty(self): + return self.yes_item.is_empty() and self.no_item.is_empty() + + def __eq__(self, other): + return type(self) is type(other) and (self.group, self.yes_item, + self.no_item) == (other.group, other.yes_item, 
class Fuzzy(RegexBase):
    """Wraps a subpattern that is always compiled with fuzzy matching on.

    ``constraints`` is a dict keyed by the error types "d", "i" and "s"
    (presumably deletion, insertion and substitution -- confirm against the
    parser), the generic error type "e", the cost equation "cost" and,
    optionally, "test".  Each error type maps to a ``(minimum, maximum)``
    pair in which a maximum of None stands for "unlimited".
    """

    def __init__(self, subpattern, constraints=None):
        RegexBase.__init__(self)
        limits = {} if constraints is None else constraints
        self.subpattern = subpattern
        # The caller's dict is kept and completed in place, not copied.
        self.constraints = limits

        has_cost = "cost" in limits

        # An error type named in the cost equation gets an unlimited maximum
        # unless a maximum was given for it explicitly.
        if has_cost:
            for kind in "dis":
                if kind in limits["cost"]:
                    limits.setdefault(kind, (0, None))

        # Once any error type has been mentioned, the unmentioned ones default
        # to a maximum of 0; if none was mentioned they are all unlimited.
        if any(kind in limits for kind in "dis"):
            fallback = (0, 0)
        else:
            fallback = (0, None)
        for kind in "dis":
            limits.setdefault(kind, fallback)

        # The generic error type is unlimited by default.
        limits.setdefault("e", (0, None))

        if has_cost:
            # Error types missing from the cost equation cost nothing.
            for kind in "dis":
                limits["cost"].setdefault(kind, 0)
        else:
            # No cost equation given: equal costs, capped by the maximum of
            # the generic error type.
            limits["cost"] = {"d": 1, "i": 1, "s": 1, "max": limits["e"][1]}

    def fix_groups(self, pattern, reverse, fuzzy):
        # Everything below this node is fuzzy, whatever the caller passed.
        self.subpattern.fix_groups(pattern, reverse, True)

    def pack_characters(self, info):
        packed = self.subpattern.pack_characters(info)
        self.subpattern = packed
        return self

    def remove_captures(self):
        stripped = self.subpattern.remove_captures()
        self.subpattern = stripped
        return self

    def is_atomic(self):
        return self.subpattern.is_atomic()

    def contains_group(self):
        return self.subpattern.contains_group()

    def _compile(self, reverse, fuzzy):
        def cap(value):
            # None is the in-memory spelling of "no limit".
            return UNLIMITED if value is None else value

        # Minimum and maximum of each error type, in the order d, i, s, e.
        arguments = []
        for kind in "dise":
            bounds = self.constraints[kind]
            arguments += [bounds[0], cap(bounds[1])]

        # Coefficients of the cost equation, followed by its maximum.
        equation = self.constraints["cost"]
        arguments += [equation[kind] for kind in "dis"]
        arguments.append(cap(equation["max"]))

        flags = REVERSE_OP if reverse else 0
        operands = (flags, ) + tuple(arguments)

        test = self.constraints.get("test")
        if test:
            # Extended form: the test is emitted first, separated from the
            # subpattern by NEXT.
            opening = ([(OP.FUZZY_EXT, ) + operands] +
              test.compile(reverse, True) + [(OP.NEXT,)])
        else:
            opening = [(OP.FUZZY, ) + operands]

        return opening + self.subpattern.compile(reverse, True) + [(OP.END,)]

    def dump(self, indent, reverse):
        summary = self._constraints_to_string()
        suffix = " " + summary if summary else summary
        print("{}FUZZY{}".format(INDENT * indent, suffix))
        self.subpattern.dump(indent + 1, reverse)

    def is_empty(self):
        return self.subpattern.is_empty()

    def __eq__(self, other):
        if type(self) is not type(other):
            return False

        return (self.subpattern == other.subpattern and self.constraints ==
          other.constraints)

    def max_width(self):
        return UNLIMITED

    def _constraints_to_string(self):
        """Renders the constraints as text for dump()."""
        parts = []

        # Individual limits; an error type whose maximum is 0 is left out.
        for name in "ids":
            lowest, highest = self.constraints[name]
            if highest == 0:
                continue

            text = name
            if lowest > 0:
                text = "{}<={}".format(lowest, text)
            if highest is not None:
                text = "{}<={}".format(text, highest)

            parts.append(text)

        # The cost equation is shown only when it has a positive maximum.
        equation = self.constraints["cost"]
        terms = ["{}{}".format(equation[name], name) for name in "ids" if
          equation[name] > 0]

        limit = equation["max"]
        if limit is not None and limit > 0:
            parts.append("{}<={}".format("+".join(terms), limit))

        return ",".join(parts)
+ grapheme_matcher = Atomic(Sequence([LazyRepeat(AnyAll(), 1, None), + GraphemeBoundary()])) + + return grapheme_matcher.compile(reverse, fuzzy) + + def dump(self, indent, reverse): + print("{}GRAPHEME".format(INDENT * indent)) + + def max_width(self): + return UNLIMITED + +class GraphemeBoundary: + def compile(self, reverse, fuzzy): + return [(OP.GRAPHEME_BOUNDARY, 1)] + +class GreedyRepeat(RegexBase): + _opcode = OP.GREEDY_REPEAT + _op_name = "GREEDY_REPEAT" + + def __init__(self, subpattern, min_count, max_count): + RegexBase.__init__(self) + self.subpattern = subpattern + self.min_count = min_count + self.max_count = max_count + + def fix_groups(self, pattern, reverse, fuzzy): + self.subpattern.fix_groups(pattern, reverse, fuzzy) + + def optimise(self, info, reverse): + subpattern = self.subpattern.optimise(info, reverse) + + return type(self)(subpattern, self.min_count, self.max_count) + + def pack_characters(self, info): + self.subpattern = self.subpattern.pack_characters(info) + return self + + def remove_captures(self): + self.subpattern = self.subpattern.remove_captures() + return self + + def is_atomic(self): + return self.min_count == self.max_count and self.subpattern.is_atomic() + + def can_be_affix(self): + return False + + def contains_group(self): + return self.subpattern.contains_group() + + def get_firstset(self, reverse): + fs = self.subpattern.get_firstset(reverse) + if self.min_count == 0: + fs.add(None) + + return fs + + def _compile(self, reverse, fuzzy): + repeat = [self._opcode, self.min_count] + if self.max_count is None: + repeat.append(UNLIMITED) + else: + repeat.append(self.max_count) + + subpattern = self.subpattern.compile(reverse, fuzzy) + if not subpattern: + return [] + + return ([tuple(repeat)] + subpattern + [(OP.END, )]) + + def dump(self, indent, reverse): + if self.max_count is None: + limit = "INF" + else: + limit = self.max_count + print("{}{} {} {}".format(INDENT * indent, self._op_name, + self.min_count, limit)) + + 
class Group(RegexBase):
    """A numbered group wrapped around a subpattern.

    A negative ``group`` number is treated as a private group: its public
    number is looked up in ``info.private_groups``.
    """

    def __init__(self, info, group, subpattern):
        RegexBase.__init__(self)
        self.info = info
        self.group = group
        self.subpattern = subpattern

        self.call_ref = None

    def fix_groups(self, pattern, reverse, fuzzy):
        # Register this node, with its direction and fuzziness, as the
        # definition of the group before descending into the subpattern.
        self.info.defined_groups[self.group] = (self, reverse, fuzzy)
        self.subpattern.fix_groups(pattern, reverse, fuzzy)

    def optimise(self, info, reverse):
        subpattern = self.subpattern.optimise(info, reverse)

        return Group(self.info, self.group, subpattern)

    def pack_characters(self, info):
        self.subpattern = self.subpattern.pack_characters(info)
        return self

    def remove_captures(self):
        # Dropping the capture leaves just the (capture-free) subpattern.
        return self.subpattern.remove_captures()

    def is_atomic(self):
        return self.subpattern.is_atomic()

    def can_be_affix(self):
        return False

    def contains_group(self):
        return True

    def get_firstset(self, reverse):
        return self.subpattern.get_firstset(reverse)

    def has_simple_start(self):
        return self.subpattern.has_simple_start()

    def _compile(self, reverse, fuzzy):
        code = []

        # If this (group, direction, fuzziness) combination has a call
        # reference, the group code is wrapped in CALL_REF ... END.
        key = self.group, reverse, fuzzy
        ref = self.info.call_refs.get(key)
        if ref is not None:
            code += [(OP.CALL_REF, ref)]

        public_group = private_group = self.group
        if private_group < 0:
            # Private groups are numbered after the info.group_count ordinary
            # groups; subtracting the negative number adds its magnitude.
            public_group = self.info.private_groups[private_group]
            private_group = self.info.group_count - private_group

        code += ([(OP.GROUP, int(not reverse), private_group, public_group)] +
          self.subpattern.compile(reverse, fuzzy) + [(OP.END, )])

        if ref is not None:
            # Closes the CALL_REF opened above.
            code += [(OP.END, )]

        return code

    def dump(self, indent, reverse):
        group = self.group
        if group < 0:
            # Translate a private group number the same way _compile does.
            # (This used to read a bare, undefined name ``private_groups``.)
            group = self.info.private_groups[group]
        print("{}GROUP {}".format(INDENT * indent, group))
        self.subpattern.dump(indent + 1, reverse)

    def __eq__(self, other):
        return (type(self) is type(other) and (self.group, self.subpattern) ==
          (other.group, other.subpattern))

    def max_width(self):
        return self.subpattern.max_width()

    def get_required_string(self, reverse):
        return self.subpattern.get_required_string(reverse)

    def __del__(self):
        # Drop the reference to the shared info object.
        self.info = None
class LookAroundConditional(RegexBase):
    """Conditional whose test is a lookaround.

    ``subpattern`` is the lookaround condition (``behind`` selects lookbehind,
    ``positive`` its polarity); ``yes_item`` and ``no_item`` are the two
    branches.  The condition is compiled in the lookaround's own direction
    (``self.behind``), the branches in the direction of the enclosing pattern.
    """

    _dir_text = {False: "AHEAD", True: "BEHIND"}

    def __init__(self, behind, positive, subpattern, yes_item, no_item):
        RegexBase.__init__(self)
        self.behind = bool(behind)
        self.positive = bool(positive)
        self.subpattern = subpattern
        self.yes_item = yes_item
        self.no_item = no_item

    def fix_groups(self, pattern, reverse, fuzzy):
        # The condition is compiled with self.behind (see _compile), so its
        # groups must be registered with that direction, as LookAround does.
        self.subpattern.fix_groups(pattern, self.behind, fuzzy)
        self.yes_item.fix_groups(pattern, reverse, fuzzy)
        self.no_item.fix_groups(pattern, reverse, fuzzy)

    def optimise(self, info, reverse):
        # Same direction split as _compile: only the condition follows the
        # lookaround's direction; the branches follow the enclosing pattern.
        subpattern = self.subpattern.optimise(info, self.behind)
        yes_item = self.yes_item.optimise(info, reverse)
        no_item = self.no_item.optimise(info, reverse)

        return LookAroundConditional(self.behind, self.positive, subpattern,
          yes_item, no_item)

    def pack_characters(self, info):
        self.subpattern = self.subpattern.pack_characters(info)
        self.yes_item = self.yes_item.pack_characters(info)
        self.no_item = self.no_item.pack_characters(info)
        return self

    def remove_captures(self):
        self.subpattern = self.subpattern.remove_captures()
        self.yes_item = self.yes_item.remove_captures()
        self.no_item = self.no_item.remove_captures()
        # Containers such as Sequence store the value returned here, so the
        # node itself must be returned (it used to fall through to None).
        return self

    def is_atomic(self):
        return (self.subpattern.is_atomic() and self.yes_item.is_atomic() and
          self.no_item.is_atomic())

    def can_be_affix(self):
        return (self.subpattern.can_be_affix() and self.yes_item.can_be_affix()
          and self.no_item.can_be_affix())

    def contains_group(self):
        return (self.subpattern.contains_group() or
          self.yes_item.contains_group() or self.no_item.contains_group())

    def _compile(self, reverse, fuzzy):
        # Layout: CONDITIONAL <condition> NEXT <yes> [NEXT <no>] END
        code = [(OP.CONDITIONAL, int(self.positive), int(not self.behind))]
        code.extend(self.subpattern.compile(self.behind, fuzzy))
        code.append((OP.NEXT, ))
        code.extend(self.yes_item.compile(reverse, fuzzy))
        add_code = self.no_item.compile(reverse, fuzzy)
        if add_code:
            code.append((OP.NEXT, ))
            code.extend(add_code)

        code.append((OP.END, ))

        return code

    def dump(self, indent, reverse):
        print("{}CONDITIONAL {} {}".format(INDENT * indent,
          self._dir_text[self.behind], POS_TEXT[self.positive]))
        self.subpattern.dump(indent + 1, self.behind)
        print("{}EITHER".format(INDENT * indent))
        self.yes_item.dump(indent + 1, reverse)
        if not self.no_item.is_empty():
            print("{}OR".format(INDENT * indent))
            self.no_item.dump(indent + 1, reverse)

    def is_empty(self):
        # Empty only when the condition and both branches are empty.  The
        # previous "a and b or c" form reported the whole node as empty
        # whenever the else-branch alone was empty.
        return (self.subpattern.is_empty() and self.yes_item.is_empty() and
          self.no_item.is_empty())

    def __eq__(self, other):
        # Direction and polarity are part of the node's identity, exactly as
        # in LookAround.__eq__.
        return type(self) is type(other) and (self.behind, self.positive,
          self.subpattern, self.yes_item, self.no_item) == (other.behind,
          other.positive, other.subpattern, other.yes_item, other.no_item)

    def max_width(self):
        # The condition is not counted; only the wider branch is.
        return max(self.yes_item.max_width(), self.no_item.max_width())

    def get_required_string(self, reverse):
        return self.max_width(), None
class Range(RegexBase):
    """Matches one codepoint in the inclusive range ``lower``..``upper``."""

    _opcode = {
        (NOCASE, False): OP.RANGE,
        (IGNORECASE, False): OP.RANGE_IGN,
        (FULLCASE, False): OP.RANGE,
        (FULLIGNORECASE, False): OP.RANGE_IGN,
        (NOCASE, True): OP.RANGE_REV,
        (IGNORECASE, True): OP.RANGE_IGN_REV,
        (FULLCASE, True): OP.RANGE_REV,
        (FULLIGNORECASE, True): OP.RANGE_IGN_REV,
    }
    _op_name = "RANGE"

    def __init__(self, lower, upper, positive=True, case_flags=NOCASE,
      zerowidth=False):
        RegexBase.__init__(self)
        self.lower, self.upper = lower, upper
        self.positive = bool(positive)
        self.case_flags = CASE_FLAGS_COMBINATIONS[case_flags]
        self.zerowidth = bool(zerowidth)

        self._key = (self.__class__, self.lower, self.upper, self.positive,
          self.case_flags, self.zerowidth)

    def rebuild(self, positive, case_flags, zerowidth):
        """Returns a new Range over the same bounds with different flags."""
        return Range(self.lower, self.upper, positive, case_flags, zerowidth)

    def optimise(self, info, reverse, in_set=False):
        # Nothing to expand for a negative, case-sensitive or in-set range.
        ignores_case = self.case_flags & IGNORECASE
        if in_set or not self.positive or not ignores_case:
            return self

        # Full case-folding needs both the UNICODE flag and FULLIGNORECASE.
        full_folding = (self.case_flags & FULLIGNORECASE) == FULLIGNORECASE
        if not (info.flags & UNICODE and full_folding):
            return self

        # One String alternative for every character of the range that folds
        # to more than one codepoint.
        branches = [
            String([ord(c) for c in _regex.fold_case(FULL_CASE_FOLDING, ch)],
              case_flags=self.case_flags)
            for ch in _regex.get_expand_on_folding()
            if self.lower <= ord(ch) <= self.upper
        ]

        if not branches:
            # Simple case-folding is enough.
            return self

        span = self.upper - self.lower + 1
        if len(branches) < span:
            # Some characters are not covered by the expansions, so the range
            # itself stays as the first alternative.
            branches.insert(0, self)

        return Branch(branches)

    def _compile(self, reverse, fuzzy):
        flags = 0
        switches = ((self.positive, POSITIVE_OP), (self.zerowidth,
          ZEROWIDTH_OP), (fuzzy, FUZZY_OP))
        for enabled, bit in switches:
            if enabled:
                flags |= bit

        opcode = self._opcode[self.case_flags, reverse]
        return [(opcode, flags, self.lower, self.upper)]

    def dump(self, indent, reverse):
        shown = [ascii(chr(bound)).lstrip("bu") for bound in (self.lower,
          self.upper)]
        print("{}RANGE {} {} {}{}".format(INDENT * indent,
          POS_TEXT[self.positive], shown[0], shown[1],
          CASE_TEXT[self.case_flags]))

    def matches(self, ch):
        in_range = self.lower <= ch <= self.upper
        return in_range == self.positive

    def max_width(self):
        return 1
class Sequence(RegexBase):
    """An ordered list of nodes that are compiled one after another.

    When compiling in reverse the items are emitted last-to-first; the stored
    ``items`` list itself always stays in pattern order.
    """

    def __init__(self, items=None):
        RegexBase.__init__(self)
        if items is None:
            items = []

        self.items = items

    def fix_groups(self, pattern, reverse, fuzzy):
        for s in self.items:
            s.fix_groups(pattern, reverse, fuzzy)

    def optimise(self, info, reverse):
        # Flatten the sequences.
        items = []
        for s in self.items:
            s = s.optimise(info, reverse)
            if isinstance(s, Sequence):
                items.extend(s.items)
            else:
                items.append(s)

        return make_sequence(items)

    def pack_characters(self, info):
        "Packs sequences of characters into strings."
        items = []
        characters = []
        case_flags = NOCASE
        for s in self.items:
            if type(s) is Character and s.positive and not s.zerowidth:
                if s.case_flags != case_flags:
                    # Different case sensitivity, so flush, unless neither the
                    # previous nor the new character are cased.
                    if s.case_flags or is_cased_i(info, s.value):
                        Sequence._flush_characters(info, characters,
                          case_flags, items)

                        # NOTE(review): the run's flags are switched only when
                        # the run was flushed -- confirm the nesting against
                        # the pristine file.
                        case_flags = s.case_flags

                characters.append(s.value)
            elif type(s) is String or type(s) is Literal:
                if s.case_flags != case_flags:
                    # Different case sensitivity, so flush, unless the neither
                    # the previous nor the new string are cased.
                    if s.case_flags or any(is_cased_i(info, c) for c in
                      characters):
                        Sequence._flush_characters(info, characters,
                          case_flags, items)

                        case_flags = s.case_flags

                characters.extend(s.characters)
            else:
                # Any other node ends the current run of characters.
                Sequence._flush_characters(info, characters, case_flags, items)

                items.append(s.pack_characters(info))

        Sequence._flush_characters(info, characters, case_flags, items)

        return make_sequence(items)

    def remove_captures(self):
        self.items = [s.remove_captures() for s in self.items]
        return self

    def is_atomic(self):
        return all(s.is_atomic() for s in self.items)

    def can_be_affix(self):
        return False

    def contains_group(self):
        return any(s.contains_group() for s in self.items)

    def get_firstset(self, reverse):
        """Returns the union of the first-sets of the leading items.

        Items are scanned from the matching start (the end of the list when
        ``reverse`` is true) until one whose first-set lacks None, the marker
        for "may match nothing".  If every item may match nothing, None is
        part of the result.
        """
        fs = set()
        # Iterate over a reversed view.  Calling self.items.reverse() here
        # would flip the node's own list in place and leave the sequence in
        # the wrong order for every later compile/dump/optimise call.
        items = reversed(self.items) if reverse else self.items
        for s in items:
            fs |= s.get_firstset(reverse)
            if None not in fs:
                return fs
            fs.discard(None)

        return fs | set([None])

    def has_simple_start(self):
        return bool(self.items) and self.items[0].has_simple_start()

    def _compile(self, reverse, fuzzy):
        seq = self.items
        if reverse:
            # A reversed copy; self.items is left as it is.
            seq = seq[::-1]

        code = []
        for s in seq:
            code.extend(s.compile(reverse, fuzzy))

        return code

    def dump(self, indent, reverse):
        for s in self.items:
            s.dump(indent, reverse)

    @staticmethod
    def _flush_characters(info, characters, case_flags, items):
        """Appends the pending ``characters`` to ``items`` and clears them.

        A single character becomes a Character, a longer run a String.  The
        ``characters`` list is emptied in place so the caller can reuse it.
        """
        if not characters:
            return

        # Disregard case_flags if all of the characters are case-less.
        if case_flags & IGNORECASE:
            if not any(is_cased_i(info, c) for c in characters):
                case_flags = NOCASE

        if (case_flags & FULLIGNORECASE) == FULLIGNORECASE:
            literals = Sequence._fix_full_casefold(characters)

            for item in literals:
                chars = item.characters

                if len(chars) == 1:
                    items.append(Character(chars[0], case_flags=item.case_flags))
                else:
                    items.append(String(chars, case_flags=item.case_flags))
        else:
            if len(characters) == 1:
                items.append(Character(characters[0], case_flags=case_flags))
            else:
                items.append(String(characters, case_flags=case_flags))

        characters[:] = []

    @staticmethod
    def _fix_full_casefold(characters):
        # Split a literal needing full case-folding into chunks that need it
        # and chunks that can use simple case-folding, which is faster.
        expanded = [_regex.fold_case(FULL_CASE_FOLDING, c) for c in
          _regex.get_expand_on_folding()]
        string = _regex.fold_case(FULL_CASE_FOLDING, ''.join(chr(c)
          for c in characters)).lower()
        chunks = []

        # Collect every (start, end) span at which an expansion occurs,
        # overlapping occurrences included.
        for e in expanded:
            found = string.find(e)

            while found >= 0:
                chunks.append((found, found + len(e)))
                found = string.find(e, found + 1)

        pos = 0
        literals = []

        for start, end in Sequence._merge_chunks(chunks):
            if pos < start:
                literals.append(Literal(characters[pos : start],
                  case_flags=IGNORECASE))

            literals.append(Literal(characters[start : end],
              case_flags=FULLIGNORECASE))
            pos = end

        if pos < len(characters):
            literals.append(Literal(characters[pos : ], case_flags=IGNORECASE))

        return literals

    @staticmethod
    def _merge_chunks(chunks):
        """Merges overlapping or touching (start, end) spans.

        Returns a new list sorted by start; the argument is not modified.
        """
        # sorted() copies, so the caller's list keeps its order (the earlier
        # chunks.sort() reordered it as a side effect).
        ordered = sorted(chunks)
        if len(ordered) < 2:
            return ordered

        start, end = ordered[0]
        new_chunks = []

        for s, e in ordered[1 : ]:
            if s <= end:
                end = max(end, e)
            else:
                new_chunks.append((start, end))
                start, end = s, e

        new_chunks.append((start, end))

        return new_chunks

    def is_empty(self):
        return all(i.is_empty() for i in self.items)

    def __eq__(self, other):
        return type(self) is type(other) and self.items == other.items

    def max_width(self):
        return sum(s.max_width() for s in self.items)

    def get_required_string(self, reverse):
        """Returns (offset, node) for the first item offering a required
        string, or (total offset, None) when no item offers one."""
        seq = self.items
        if reverse:
            seq = seq[::-1]

        offset = 0

        for s in seq:
            ofs, req = s.get_required_string(reverse)
            offset += ofs
            if req:
                return offset, req

        return offset, None
class SetDiff(SetBase):
    """Set difference: the first member minus the second member."""

    _opcode = {
        (NOCASE, False): OP.SET_DIFF,
        (IGNORECASE, False): OP.SET_DIFF_IGN,
        (FULLCASE, False): OP.SET_DIFF,
        (FULLIGNORECASE, False): OP.SET_DIFF_IGN,
        (NOCASE, True): OP.SET_DIFF_REV,
        (IGNORECASE, True): OP.SET_DIFF_IGN_REV,
        (FULLCASE, True): OP.SET_DIFF_REV,
        (FULLIGNORECASE, True): OP.SET_DIFF_IGN_REV,
    }
    _op_name = "SET_DIFF"

    def optimise(self, info, reverse, in_set=False):
        members = self.items

        # More than two members: everything after the first is subtracted as
        # one union.
        if len(members) > 2:
            members = [members[0], SetUnion(info, members[1 : ])]

        # A lone member is just that member carrying this set's flags.
        if len(members) == 1:
            only = members[0].with_flags(case_flags=self.case_flags,
              zerowidth=self.zerowidth)
            return only.optimise(info, reverse, in_set)

        optimised = []
        for member in members:
            optimised.append(member.optimise(info, reverse, in_set=True))
        self.items = tuple(optimised)

        return self._handle_case_folding(info, in_set)

    def matches(self, ch):
        in_first = self.items[0].matches(ch)
        # The second member is consulted only if the first one matched.
        hit = (not self.items[1].matches(ch)) if in_first else in_first
        return hit == self.positive
class SetUnion(SetBase):
    """Set union: matches when any member matches."""

    _opcode = {
        (NOCASE, False): OP.SET_UNION,
        (IGNORECASE, False): OP.SET_UNION_IGN,
        (FULLCASE, False): OP.SET_UNION,
        (FULLIGNORECASE, False): OP.SET_UNION_IGN,
        (NOCASE, True): OP.SET_UNION_REV,
        (IGNORECASE, True): OP.SET_UNION_IGN_REV,
        (FULLCASE, True): OP.SET_UNION_REV,
        (FULLIGNORECASE, True): OP.SET_UNION_IGN_REV,
    }
    _op_name = "SET_UNION"

    def optimise(self, info, reverse, in_set=False):
        flattened = []
        for member in self.items:
            member = member.optimise(info, reverse, in_set=True)
            if isinstance(member, SetUnion) and member.positive:
                # A positive union inside a union is spliced in directly.
                flattened.extend(member.items)
            else:
                flattened.append(member)

        if len(flattened) == 1:
            # A lone member stands in for the whole set; its polarity is
            # combined with this set's polarity.
            only = flattened[0]
            replacement = only.with_flags(
              positive=only.positive == self.positive,
              case_flags=self.case_flags, zerowidth=self.zerowidth)
            return replacement.optimise(info, reverse, in_set)

        self.items = tuple(flattened)

        return self._handle_case_folding(info, in_set)

    def _compile(self, reverse, fuzzy):
        flags = 0
        if self.positive:
            flags |= POSITIVE_OP
        if self.zerowidth:
            flags |= ZEROWIDTH_OP
        if fuzzy:
            flags |= FUZZY_OP

        # Plain characters are pooled by polarity, in order of first
        # appearance; every other member is compiled individually.
        pooled = {}
        remainder = []
        for member in self.items:
            if isinstance(member, Character):
                pooled.setdefault(member.positive, []).append(member.value)
            else:
                remainder.append(member)

        code = [(self._opcode[self.case_flags, reverse], flags)]

        for positive, values in pooled.items():
            pool_flags = POSITIVE_OP if positive else 0
            if len(values) == 1:
                code.append((OP.CHARACTER, pool_flags, values[0]))
            else:
                # Several characters of one polarity share a STRING opcode.
                code.append((OP.STRING, pool_flags, len(values)) +
                  tuple(values))

        for member in remainder:
            code.extend(member.compile())

        code.append((OP.END, ))

        return code

    def matches(self, ch):
        hit = False
        for member in self.items:
            if member.matches(ch):
                hit = True
                break

        return hit == self.positive
class Source:
    """Cursor over the text of a regular expression pattern.

    The pattern is always held as a ``str`` (bytes input is decoded as
    latin-1). ``pos`` is the index of the next unread character. While
    ``ignore_space`` is true the reading methods skip whitespace and ``#``
    comments, which run to the end of the line.
    """

    def __init__(self, string):
        """Wrap *string*, which may be ``str`` or a bytes-like object with
        ``decode``.

        ``char_type`` converts a codepoint back to the pattern's own type and
        ``sep`` is an empty slice of the original input.
        """
        is_text = isinstance(string, str)
        self.string = string if is_text else string.decode("latin-1")
        self.char_type = chr if is_text else (lambda c: bytes([c]))

        self.pos = 0
        self.ignore_space = False
        self.sep = string[ : 0]

    @staticmethod
    def _skip_ignored(text, index):
        """Return the first index at or after *index* that is neither
        whitespace nor inside a ``#`` comment.

        Raises IndexError when the text runs out and ValueError when a comment
        has no terminating newline; callers rely on both.
        """
        while True:
            current = text[index]
            if current.isspace():
                index += 1
            elif current == "#":
                # Jump to the newline; it is whitespace and is skipped next.
                index = text.index("\n", index)
            else:
                return index

    def get(self, override_ignore=False):
        """Read one character and advance past it.

        Returns an empty string at the end of the pattern. Skipping of
        whitespace and comments is suppressed when *override_ignore* is true.
        """
        text, index = self.string, self.pos
        skipping = self.ignore_space and not override_ignore

        try:
            # Kept inline (not via _skip_ignored) because the IndexError
            # handler must see how far the skip got.
            while skipping:
                current = text[index]
                if current.isspace():
                    index += 1
                elif current == "#":
                    index = text.index("\n", index)
                else:
                    break

            char = text[index]
        except IndexError:
            # We've reached the end of the string.
            self.pos = index
            return text[ : 0]
        except ValueError:
            # The comment extended to the end of the string.
            self.pos = len(text)
            return text[ : 0]

        self.pos = index + 1
        return char

    def get_many(self, count=1):
        """Read up to *count* characters and return them as one string.

        Fewer characters are returned if the pattern ends first.
        """
        text = self.string

        if not self.ignore_space:
            chunk = text[self.pos : self.pos + count]
            self.pos += len(chunk)
            return chunk

        index = self.pos
        collected = []
        try:
            while len(collected) < count:
                index = self._skip_ignored(text, index)
                collected.append(text[index])
                index += 1
        except (IndexError, ValueError):
            # End of the string, or a comment that ran to the end of it.
            index = len(text)

        self.pos = index
        return "".join(collected)

    def get_while(self, test_set, include=True):
        """Read the longest run of characters whose membership in *test_set*
        equals *include*, and return it.
        """
        text = self.string

        if self.ignore_space:
            index = self.pos
            accepted = []
            try:
                while True:
                    index = self._skip_ignored(text, index)
                    if (text[index] in test_set) == include:
                        accepted.append(text[index])
                        index += 1
                    else:
                        break
            except (IndexError, ValueError):
                # End of the string, or a comment that ran to the end of it.
                index = len(text)

            self.pos = index
            return "".join(accepted)

        start = index = self.pos
        try:
            while (text[index] in test_set) == include:
                index += 1
        except IndexError:
            # We've reached the end of the string.
            pass

        self.pos = index
        return text[start : index]

    def skip_while(self, test_set, include=True):
        """Advance over the characters that ``get_while`` would have read,
        without collecting them.
        """
        text, index = self.string, self.pos

        try:
            if self.ignore_space:
                while True:
                    index = self._skip_ignored(text, index)
                    if (text[index] in test_set) == include:
                        index += 1
                    else:
                        break
            else:
                while (text[index] in test_set) == include:
                    index += 1
        except (IndexError, ValueError):
            # End of the string, or a comment that ran to the end of it.
            index = len(text)

        self.pos = index

    def match(self, substring):
        """Consume *substring* if it comes next and report whether it did.

        The position is left untouched when the text does not match.
        """
        text, index = self.string, self.pos

        if not self.ignore_space:
            if text.startswith(substring, index):
                self.pos = index + len(substring)
                return True

            return False

        try:
            for expected in substring:
                index = self._skip_ignored(text, index)
                if text[index] != expected:
                    return False

                index += 1
        except (IndexError, ValueError):
            # End of the string, or a comment that ran to the end of it.
            return False

        self.pos = index
        return True

    def expect(self, substring):
        """Consume *substring*, raising ``error`` if it does not come next."""
        if self.match(substring):
            return

        raise error("missing {}".format(substring), self.string, self.pos)

    def at_end(self):
        """Report whether nothing significant remains, without moving."""
        text, index = self.string, self.pos

        if self.ignore_space:
            try:
                index = self._skip_ignored(text, index)
            except (IndexError, ValueError):
                # Only whitespace and/or comments were left.
                return True

        return index >= len(text)
+ group_alias = -(len(self.private_groups) + 1) + self.private_groups[group_alias] = group + group = group_alias + + self.open_groups.append(group) + self.open_group_count[group] = self.open_group_count.get(group, 0) + 1 + + return group + + def close_group(self): + self.open_groups.pop() + + def is_open_group(self, name): + # In version 1, a group reference can refer to an open group. We'll + # just pretend the group isn't open. + version = (self.flags & _ALL_VERSIONS) or DEFAULT_VERSION + if version == VERSION1: + return False + + if name.isdigit(): + group = int(name) + else: + group = self.group_index.get(name) + + return group in self.open_groups + +def _check_group_features(info, parsed): + """Checks whether the reverse and fuzzy features of the group calls match + the groups which they call. + """ + call_refs = {} + additional_groups = [] + for call, reverse, fuzzy in info.group_calls: + # Look up the reference of this group call. + key = (call.group, reverse, fuzzy) + ref = call_refs.get(key) + if ref is None: + # This group doesn't have a reference yet, so look up its features. + if call.group == 0: + # Calling the pattern as a whole. + rev = bool(info.flags & REVERSE) + fuz = isinstance(parsed, Fuzzy) + if (rev, fuz) != (reverse, fuzzy): + # The pattern as a whole doesn't have the features we want, + # so we'll need to make a copy of it with the desired + # features. + additional_groups.append((CallRef(len(call_refs), parsed), + reverse, fuzzy)) + else: + # Calling a capture group. + def_info = info.defined_groups[call.group] + group = def_info[0] + if def_info[1 : ] != (reverse, fuzzy): + # The group doesn't have the features we want, so we'll + # need to make a copy of it with the desired features. 
+ additional_groups.append((group, reverse, fuzzy)) + + ref = len(call_refs) + call_refs[key] = ref + + call.call_ref = ref + + info.call_refs = call_refs + info.additional_groups = additional_groups + +def _get_required_string(parsed, flags): + "Gets the required string and related info of a parsed pattern." + + req_offset, required = parsed.get_required_string(bool(flags & REVERSE)) + if required: + required.required = True + if req_offset >= UNLIMITED: + req_offset = -1 + + req_flags = required.case_flags + if not (flags & UNICODE): + req_flags &= ~UNICODE + + req_chars = required.folded_characters + else: + req_offset = 0 + req_chars = () + req_flags = 0 + + return req_offset, req_chars, req_flags + +class Scanner: + def __init__(self, lexicon, flags=0): + self.lexicon = lexicon + + # Combine phrases into a compound pattern. + patterns = [] + for phrase, action in lexicon: + # Parse the regular expression. + source = Source(phrase) + info = Info(flags, source.char_type) + source.ignore_space = bool(info.flags & VERBOSE) + parsed = _parse_pattern(source, info) + if not source.at_end(): + raise error("unbalanced parenthesis", source.string, + source.pos) + + # We want to forbid capture groups within each phrase. + patterns.append(parsed.remove_captures()) + + # Combine all the subpatterns into one pattern. + info = Info(flags) + patterns = [Group(info, g + 1, p) for g, p in enumerate(patterns)] + parsed = Branch(patterns) + + # Optimise the compound pattern. + reverse = bool(info.flags & REVERSE) + parsed = parsed.optimise(info, reverse) + parsed = parsed.pack_characters(info) + + # Get the required string. + req_offset, req_chars, req_flags = _get_required_string(parsed, + info.flags) + + # Check the features of the groups. + _check_group_features(info, parsed) + + # Complain if there are any group calls. They are not supported by the + # Scanner class. 
+ if info.call_refs: + raise error("recursive regex not supported by Scanner", + source.string, source.pos) + + reverse = bool(info.flags & REVERSE) + + # Compile the compound pattern. The result is a list of tuples. + code = parsed.compile(reverse) + [(OP.SUCCESS, )] + + # Flatten the code into a list of ints. + code = _flatten_code(code) + + if not parsed.has_simple_start(): + # Get the first set, if possible. + try: + fs_code = _compile_firstset(info, parsed.get_firstset(reverse)) + fs_code = _flatten_code(fs_code) + code = fs_code + code + except _FirstSetError: + pass + + # Check the global flags for conflicts. + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + if version not in (0, VERSION0, VERSION1): + raise ValueError("VERSION0 and VERSION1 flags are mutually incompatible") + + # Create the PatternObject. + # + # Local flags like IGNORECASE affect the code generation, but aren't + # needed by the PatternObject itself. Conversely, global flags like + # LOCALE _don't_ affect the code generation but _are_ needed by the + # PatternObject. + self.scanner = _regex.compile(None, (flags & GLOBAL_FLAGS) | version, + code, {}, {}, {}, [], req_offset, req_chars, req_flags, + len(patterns)) + + def scan(self, string): + result = [] + append = result.append + match = self.scanner.scanner(string).match + i = 0 + while True: + m = match() + if not m: + break + j = m.end() + if i == j: + break + action = self.lexicon[m.lastindex - 1][1] + if hasattr(action, '__call__'): + self.match = m + action = action(self, m.group()) + if action is not None: + append(action) + i = j + + return result, string[i : ] + +# Get the known properties dict. +PROPERTIES = _regex.get_properties() + +# Build the inverse of the properties dict. 
+PROPERTY_NAMES = {} +for prop_name, (prop_id, values) in PROPERTIES.items(): + name, prop_values = PROPERTY_NAMES.get(prop_id, ("", {})) + name = max(name, prop_name, key=len) + PROPERTY_NAMES[prop_id] = name, prop_values + + for val_name, val_id in values.items(): + prop_values[val_id] = max(prop_values.get(val_id, ""), val_name, + key=len) + +# Character escape sequences. +CHARACTER_ESCAPES = { + "a": "\a", + "b": "\b", + "f": "\f", + "n": "\n", + "r": "\r", + "t": "\t", + "v": "\v", +} + +# Predefined character set escape sequences. +CHARSET_ESCAPES = { + "d": lookup_property(None, "Digit", True), + "D": lookup_property(None, "Digit", False), + "h": lookup_property(None, "Blank", True), + "s": lookup_property(None, "Space", True), + "S": lookup_property(None, "Space", False), + "w": lookup_property(None, "Word", True), + "W": lookup_property(None, "Word", False), +} + +# Positional escape sequences. +POSITION_ESCAPES = { + "A": StartOfString(), + "b": Boundary(), + "B": Boundary(False), + "K": Keep(), + "m": StartOfWord(), + "M": EndOfWord(), + "Z": EndOfString(), +} + +# Positional escape sequences when WORD flag set. +WORD_POSITION_ESCAPES = dict(POSITION_ESCAPES) +WORD_POSITION_ESCAPES.update({ + "b": DefaultBoundary(), + "B": DefaultBoundary(False), + "m": DefaultStartOfWord(), + "M": DefaultEndOfWord(), +}) + +# Regex control verbs. +VERBS = { + "FAIL": Failure(), + "F": Failure(), + "PRUNE": Prune(), + "SKIP": Skip(), +} diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/regex/regex.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/regex/regex.py new file mode 100644 index 0000000000000000000000000000000000000000..7412514ca30b0a6bb76253919b194147585cd119 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/regex/regex.py @@ -0,0 +1,742 @@ +# +# Secret Labs' Regular Expression Engine +# +# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. 
+# +# This version of the SRE library can be redistributed under CNRI's +# Python 1.6 license. For any other use, please contact Secret Labs +# AB (info@pythonware.com). +# +# Portions of this engine have been developed in cooperation with +# CNRI. Hewlett-Packard provided funding for 1.6 integration and +# other compatibility work. +# +# 2010-01-16 mrab Python front-end re-written and extended + +r"""Support for regular expressions (RE). + +This module provides regular expression matching operations similar to those +found in Perl. It supports both 8-bit and Unicode strings; both the pattern and +the strings being processed can contain null bytes and characters outside the +US ASCII range. + +Regular expressions can contain both special and ordinary characters. Most +ordinary characters, like "A", "a", or "0", are the simplest regular +expressions; they simply match themselves. You can concatenate ordinary +characters, so last matches the string 'last'. + +There are a few differences between the old (legacy) behaviour and the new +(enhanced) behaviour, which are indicated by VERSION0 or VERSION1. + +The special characters are: + "." Matches any character except a newline. + "^" Matches the start of the string. + "$" Matches the end of the string or just before the + newline at the end of the string. + "*" Matches 0 or more (greedy) repetitions of the preceding + RE. Greedy means that it will match as many repetitions + as possible. + "+" Matches 1 or more (greedy) repetitions of the preceding + RE. + "?" Matches 0 or 1 (greedy) of the preceding RE. + *?,+?,?? Non-greedy versions of the previous three special + characters. + *+,++,?+ Possessive versions of the previous three special + characters. + {m,n} Matches from m to n repetitions of the preceding RE. + {m,n}? Non-greedy version of the above. + {m,n}+ Possessive version of the above. + {...} Fuzzy matching constraints. + "\\" Either escapes special characters or signals a special + sequence. + [...] 
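Indicates a set of characters. A "^" as the first + character indicates a complementing set. + "|" A|B, creates an RE that will match either A or B. + (...) Matches the RE inside the parentheses. The contents are + captured and can be retrieved or matched later in the + string. + (?flags-flags) VERSION1: Sets/clears the flags for the remainder of + the group or pattern; VERSION0: Sets the flags for the + entire pattern. + (?:...) Non-capturing version of regular parentheses. + (?>...) Atomic non-capturing version of regular parentheses. + (?flags-flags:...) Non-capturing version of regular parentheses with local + flags. + (?P<name>...) The substring matched by the group is accessible by + name. + (?<name>...) The substring matched by the group is accessible by + name. + (?P=name) Matches the text matched earlier by the group named + name. + (?#...) A comment; ignored. + (?=...) Matches if ... matches next, but doesn't consume the + string. + (?!...) Matches if ... doesn't match next. + (?<=...) Matches if preceded by .... + (?<!...) Matches if not preceded by .... + (?(id)yes|no) Matches yes pattern if group id matched, the (optional) + no pattern otherwise. + (?(DEFINE)...) If there's no group called "DEFINE", then ... will be + ignored, but any group definitions will be available. + (?|...|...) (?|A|B), creates an RE that will match either A or B, + but reuses capture group numbers across the + alternatives. + (*FAIL) Forces matching to fail, which means immediate + backtracking. + (*F) Abbreviation for (*FAIL). + (*PRUNE) Discards the current backtracking information. Its + effect doesn't extend outside an atomic group or a + lookaround. + (*SKIP) Similar to (*PRUNE), except that it also sets where in + the text the next attempt at matching the entire + pattern will start. Its effect doesn't extend outside + an atomic group or a lookaround. + +The fuzzy matching constraints are: "i" to permit insertions, "d" to permit +deletions, "s" to permit substitutions, "e" to permit any of these. Limits are +optional with "<=" and "<". If any type of error is provided then any type not +provided is not permitted. + +A cost equation may be specified. + +Examples: + (?:fuzzy){i<=2} + (?:fuzzy){i<=1,s<=2,d<=1,1i+1s+1d<3} + +VERSION1: Set operators are supported, and a set can include nested sets. The +set operators, in order of increasing precedence, are: + || Set union ("x||y" means "x or y"). + ~~ Set symmetric difference ("x~~y" means "x or y, but not both"). + && Set intersection ("x&&y" means "x and y"). + -- Set difference ("x--y" means "x but not y"). + +Implicit union, ie, simple juxtaposition like in [ab], has the highest +precedence. + +VERSION0 and VERSION1: +The special sequences consist of "\\" and a character from the list below. If +the ordinary character is not on the list, then the resulting RE will match the +second character. + \number Matches the contents of the group of the same number if + number is no greater than the number of groups in the + pattern; otherwise, it matches the character represented + by the octal escape. + \a Matches the bell character. + \A Matches only at the start of the string. + \b Matches the empty string, but only at the start or end of a + word. + \B Matches the empty string, but not at the start or end of a + word. + \d Matches any decimal digit; equivalent to the set [0-9] when + matching a bytestring or a Unicode string with the ASCII + flag, or the whole range of Unicode digits when matching a + Unicode string. + \D Matches any non-digit character; equivalent to [^\d]. + \f Matches the formfeed character. + \g<name> Matches the text matched by the group named name. + \G Matches the empty string, but only at the position where + the search started. + \h Matches horizontal whitespace. + \K Keeps only what follows for the entire match. + \L<name> Named list. The list is provided as a keyword argument. + \m Matches the empty string, but only at the start of a word. + \M Matches the empty string, but only at the end of a word. + \n Matches the newline character. + \N{name} Matches the named character. + \p{name=value} Matches the character if its property has the specified + value. + \P{name=value} Matches the character if its property hasn't the specified + value. + \r Matches the carriage-return character. + \s Matches any whitespace character; equivalent to + [ \t\n\r\f\v]. + \S Matches any non-whitespace character; equivalent to [^\s]. + \t Matches the tab character. + \uXXXX Matches the Unicode codepoint with 4-digit hex code XXXX.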
+ \UXXXXXXXX Matches the Unicode codepoint with 8-digit hex code + XXXXXXXX. + \v Matches the vertical tab character. + \w Matches any alphanumeric character; equivalent to + [a-zA-Z0-9_] when matching a bytestring or a Unicode string + with the ASCII flag, or the whole range of Unicode + alphanumeric characters (letters plus digits plus + underscore) when matching a Unicode string. With LOCALE, it + will match the set [0-9_] plus characters defined as + letters for the current locale. + \W Matches the complement of \w; equivalent to [^\w]. + \xXX Matches the character with 2-digit hex code XX. + \X Matches a grapheme. + \Z Matches only at the end of the string. + \\ Matches a literal backslash. + +This module exports the following functions: + match Match a regular expression pattern at the beginning of a string. + fullmatch Match a regular expression pattern against all of a string. + search Search a string for the presence of a pattern. + sub Substitute occurrences of a pattern found in a string using a + template string. + subf Substitute occurrences of a pattern found in a string using a + format string. + subn Same as sub, but also return the number of substitutions made. + subfn Same as subf, but also return the number of substitutions made. + split Split a string by the occurrences of a pattern. VERSION1: will + split at zero-width match; VERSION0: won't split at zero-width + match. + splititer Return an iterator yielding the parts of a split string. + findall Find all occurrences of a pattern in a string. + finditer Return an iterator yielding a match object for each match. + compile Compile a pattern into a Pattern object. + purge Clear the regular expression cache. + escape Backslash all non-alphanumerics or special characters in a + string. + +Most of the functions support a concurrent parameter: if True, the GIL will be +released during matching, allowing other Python threads to run concurrently. 
If +the string changes during matching, the behaviour is undefined. This parameter +is not needed when working on the builtin (immutable) string classes. + +Some of the functions in this module take flags as optional parameters. Most of +these flags can also be set within an RE: + A a ASCII Make \w, \W, \b, \B, \d, and \D match the + corresponding ASCII character categories. Default + when matching a bytestring. + B b BESTMATCH Find the best fuzzy match (default is first). + D DEBUG Print the parsed pattern. + E e ENHANCEMATCH Attempt to improve the fit after finding the first + fuzzy match. + F f FULLCASE Use full case-folding when performing + case-insensitive matching in Unicode. + I i IGNORECASE Perform case-insensitive matching. + L L LOCALE Make \w, \W, \b, \B, \d, and \D dependent on the + current locale. (One byte per character only.) + M m MULTILINE "^" matches the beginning of lines (after a newline) + as well as the string. "$" matches the end of lines + (before a newline) as well as the end of the string. + P p POSIX Perform POSIX-standard matching (leftmost longest). + R r REVERSE Searches backwards. + S s DOTALL "." matches any character at all, including the + newline. + U u UNICODE Make \w, \W, \b, \B, \d, and \D dependent on the + Unicode locale. Default when matching a Unicode + string. + V0 V0 VERSION0 Turn on the old legacy behaviour. + V1 V1 VERSION1 Turn on the new enhanced behaviour. This flag + includes the FULLCASE flag. + W w WORD Make \b and \B work with default Unicode word breaks + and make ".", "^" and "$" work with Unicode line + breaks. + X x VERBOSE Ignore whitespace and comments for nicer looking REs. + +This module also defines an exception 'error'. + +""" + +# Public symbols. 
def sub(pattern, repl, string, count=0, flags=0, pos=None, endpos=None,
  concurrent=None, timeout=None, ignore_unused=False, **kwargs):
    """Replace occurrences of the pattern in string and return the result.

    The leftmost non-overlapping occurrences are replaced (the rightmost ones
    when the pattern is a reverse pattern). repl is either a string, in which
    case its backslash escapes are processed, or a callable, which is passed
    the match object and must return the replacement string."""
    compiled = _compile(pattern, flags, ignore_unused, kwargs, True)
    return compiled.sub(repl, string, count, pos, endpos, concurrent, timeout)
def split(pattern, string, maxsplit=0, flags=0, concurrent=None, timeout=None,
  ignore_unused=False, **kwargs):
    """Split string at the occurrences of the pattern and return the pieces as
    a list.

    When the pattern contains capturing parentheses, the text of all its
    groups is included in the list as well. A nonzero maxsplit limits the
    number of splits; whatever is left of the string becomes the last element
    of the list."""
    compiled = _compile(pattern, flags, ignore_unused, kwargs, True)
    return compiled.split(string, maxsplit, concurrent, timeout)
def cache_all(value=True):
    """Set whether to cache all patterns, even those that are compiled
    explicitly.

    Passing None changes nothing and returns the current setting instead; any
    other value is stored as the new setting and None is returned."""
    global _cache_all

    if value is not None:
        _cache_all = value
        return None

    return _cache_all
+# Version 1 is the new behaviour, which differs slightly.
+
+DEFAULT_VERSION = VERSION0
+
+# Characters that escape() backslash-escapes when special_only is true.
+_METACHARS = frozenset("()[]{}?*+|^$\\.-#&~")
+
+_regex_core.DEFAULT_VERSION = DEFAULT_VERSION
+
+# Caches for the patterns and replacements.
+# _cache: (pattern, type(pattern), flags, named lists, DEFAULT_VERSION,
+#   pattern_locale) -> compiled pattern.
+# _named_args: (pattern, type(pattern), flags) -> frozenset of the
+#   (name, values) named lists that the pattern uses.
+# _replacement_cache: (pattern.pattern, pattern.flags, template) -> compiled
+#   replacement template.
+# _locale_sensitive: (type(pattern), pattern) -> info.inline_locale as recorded
+#   by _compile().
+# In the code below _cache_lock is held only while _shrink_cache() runs.
+_cache = {}
+_cache_lock = _RLock()
+_named_args = {}
+_replacement_cache = {}
+_locale_sensitive = {}
+
+# Maximum size of the cache.
+_MAXCACHE = 500
+_MAXREPCACHE = 500
+
+def _compile(pattern, flags, ignore_unused, kwargs, cache_it):
+    """Compiles a regular expression to a PatternObject.
+
+    pattern is a str, a bytes object or an already compiled Pattern. A
+    compiled Pattern is returned unchanged, unless flags is non-zero, in which
+    case ValueError is raised. Any other type raises TypeError.
+
+    flags holds the flag bits requested by the caller. kwargs maps names to
+    the named lists referenced by the pattern; a keyword that the pattern
+    does not use raises ValueError unless ignore_unused is true.
+
+    cache_it says whether the result may be looked up in, and stored into,
+    _cache. It is forced off when the DEBUG flag is set.
+
+    Parsing problems are reported by raising error; conflicting version or
+    encoding flags raise ValueError."""
+
+    # Pick up the current value of DEFAULT_VERSION from the package, if it can
+    # be imported, in case the user has changed it there.
+    global DEFAULT_VERSION
+    try:
+        from regex import DEFAULT_VERSION
+    except ImportError:
+        pass
+
+    # We won't bother to cache the pattern if we're debugging.
+    if (flags & DEBUG) != 0:
+        cache_it = False
+
+    # What locale is this pattern using?
+    # A pattern that has not been seen before defaults to True here, i.e. it
+    # is treated as possibly locale-sensitive.
+    locale_key = (type(pattern), pattern)
+    if _locale_sensitive.get(locale_key, True) or (flags & LOCALE) != 0:
+        # This pattern is, or might be, locale-sensitive.
+        pattern_locale = _getpreferredencoding()
+    else:
+        # This pattern is definitely not locale-sensitive.
+        pattern_locale = None
+
+    if cache_it:
+        try:
+            # Do we know what keyword arguments are needed?
+            # args_key is reused at the end of the function when the result is
+            # stored, so it must be assigned before anything here can raise.
+            args_key = pattern, type(pattern), flags
+            args_needed = _named_args[args_key]
+
+            # Are we being provided with its required keyword arguments?
+            args_supplied = set()
+            if args_needed:
+                for k, v in args_needed:
+                    try:
+                        args_supplied.add((k, frozenset(kwargs[k])))
+                    except KeyError:
+                        raise error("missing named list: {!r}".format(k))
+
+            args_supplied = frozenset(args_supplied)
+
+            # Have we already seen this regular expression and named list?
+            pattern_key = (pattern, type(pattern), flags, args_supplied,
+              DEFAULT_VERSION, pattern_locale)
+            return _cache[pattern_key]
+        except KeyError:
+            # It's a new pattern, or new named list for a known pattern.
+            pass
+
+    # Guess the encoding from the class of the pattern string.
+    if isinstance(pattern, str):
+        guess_encoding = UNICODE
+    elif isinstance(pattern, bytes):
+        guess_encoding = ASCII
+    elif isinstance(pattern, Pattern):
+        if flags:
+            raise ValueError("cannot process flags argument with a compiled pattern")
+
+        return pattern
+    else:
+        raise TypeError("first argument must be a string or compiled pattern")
+
+    # Set the default version in the core code in case it has been changed.
+    _regex_core.DEFAULT_VERSION = DEFAULT_VERSION
+
+    global_flags = flags
+
+    # Parse the pattern, starting again with updated global flags each time
+    # the parser raises _UnscopedFlagSet.
+    while True:
+        caught_exception = None
+        try:
+            source = _Source(pattern)
+            info = _Info(global_flags, source.char_type, kwargs)
+            info.guess_encoding = guess_encoding
+            source.ignore_space = bool(info.flags & VERBOSE)
+            parsed = _parse_pattern(source, info)
+            break
+        except _UnscopedFlagSet:
+            # Remember the global flags for the next attempt.
+            global_flags = info.global_flags
+        except error as e:
+            caught_exception = e
+
+        # NOTE(review): the error is raised afresh outside the 'except' block,
+        # presumably to keep the parser's own traceback out of what the caller
+        # sees -- confirm.
+        if caught_exception:
+            raise error(caught_exception.msg, caught_exception.pattern,
+              caught_exception.pos)
+
+    if not source.at_end():
+        raise error("unbalanced parenthesis", pattern, source.pos)
+
+    # Check the global flags for conflicts.
+    version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION
+    if version not in (0, VERSION0, VERSION1):
+        raise ValueError("VERSION0 and VERSION1 flags are mutually incompatible")
+
+    if (info.flags & _ALL_ENCODINGS) not in (0, ASCII, LOCALE, UNICODE):
+        raise ValueError("ASCII, LOCALE and UNICODE flags are mutually incompatible")
+
+    if isinstance(pattern, bytes) and (info.flags & UNICODE):
+        raise ValueError("cannot use UNICODE flag with a bytes pattern")
+
+    # No explicit encoding flag: default to UNICODE for str patterns and ASCII
+    # for everything else that reaches this point (i.e. bytes).
+    if not (info.flags & _ALL_ENCODINGS):
+        if isinstance(pattern, str):
+            info.flags |= UNICODE
+        else:
+            info.flags |= ASCII
+
+    reverse = bool(info.flags & REVERSE)
+    fuzzy = isinstance(parsed, _Fuzzy)
+
+    # Remember whether this pattern has an inline locale flag.
+    _locale_sensitive[locale_key] = info.inline_locale
+
+    # Fix the group references.
+    # Same deferred re-raise as in the parsing loop above.
+    caught_exception = None
+    try:
+        parsed.fix_groups(pattern, reverse, False)
+    except error as e:
+        caught_exception = e
+
+    if caught_exception:
+        raise error(caught_exception.msg, caught_exception.pattern,
+          caught_exception.pos)
+
+    # Should we print the parsed pattern?
+    if flags & DEBUG:
+        parsed.dump(indent=0, reverse=reverse)
+
+    # Optimise the parsed pattern.
+    parsed = parsed.optimise(info, reverse)
+    parsed = parsed.pack_characters(info)
+
+    # Get the required string.
+    req_offset, req_chars, req_flags = _get_required_string(parsed, info.flags)
+
+    # Build the named lists.
+    # named_lists keeps the values as supplied, keyed by name;
+    # named_list_indexes holds, per index, the values after case-folding when
+    # the list was referenced with case flags.
+    named_lists = {}
+    named_list_indexes = [None] * len(info.named_lists_used)
+    args_needed = set()
+    for key, index in info.named_lists_used.items():
+        name, case_flags = key
+        values = frozenset(kwargs[name])
+        if case_flags:
+            items = frozenset(_fold_case(info, v) for v in values)
+        else:
+            items = values
+        named_lists[name] = values
+        named_list_indexes[index] = items
+        args_needed.add((name, values))
+
+    # Any unused keyword arguments, possibly resulting from a typo?
+    unused_kwargs = set(kwargs) - set(named_lists)
+    if unused_kwargs and not ignore_unused:
+        any_one = next(iter(unused_kwargs))
+        raise ValueError('unused keyword argument {!a}'.format(any_one))
+
+    # Check the features of the groups.
+    _check_group_features(info, parsed)
+
+    # Compile the parsed pattern. The result is a list of tuples.
+    code = parsed.compile(reverse)
+
+    # Is there a group call to the pattern as a whole?
+    key = (0, reverse, fuzzy)
+    ref = info.call_refs.get(key)
+    if ref is not None:
+        code = [(_OP.CALL_REF, ref)] + code + [(_OP.END, )]
+
+    # Add the final 'success' opcode.
+    code += [(_OP.SUCCESS, )]
+
+    # Compile the additional copies of the groups that we need.
+    for group, rev, fuz in info.additional_groups:
+        code += group.compile(rev, fuz)
+
+    # Flatten the code into a list of ints.
+    code = _flatten_code(code)
+
+    if not parsed.has_simple_start():
+        # Get the first set, if possible.
+        # A _FirstSetError just means that no first-set code is prepended.
+        try:
+            fs_code = _compile_firstset(info, parsed.get_firstset(reverse))
+            fs_code = _flatten_code(fs_code)
+            code = fs_code + code
+        except _FirstSetError:
+            pass
+
+    # The named capture groups.
+    # index_group is info.group_index with its keys and values swapped.
+    index_group = dict((v, n) for n, v in info.group_index.items())
+
+    # Create the PatternObject.
+    #
+    # Local flags like IGNORECASE affect the code generation, but aren't needed
+    # by the PatternObject itself. Conversely, global flags like LOCALE _don't_
+    # affect the code generation but _are_ needed by the PatternObject.
+    compiled_pattern = _regex.compile(pattern, info.flags | version, code,
+      info.group_index, index_group, named_lists, named_list_indexes,
+      req_offset, req_chars, req_flags, info.group_count)
+
+    # Do we need to reduce the size of the cache?
+    # The size test is made without the lock; only the shrink itself is
+    # performed while holding _cache_lock.
+    if len(_cache) >= _MAXCACHE:
+        with _cache_lock:
+            _shrink_cache(_cache, _named_args, _locale_sensitive, _MAXCACHE)
+
+    if cache_it:
+        # Only patterns whose final flags include LOCALE keep the locale in
+        # their cache key.
+        if (info.flags & LOCALE) == 0:
+            pattern_locale = None
+
+        args_needed = frozenset(args_needed)
+
+        # Store this regular expression and named list.
+        pattern_key = (pattern, type(pattern), flags, args_needed,
+          DEFAULT_VERSION, pattern_locale)
+        _cache[pattern_key] = compiled_pattern
+
+        # Store what keyword arguments are needed.
+        _named_args[args_key] = args_needed
+
+    return compiled_pattern
+
+def _compile_replacement_helper(pattern, template):
+    """Compiles a replacement template.
+
+    pattern is a compiled pattern object and template is the str or bytes
+    replacement. Returns a list whose items are literal strings (of the same
+    type as template) and the group-reference items produced by
+    _compile_replacement(). Results are cached in _replacement_cache, which is
+    emptied completely once it holds _MAXREPCACHE entries."""
+    # This function is called by the _regex module.
+
+    # Have we seen this before?
+    key = pattern.pattern, pattern.flags, template
+    compiled = _replacement_cache.get(key)
+    if compiled is not None:
+        return compiled
+
+    if len(_replacement_cache) >= _MAXREPCACHE:
+        _replacement_cache.clear()
+
+    is_unicode = isinstance(template, str)
+    source = _Source(template)
+    # Literal text is accumulated as a list of integer character codes;
+    # make_string turns such a list back into the template's own type.
+    if is_unicode:
+        def make_string(char_codes):
+            return "".join(chr(c) for c in char_codes)
+    else:
+        def make_string(char_codes):
+            return bytes(char_codes)
+
+    compiled = []
+    literal = []
+    while True:
+        ch = source.get()
+        # A false value from source.get() ends the scan.
+        if not ch:
+            break
+        if ch == "\\":
+            # '_compile_replacement' will return either an int group reference
+            # or a string literal. It returns items (plural) in order to handle
+            # a 2-character literal (an invalid escape sequence).
+            is_group, items = _compile_replacement(source, pattern, is_unicode)
+            if is_group:
+                # It's a group, so first flush the literal.
+                if literal:
+                    compiled.append(make_string(literal))
+                    literal = []
+                compiled.extend(items)
+            else:
+                literal.extend(items)
+        else:
+            literal.append(ord(ch))
+
+    # Flush the literal.
+    if literal:
+        compiled.append(make_string(literal))
+
+    _replacement_cache[key] = compiled
+
+    return compiled
+
+# We define Pattern here after all the support objects have been defined.
+# An empty pattern is compiled, uncached, purely to obtain the two types.
+_pat = _compile('', 0, False, {}, False)
+Pattern = type(_pat)
+Match = type(_pat.match(''))
+del _pat
+
+# Make Pattern public for typing annotations.
+__all__.append("Pattern")
+__all__.append("Match")
+
+# We'll define an alias for the 'compile' function so that the repr of a
+# pattern object is eval-able.
+Regex = compile
+
+# Register myself for pickling.
+import copyreg as _copy_reg + +def _pickle(pattern): + return _regex.compile, pattern._pickled_data + +_copy_reg.pickle(Pattern, _pickle) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/regex/test_regex.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/regex/test_regex.py new file mode 100644 index 0000000000000000000000000000000000000000..36b85bb1cae6a83709a090451c3d2c2978f0f02b --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/regex/test_regex.py @@ -0,0 +1,4439 @@ +from weakref import proxy +import copy +import pickle +import regex +import string +import sys +import unittest + +# String subclasses for issue 18468. +class StrSubclass(str): + def __getitem__(self, index): + return StrSubclass(super().__getitem__(index)) + +class BytesSubclass(bytes): + def __getitem__(self, index): + return BytesSubclass(super().__getitem__(index)) + +class RegexTests(unittest.TestCase): + PATTERN_CLASS = "" + FLAGS_WITH_COMPILED_PAT = "cannot process flags argument with a compiled pattern" + INVALID_GROUP_REF = "invalid group reference" + MISSING_GT = "missing >" + BAD_GROUP_NAME = "bad character in group name" + MISSING_GROUP_NAME = "missing group name" + MISSING_LT = "missing <" + UNKNOWN_GROUP_I = "unknown group" + UNKNOWN_GROUP = "unknown group" + BAD_ESCAPE = r"bad escape \(end of pattern\)" + BAD_OCTAL_ESCAPE = r"bad escape \\" + BAD_SET = "unterminated character set" + STR_PAT_ON_BYTES = "cannot use a string pattern on a bytes-like object" + BYTES_PAT_ON_STR = "cannot use a bytes pattern on a string-like object" + STR_PAT_BYTES_TEMPL = "expected str instance, bytes found" + BYTES_PAT_STR_TEMPL = "expected a bytes-like object, str found" + BYTES_PAT_UNI_FLAG = "cannot use UNICODE flag with a bytes pattern" + MIXED_FLAGS = "ASCII, LOCALE and UNICODE flags are mutually incompatible" + MISSING_RPAREN = "missing \\)" + TRAILING_CHARS = "unbalanced parenthesis" + BAD_CHAR_RANGE = 
"bad character range" + NOTHING_TO_REPEAT = "nothing to repeat" + MULTIPLE_REPEAT = "multiple repeat" + OPEN_GROUP = "cannot refer to an open group" + DUPLICATE_GROUP = "duplicate group" + CANT_TURN_OFF = "bad inline flags: cannot turn flags off" + UNDEF_CHAR_NAME = "undefined character name" + + def assertTypedEqual(self, actual, expect, msg=None): + self.assertEqual(actual, expect, msg) + + def recurse(actual, expect): + if isinstance(expect, (tuple, list)): + for x, y in zip(actual, expect): + recurse(x, y) + else: + self.assertIs(type(actual), type(expect), msg) + + recurse(actual, expect) + + def test_weakref(self): + s = 'QabbbcR' + x = regex.compile('ab+c') + y = proxy(x) + if x.findall('QabbbcR') != y.findall('QabbbcR'): + self.fail() + + def test_search_star_plus(self): + self.assertEqual(regex.search('a*', 'xxx').span(0), (0, 0)) + self.assertEqual(regex.search('x*', 'axx').span(), (0, 0)) + self.assertEqual(regex.search('x+', 'axx').span(0), (1, 3)) + self.assertEqual(regex.search('x+', 'axx').span(), (1, 3)) + self.assertEqual(regex.search('x', 'aaa'), None) + self.assertEqual(regex.match('a*', 'xxx').span(0), (0, 0)) + self.assertEqual(regex.match('a*', 'xxx').span(), (0, 0)) + self.assertEqual(regex.match('x*', 'xxxa').span(0), (0, 3)) + self.assertEqual(regex.match('x*', 'xxxa').span(), (0, 3)) + self.assertEqual(regex.match('a+', 'xxx'), None) + + def bump_num(self, matchobj): + int_value = int(matchobj[0]) + return str(int_value + 1) + + def test_basic_regex_sub(self): + self.assertEqual(regex.sub("(?i)b+", "x", "bbbb BBBB"), 'x x') + self.assertEqual(regex.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'), + '9.3 -3 24x100y') + self.assertEqual(regex.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3), + '9.3 -3 23x99y') + + self.assertEqual(regex.sub('.', lambda m: r"\n", 'x'), "\\n") + self.assertEqual(regex.sub('.', r"\n", 'x'), "\n") + + self.assertEqual(regex.sub('(?Px)', r'\g\g', 'xx'), 'xxxx') + self.assertEqual(regex.sub('(?Px)', r'\g\g<1>', 'xx'), 
'xxxx') + self.assertEqual(regex.sub('(?Px)', r'\g\g', 'xx'), + 'xxxx') + self.assertEqual(regex.sub('(?Px)', r'\g<1>\g<1>', 'xx'), 'xxxx') + + self.assertEqual(regex.sub('a', r'\t\n\v\r\f\a\b', 'a'), "\t\n\v\r\f\a\b") + self.assertEqual(regex.sub('a', '\t\n\v\r\f\a', 'a'), "\t\n\v\r\f\a") + self.assertEqual(regex.sub('a', '\t\n\v\r\f\a', 'a'), chr(9) + chr(10) + + chr(11) + chr(13) + chr(12) + chr(7)) + + self.assertEqual(regex.sub(r'^\s*', 'X', 'test'), 'Xtest') + + self.assertEqual(regex.sub(r"x", r"\x0A", "x"), "\n") + self.assertEqual(regex.sub(r"x", r"\u000A", "x"), "\n") + self.assertEqual(regex.sub(r"x", r"\U0000000A", "x"), "\n") + self.assertEqual(regex.sub(r"x", r"\N{LATIN CAPITAL LETTER A}", + "x"), "A") + + self.assertEqual(regex.sub(br"x", br"\x0A", b"x"), b"\n") + + def test_bug_449964(self): + # Fails for group followed by other escape. + self.assertEqual(regex.sub(r'(?Px)', r'\g<1>\g<1>\b', 'xx'), + "xx\bxx\b") + + def test_bug_449000(self): + # Test for sub() on escaped characters. 
+ self.assertEqual(regex.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'), + "abc\ndef\n") + self.assertEqual(regex.sub('\r\n', r'\n', 'abc\r\ndef\r\n'), + "abc\ndef\n") + self.assertEqual(regex.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'), + "abc\ndef\n") + self.assertEqual(regex.sub('\r\n', '\n', 'abc\r\ndef\r\n'), + "abc\ndef\n") + + def test_bug_1661(self): + # Verify that flags do not get silently ignored with compiled patterns + pattern = regex.compile('.') + self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT, + lambda: regex.match(pattern, 'A', regex.I)) + self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT, + lambda: regex.search(pattern, 'A', regex.I)) + self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT, + lambda: regex.findall(pattern, 'A', regex.I)) + self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT, + lambda: regex.compile(pattern, regex.I)) + + def test_bug_3629(self): + # A regex that triggered a bug in the sre-code validator + self.assertEqual(repr(type(regex.compile("(?P)(?(quote))"))), + self.PATTERN_CLASS) + + def test_sub_template_numeric_escape(self): + # Bug 776311 and friends. 
+ self.assertEqual(regex.sub('x', r'\0', 'x'), "\0") + self.assertEqual(regex.sub('x', r'\000', 'x'), "\000") + self.assertEqual(regex.sub('x', r'\001', 'x'), "\001") + self.assertEqual(regex.sub('x', r'\008', 'x'), "\0" + "8") + self.assertEqual(regex.sub('x', r'\009', 'x'), "\0" + "9") + self.assertEqual(regex.sub('x', r'\111', 'x'), "\111") + self.assertEqual(regex.sub('x', r'\117', 'x'), "\117") + + self.assertEqual(regex.sub('x', r'\1111', 'x'), "\1111") + self.assertEqual(regex.sub('x', r'\1111', 'x'), "\111" + "1") + + self.assertEqual(regex.sub('x', r'\00', 'x'), '\x00') + self.assertEqual(regex.sub('x', r'\07', 'x'), '\x07') + self.assertEqual(regex.sub('x', r'\08', 'x'), "\0" + "8") + self.assertEqual(regex.sub('x', r'\09', 'x'), "\0" + "9") + self.assertEqual(regex.sub('x', r'\0a', 'x'), "\0" + "a") + + self.assertEqual(regex.sub('x', r'\400', 'x'), "\u0100") + self.assertEqual(regex.sub('x', r'\777', 'x'), "\u01FF") + self.assertEqual(regex.sub(b'x', br'\400', b'x'), b"\x00") + self.assertEqual(regex.sub(b'x', br'\777', b'x'), b"\xFF") + + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\1', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\8', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\9', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\11', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\18', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\1a', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\90', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\99', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\118', 'x')) # r'\11' + '8' + 
self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\11a', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\181', 'x')) # r'\18' + '1' + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\800', 'x')) # r'\80' + '0' + + # In Python 2.3 (etc), these loop endlessly in sre_parser.py. + self.assertEqual(regex.sub('(((((((((((x)))))))))))', r'\11', 'x'), + 'x') + self.assertEqual(regex.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'), + 'xz8') + self.assertEqual(regex.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'), + 'xza') + + def test_qualified_re_sub(self): + self.assertEqual(regex.sub('a', 'b', 'aaaaa'), 'bbbbb') + self.assertEqual(regex.sub('a', 'b', 'aaaaa', 1), 'baaaa') + + def test_bug_114660(self): + self.assertEqual(regex.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'), + 'hello there') + + def test_bug_462270(self): + # Test for empty sub() behaviour, see SF bug #462270 + if sys.version_info >= (3, 7, 0): + self.assertEqual(regex.sub('(?V0)x*', '-', 'abxd'), '-a-b--d-') + else: + self.assertEqual(regex.sub('(?V0)x*', '-', 'abxd'), '-a-b-d-') + self.assertEqual(regex.sub('(?V1)x*', '-', 'abxd'), '-a-b--d-') + self.assertEqual(regex.sub('x+', '-', 'abxd'), 'ab-d') + + def test_bug_14462(self): + # chr(255) is a valid identifier in Python 3. 
+ group_name = '\xFF' + self.assertEqual(regex.search(r'(?P<' + group_name + '>a)', + 'abc').group(group_name), 'a') + + def test_symbolic_refs(self): + self.assertRaisesRegex(regex.error, self.MISSING_GT, lambda: + regex.sub('(?Px)', r'\gx)', r'\g<', 'xx')) + self.assertRaisesRegex(regex.error, self.MISSING_LT, lambda: + regex.sub('(?Px)', r'\g', 'xx')) + self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda: + regex.sub('(?Px)', r'\g', 'xx')) + self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda: + regex.sub('(?Px)', r'\g<1a1>', 'xx')) + self.assertRaisesRegex(IndexError, self.UNKNOWN_GROUP_I, lambda: + regex.sub('(?Px)', r'\g', 'xx')) + + # The new behaviour of unmatched but valid groups is to treat them like + # empty matches in the replacement template, like in Perl. + self.assertEqual(regex.sub('(?Px)|(?Py)', r'\g', 'xx'), '') + self.assertEqual(regex.sub('(?Px)|(?Py)', r'\2', 'xx'), '') + + # The old behaviour was to raise it as an IndexError. + self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda: + regex.sub('(?Px)', r'\g<-1>', 'xx')) + + def test_re_subn(self): + self.assertEqual(regex.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) + self.assertEqual(regex.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1)) + self.assertEqual(regex.subn("b+", "x", "xyz"), ('xyz', 0)) + self.assertEqual(regex.subn("b*", "x", "xyz"), ('xxxyxzx', 4)) + self.assertEqual(regex.subn("b*", "x", "xyz", 2), ('xxxyz', 2)) + + def test_re_split(self): + self.assertEqual(regex.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c']) + if sys.version_info >= (3, 7, 0): + self.assertEqual(regex.split(":*", ":a:b::c"), ['', '', 'a', '', + 'b', '', 'c', '']) + self.assertEqual(regex.split("(:*)", ":a:b::c"), ['', ':', '', '', + 'a', ':', '', '', 'b', '::', '', '', 'c', '', '']) + self.assertEqual(regex.split("(?::*)", ":a:b::c"), ['', '', 'a', + '', 'b', '', 'c', '']) + self.assertEqual(regex.split("(:)*", ":a:b::c"), ['', ':', '', + None, 'a', ':', '', None, 'b', ':', 
'', None, 'c', None, '']) + else: + self.assertEqual(regex.split(":*", ":a:b::c"), ['', 'a', 'b', 'c']) + self.assertEqual(regex.split("(:*)", ":a:b::c"), ['', ':', 'a', + ':', 'b', '::', 'c']) + self.assertEqual(regex.split("(?::*)", ":a:b::c"), ['', 'a', 'b', + 'c']) + self.assertEqual(regex.split("(:)*", ":a:b::c"), ['', ':', 'a', + ':', 'b', ':', 'c']) + self.assertEqual(regex.split("([b:]+)", ":a:b::c"), ['', ':', 'a', + ':b::', 'c']) + self.assertEqual(regex.split("(b)|(:+)", ":a:b::c"), ['', None, ':', + 'a', None, ':', '', 'b', None, '', None, '::', 'c']) + self.assertEqual(regex.split("(?:b)|(?::+)", ":a:b::c"), ['', 'a', '', + '', 'c']) + + self.assertEqual(regex.split("x", "xaxbxc"), ['', 'a', 'b', 'c']) + self.assertEqual([m for m in regex.splititer("x", "xaxbxc")], ['', 'a', + 'b', 'c']) + + self.assertEqual(regex.split("(?r)x", "xaxbxc"), ['c', 'b', 'a', '']) + self.assertEqual([m for m in regex.splititer("(?r)x", "xaxbxc")], ['c', + 'b', 'a', '']) + + self.assertEqual(regex.split("(x)|(y)", "xaxbxc"), ['', 'x', None, 'a', + 'x', None, 'b', 'x', None, 'c']) + self.assertEqual([m for m in regex.splititer("(x)|(y)", "xaxbxc")], + ['', 'x', None, 'a', 'x', None, 'b', 'x', None, 'c']) + + self.assertEqual(regex.split("(?r)(x)|(y)", "xaxbxc"), ['c', 'x', None, + 'b', 'x', None, 'a', 'x', None, '']) + self.assertEqual([m for m in regex.splititer("(?r)(x)|(y)", "xaxbxc")], + ['c', 'x', None, 'b', 'x', None, 'a', 'x', None, '']) + + self.assertEqual(regex.split(r"(?V1)\b", "a b c"), ['', 'a', ' ', 'b', + ' ', 'c', '']) + self.assertEqual(regex.split(r"(?V1)\m", "a b c"), ['', 'a ', 'b ', + 'c']) + self.assertEqual(regex.split(r"(?V1)\M", "a b c"), ['a', ' b', ' c', + '']) + + def test_qualified_re_split(self): + self.assertEqual(regex.split(":", ":a:b::c", 2), ['', 'a', 'b::c']) + self.assertEqual(regex.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d']) + self.assertEqual(regex.split("(:)", ":a:b::c", 2), ['', ':', 'a', ':', + 'b::c']) + + if sys.version_info >= 
(3, 7, 0): + self.assertEqual(regex.split("(:*)", ":a:b::c", 2), ['', ':', '', + '', 'a:b::c']) + else: + self.assertEqual(regex.split("(:*)", ":a:b::c", 2), ['', ':', 'a', + ':', 'b::c']) + + def test_re_findall(self): + self.assertEqual(regex.findall(":+", "abc"), []) + self.assertEqual(regex.findall(":+", "a:b::c:::d"), [':', '::', ':::']) + self.assertEqual(regex.findall("(:+)", "a:b::c:::d"), [':', '::', + ':::']) + self.assertEqual(regex.findall("(:)(:*)", "a:b::c:::d"), [(':', ''), + (':', ':'), (':', '::')]) + + self.assertEqual(regex.findall(r"\((?P.{0,5}?TEST)\)", + "(MY TEST)"), ["MY TEST"]) + self.assertEqual(regex.findall(r"\((?P.{0,3}?TEST)\)", + "(MY TEST)"), ["MY TEST"]) + self.assertEqual(regex.findall(r"\((?P.{0,3}?T)\)", "(MY T)"), + ["MY T"]) + + self.assertEqual(regex.findall(r"[^a]{2}[A-Z]", "\n S"), [' S']) + self.assertEqual(regex.findall(r"[^a]{2,3}[A-Z]", "\n S"), ['\n S']) + self.assertEqual(regex.findall(r"[^a]{2,3}[A-Z]", "\n S"), [' S']) + + self.assertEqual(regex.findall(r"X(Y[^Y]+?){1,2}( |Q)+DEF", + "XYABCYPPQ\nQ DEF"), [('YPPQ\n', ' ')]) + + self.assertEqual(regex.findall(r"(\nTest(\n+.+?){0,2}?)?\n+End", + "\nTest\nxyz\nxyz\nEnd"), [('\nTest\nxyz\nxyz', '\nxyz')]) + + def test_bug_117612(self): + self.assertEqual(regex.findall(r"(a|(b))", "aba"), [('a', ''), ('b', + 'b'), ('a', '')]) + + def test_re_match(self): + self.assertEqual(regex.match('a', 'a')[:], ('a',)) + self.assertEqual(regex.match('(a)', 'a')[:], ('a', 'a')) + self.assertEqual(regex.match(r'(a)', 'a')[0], 'a') + self.assertEqual(regex.match(r'(a)', 'a')[1], 'a') + self.assertEqual(regex.match(r'(a)', 'a').group(1, 1), ('a', 'a')) + + pat = regex.compile('((a)|(b))(c)?') + self.assertEqual(pat.match('a')[:], ('a', 'a', 'a', None, None)) + self.assertEqual(pat.match('b')[:], ('b', 'b', None, 'b', None)) + self.assertEqual(pat.match('ac')[:], ('ac', 'a', 'a', None, 'c')) + self.assertEqual(pat.match('bc')[:], ('bc', 'b', None, 'b', 'c')) + 
self.assertEqual(pat.match('bc')[:], ('bc', 'b', None, 'b', 'c')) + + # A single group. + m = regex.match('(a)', 'a') + self.assertEqual(m.group(), 'a') + self.assertEqual(m.group(0), 'a') + self.assertEqual(m.group(1), 'a') + self.assertEqual(m.group(1, 1), ('a', 'a')) + + pat = regex.compile('(?:(?Pa)|(?Pb))(?Pc)?') + self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None)) + self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'), (None, 'b', + None)) + self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c')) + + def test_re_groupref_exists(self): + self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', '(a)')[:], + ('(a)', '(', 'a')) + self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', 'a')[:], ('a', + None, 'a')) + self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', 'a)'), None) + self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', '(a'), None) + self.assertEqual(regex.match('^(?:(a)|c)((?(1)b|d))$', 'ab')[:], ('ab', + 'a', 'b')) + self.assertEqual(regex.match('^(?:(a)|c)((?(1)b|d))$', 'cd')[:], ('cd', + None, 'd')) + self.assertEqual(regex.match('^(?:(a)|c)((?(1)|d))$', 'cd')[:], ('cd', + None, 'd')) + self.assertEqual(regex.match('^(?:(a)|c)((?(1)|d))$', 'a')[:], ('a', + 'a', '')) + + # Tests for bug #1177831: exercise groups other than the first group. 
+ p = regex.compile('(?Pa)(?Pb)?((?(g2)c|d))') + self.assertEqual(p.match('abc')[:], ('abc', 'a', 'b', 'c')) + self.assertEqual(p.match('ad')[:], ('ad', 'a', None, 'd')) + self.assertEqual(p.match('abd'), None) + self.assertEqual(p.match('ac'), None) + + def test_re_groupref(self): + self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', '|a|')[:], ('|a|', + '|', 'a')) + self.assertEqual(regex.match(r'^(\|)?([^()]+)\1?$', 'a')[:], ('a', + None, 'a')) + self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', 'a|'), None) + self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', '|a'), None) + self.assertEqual(regex.match(r'^(?:(a)|c)(\1)$', 'aa')[:], ('aa', 'a', + 'a')) + self.assertEqual(regex.match(r'^(?:(a)|c)(\1)?$', 'c')[:], ('c', None, + None)) + + self.assertEqual(regex.findall(r"(?i)(.{1,40}?),(.{1,40}?)(?:;)+(.{1,80}).{1,40}?\3(\ |;)+(.{1,80}?)\1", + "TEST, BEST; LEST ; Lest 123 Test, Best"), [('TEST', ' BEST', + ' LEST', ' ', '123 ')]) + + def test_groupdict(self): + self.assertEqual(regex.match('(?Pfirst) (?Psecond)', + 'first second').groupdict(), {'first': 'first', 'second': 'second'}) + + def test_expand(self): + self.assertEqual(regex.match("(?Pfirst) (?Psecond)", + "first second").expand(r"\2 \1 \g \g"), + 'second first second first') + + def test_repeat_minmax(self): + self.assertEqual(regex.match(r"^(\w){1}$", "abc"), None) + self.assertEqual(regex.match(r"^(\w){1}?$", "abc"), None) + self.assertEqual(regex.match(r"^(\w){1,2}$", "abc"), None) + self.assertEqual(regex.match(r"^(\w){1,2}?$", "abc"), None) + + self.assertEqual(regex.match(r"^(\w){3}$", "abc")[1], 'c') + self.assertEqual(regex.match(r"^(\w){1,3}$", "abc")[1], 'c') + self.assertEqual(regex.match(r"^(\w){1,4}$", "abc")[1], 'c') + self.assertEqual(regex.match(r"^(\w){3,4}?$", "abc")[1], 'c') + self.assertEqual(regex.match(r"^(\w){3}?$", "abc")[1], 'c') + self.assertEqual(regex.match(r"^(\w){1,3}?$", "abc")[1], 'c') + self.assertEqual(regex.match(r"^(\w){1,4}?$", "abc")[1], 'c') + 
self.assertEqual(regex.match(r"^(\w){3,4}?$", "abc")[1], 'c') + + self.assertEqual(regex.match("^x{1}$", "xxx"), None) + self.assertEqual(regex.match("^x{1}?$", "xxx"), None) + self.assertEqual(regex.match("^x{1,2}$", "xxx"), None) + self.assertEqual(regex.match("^x{1,2}?$", "xxx"), None) + + self.assertEqual(regex.match("^x{1}", "xxx")[0], 'x') + self.assertEqual(regex.match("^x{1}?", "xxx")[0], 'x') + self.assertEqual(regex.match("^x{0,1}", "xxx")[0], 'x') + self.assertEqual(regex.match("^x{0,1}?", "xxx")[0], '') + + self.assertEqual(bool(regex.match("^x{3}$", "xxx")), True) + self.assertEqual(bool(regex.match("^x{1,3}$", "xxx")), True) + self.assertEqual(bool(regex.match("^x{1,4}$", "xxx")), True) + self.assertEqual(bool(regex.match("^x{3,4}?$", "xxx")), True) + self.assertEqual(bool(regex.match("^x{3}?$", "xxx")), True) + self.assertEqual(bool(regex.match("^x{1,3}?$", "xxx")), True) + self.assertEqual(bool(regex.match("^x{1,4}?$", "xxx")), True) + self.assertEqual(bool(regex.match("^x{3,4}?$", "xxx")), True) + + self.assertEqual(regex.match("^x{}$", "xxx"), None) + self.assertEqual(bool(regex.match("^x{}$", "x{}")), True) + + def test_getattr(self): + self.assertEqual(regex.compile("(?i)(a)(b)").pattern, '(?i)(a)(b)') + self.assertEqual(regex.compile("(?i)(a)(b)").flags, regex.I | regex.U | + regex.DEFAULT_VERSION) + self.assertEqual(regex.compile(b"(?i)(a)(b)").flags, regex.A | regex.I + | regex.DEFAULT_VERSION) + self.assertEqual(regex.compile("(?i)(a)(b)").groups, 2) + self.assertEqual(regex.compile("(?i)(a)(b)").groupindex, {}) + + self.assertEqual(regex.compile("(?i)(?Pa)(?Pb)").groupindex, + {'first': 1, 'other': 2}) + + self.assertEqual(regex.match("(a)", "a").pos, 0) + self.assertEqual(regex.match("(a)", "a").endpos, 1) + + self.assertEqual(regex.search("b(c)", "abcdef").pos, 0) + self.assertEqual(regex.search("b(c)", "abcdef").endpos, 6) + self.assertEqual(regex.search("b(c)", "abcdef").span(), (1, 3)) + self.assertEqual(regex.search("b(c)", 
"abcdef").span(1), (2, 3)) + + self.assertEqual(regex.match("(a)", "a").string, 'a') + self.assertEqual(regex.match("(a)", "a").regs, ((0, 1), (0, 1))) + self.assertEqual(repr(type(regex.match("(a)", "a").re)), + self.PATTERN_CLASS) + + # Issue 14260. + p = regex.compile(r'abc(?Pdef)') + p.groupindex["n"] = 0 + self.assertEqual(p.groupindex["n"], 1) + + def test_special_escapes(self): + self.assertEqual(regex.search(r"\b(b.)\b", "abcd abc bcd bx")[1], 'bx') + self.assertEqual(regex.search(r"\B(b.)\B", "abc bcd bc abxd")[1], 'bx') + self.assertEqual(regex.search(br"\b(b.)\b", b"abcd abc bcd bx", + regex.LOCALE)[1], b'bx') + self.assertEqual(regex.search(br"\B(b.)\B", b"abc bcd bc abxd", + regex.LOCALE)[1], b'bx') + self.assertEqual(regex.search(r"\b(b.)\b", "abcd abc bcd bx", + regex.UNICODE)[1], 'bx') + self.assertEqual(regex.search(r"\B(b.)\B", "abc bcd bc abxd", + regex.UNICODE)[1], 'bx') + + self.assertEqual(regex.search(r"^abc$", "\nabc\n", regex.M)[0], 'abc') + self.assertEqual(regex.search(r"^\Aabc\Z$", "abc", regex.M)[0], 'abc') + self.assertEqual(regex.search(r"^\Aabc\Z$", "\nabc\n", regex.M), None) + + self.assertEqual(regex.search(br"\b(b.)\b", b"abcd abc bcd bx")[1], + b'bx') + self.assertEqual(regex.search(br"\B(b.)\B", b"abc bcd bc abxd")[1], + b'bx') + self.assertEqual(regex.search(br"^abc$", b"\nabc\n", regex.M)[0], + b'abc') + self.assertEqual(regex.search(br"^\Aabc\Z$", b"abc", regex.M)[0], + b'abc') + self.assertEqual(regex.search(br"^\Aabc\Z$", b"\nabc\n", regex.M), + None) + + self.assertEqual(regex.search(r"\d\D\w\W\s\S", "1aa! a")[0], '1aa! a') + self.assertEqual(regex.search(br"\d\D\w\W\s\S", b"1aa! a", + regex.LOCALE)[0], b'1aa! a') + self.assertEqual(regex.search(r"\d\D\w\W\s\S", "1aa! a", + regex.UNICODE)[0], '1aa! 
a') + + def test_bigcharset(self): + self.assertEqual(regex.match(r"([\u2222\u2223])", "\u2222")[1], + '\u2222') + self.assertEqual(regex.match(r"([\u2222\u2223])", "\u2222", + regex.UNICODE)[1], '\u2222') + self.assertEqual("".join(regex.findall(".", + "e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)), + 'e\xe8\xe9\xea\xeb\u0113\u011b\u0117') + self.assertEqual("".join(regex.findall(r"[e\xe8\xe9\xea\xeb\u0113\u011b\u0117]", + "e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)), + 'e\xe8\xe9\xea\xeb\u0113\u011b\u0117') + self.assertEqual("".join(regex.findall(r"e|\xe8|\xe9|\xea|\xeb|\u0113|\u011b|\u0117", + "e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)), + 'e\xe8\xe9\xea\xeb\u0113\u011b\u0117') + + def test_anyall(self): + self.assertEqual(regex.match("a.b", "a\nb", regex.DOTALL)[0], "a\nb") + self.assertEqual(regex.match("a.*b", "a\n\nb", regex.DOTALL)[0], + "a\n\nb") + + def test_non_consuming(self): + self.assertEqual(regex.match(r"(a(?=\s[^a]))", "a b")[1], 'a') + self.assertEqual(regex.match(r"(a(?=\s[^a]*))", "a b")[1], 'a') + self.assertEqual(regex.match(r"(a(?=\s[abc]))", "a b")[1], 'a') + self.assertEqual(regex.match(r"(a(?=\s[abc]*))", "a bc")[1], 'a') + self.assertEqual(regex.match(r"(a)(?=\s\1)", "a a")[1], 'a') + self.assertEqual(regex.match(r"(a)(?=\s\1*)", "a aa")[1], 'a') + self.assertEqual(regex.match(r"(a)(?=\s(abc|a))", "a a")[1], 'a') + + self.assertEqual(regex.match(r"(a(?!\s[^a]))", "a a")[1], 'a') + self.assertEqual(regex.match(r"(a(?!\s[abc]))", "a d")[1], 'a') + self.assertEqual(regex.match(r"(a)(?!\s\1)", "a b")[1], 'a') + self.assertEqual(regex.match(r"(a)(?!\s(abc|a))", "a b")[1], 'a') + + def test_ignore_case(self): + self.assertEqual(regex.match("abc", "ABC", regex.I)[0], 'ABC') + self.assertEqual(regex.match(b"abc", b"ABC", regex.I)[0], b'ABC') + + self.assertEqual(regex.match(r"(a\s[^a]*)", "a bb", regex.I)[1], + 'a bb') + self.assertEqual(regex.match(r"(a\s[abc])", "a b", regex.I)[1], 'a b') + 
self.assertEqual(regex.match(r"(a\s[abc]*)", "a bb", regex.I)[1], + 'a bb') + self.assertEqual(regex.match(r"((a)\s\2)", "a a", regex.I)[1], 'a a') + self.assertEqual(regex.match(r"((a)\s\2*)", "a aa", regex.I)[1], + 'a aa') + self.assertEqual(regex.match(r"((a)\s(abc|a))", "a a", regex.I)[1], + 'a a') + self.assertEqual(regex.match(r"((a)\s(abc|a)*)", "a aa", regex.I)[1], + 'a aa') + + # Issue 3511. + self.assertEqual(regex.match(r"[Z-a]", "_").span(), (0, 1)) + self.assertEqual(regex.match(r"(?i)[Z-a]", "_").span(), (0, 1)) + + self.assertEqual(bool(regex.match(r"(?i)nao", "nAo")), True) + self.assertEqual(bool(regex.match(r"(?i)n\xE3o", "n\xC3o")), True) + self.assertEqual(bool(regex.match(r"(?i)n\xE3o", "N\xC3O")), True) + self.assertEqual(bool(regex.match(r"(?i)s", "\u017F")), True) + + def test_case_folding(self): + self.assertEqual(regex.search(r"(?fi)ss", "SS").span(), (0, 2)) + self.assertEqual(regex.search(r"(?fi)SS", "ss").span(), (0, 2)) + self.assertEqual(regex.search(r"(?fi)SS", + "\N{LATIN SMALL LETTER SHARP S}").span(), (0, 1)) + self.assertEqual(regex.search(r"(?fi)\N{LATIN SMALL LETTER SHARP S}", + "SS").span(), (0, 2)) + + self.assertEqual(regex.search(r"(?fi)\N{LATIN SMALL LIGATURE ST}", + "ST").span(), (0, 2)) + self.assertEqual(regex.search(r"(?fi)ST", + "\N{LATIN SMALL LIGATURE ST}").span(), (0, 1)) + self.assertEqual(regex.search(r"(?fi)ST", + "\N{LATIN SMALL LIGATURE LONG S T}").span(), (0, 1)) + + self.assertEqual(regex.search(r"(?fi)SST", + "\N{LATIN SMALL LETTER SHARP S}t").span(), (0, 2)) + self.assertEqual(regex.search(r"(?fi)SST", + "s\N{LATIN SMALL LIGATURE LONG S T}").span(), (0, 2)) + self.assertEqual(regex.search(r"(?fi)SST", + "s\N{LATIN SMALL LIGATURE ST}").span(), (0, 2)) + self.assertEqual(regex.search(r"(?fi)\N{LATIN SMALL LIGATURE ST}", + "SST").span(), (1, 3)) + self.assertEqual(regex.search(r"(?fi)SST", + "s\N{LATIN SMALL LIGATURE ST}").span(), (0, 2)) + + self.assertEqual(regex.search(r"(?fi)FFI", + "\N{LATIN SMALL 
LIGATURE FFI}").span(), (0, 1)) + self.assertEqual(regex.search(r"(?fi)FFI", + "\N{LATIN SMALL LIGATURE FF}i").span(), (0, 2)) + self.assertEqual(regex.search(r"(?fi)FFI", + "f\N{LATIN SMALL LIGATURE FI}").span(), (0, 2)) + self.assertEqual(regex.search(r"(?fi)\N{LATIN SMALL LIGATURE FFI}", + "FFI").span(), (0, 3)) + self.assertEqual(regex.search(r"(?fi)\N{LATIN SMALL LIGATURE FF}i", + "FFI").span(), (0, 3)) + self.assertEqual(regex.search(r"(?fi)f\N{LATIN SMALL LIGATURE FI}", + "FFI").span(), (0, 3)) + + sigma = "\u03A3\u03C3\u03C2" + for ch1 in sigma: + for ch2 in sigma: + if not regex.match(r"(?fi)" + ch1, ch2): + self.fail() + + self.assertEqual(bool(regex.search(r"(?iV1)ff", "\uFB00\uFB01")), + True) + self.assertEqual(bool(regex.search(r"(?iV1)ff", "\uFB01\uFB00")), + True) + self.assertEqual(bool(regex.search(r"(?iV1)fi", "\uFB00\uFB01")), + True) + self.assertEqual(bool(regex.search(r"(?iV1)fi", "\uFB01\uFB00")), + True) + self.assertEqual(bool(regex.search(r"(?iV1)fffi", "\uFB00\uFB01")), + True) + self.assertEqual(bool(regex.search(r"(?iV1)f\uFB03", + "\uFB00\uFB01")), True) + self.assertEqual(bool(regex.search(r"(?iV1)ff", "\uFB00\uFB01")), + True) + self.assertEqual(bool(regex.search(r"(?iV1)fi", "\uFB00\uFB01")), + True) + self.assertEqual(bool(regex.search(r"(?iV1)fffi", "\uFB00\uFB01")), + True) + self.assertEqual(bool(regex.search(r"(?iV1)f\uFB03", + "\uFB00\uFB01")), True) + self.assertEqual(bool(regex.search(r"(?iV1)f\uFB01", "\uFB00i")), + True) + self.assertEqual(bool(regex.search(r"(?iV1)f\uFB01", "\uFB00i")), + True) + + self.assertEqual(regex.findall(r"(?iV0)\m(?:word){e<=3}\M(?ne", "affine", + options=["\N{LATIN SMALL LIGATURE FFI}"]).span(), (0, 6)) + self.assertEqual(regex.search(r"(?fi)a\Lne", + "a\N{LATIN SMALL LIGATURE FFI}ne", options=["ffi"]).span(), (0, 4)) + + def test_category(self): + self.assertEqual(regex.match(r"(\s)", " ")[1], ' ') + + def test_not_literal(self): + self.assertEqual(regex.search(r"\s([^a])", " b")[1], 'b') + 
self.assertEqual(regex.search(r"\s([^a]*)", " bb")[1], 'bb') + + def test_search_coverage(self): + self.assertEqual(regex.search(r"\s(b)", " b")[1], 'b') + self.assertEqual(regex.search(r"a\s", "a ")[0], 'a ') + + def test_re_escape(self): + p = "" + self.assertEqual(regex.escape(p), p) + for i in range(0, 256): + p += chr(i) + self.assertEqual(bool(regex.match(regex.escape(chr(i)), chr(i))), + True) + self.assertEqual(regex.match(regex.escape(chr(i)), chr(i)).span(), + (0, 1)) + + pat = regex.compile(regex.escape(p)) + self.assertEqual(pat.match(p).span(), (0, 256)) + + def test_re_escape_byte(self): + p = b"" + self.assertEqual(regex.escape(p), p) + for i in range(0, 256): + b = bytes([i]) + p += b + self.assertEqual(bool(regex.match(regex.escape(b), b)), True) + self.assertEqual(regex.match(regex.escape(b), b).span(), (0, 1)) + + pat = regex.compile(regex.escape(p)) + self.assertEqual(pat.match(p).span(), (0, 256)) + + def test_constants(self): + if regex.I != regex.IGNORECASE: + self.fail() + if regex.L != regex.LOCALE: + self.fail() + if regex.M != regex.MULTILINE: + self.fail() + if regex.S != regex.DOTALL: + self.fail() + if regex.X != regex.VERBOSE: + self.fail() + + def test_flags(self): + for flag in [regex.I, regex.M, regex.X, regex.S, regex.L]: + self.assertEqual(repr(type(regex.compile('^pattern$', flag))), + self.PATTERN_CLASS) + + def test_sre_character_literals(self): + for i in [0, 8, 16, 32, 64, 127, 128, 255]: + self.assertEqual(bool(regex.match(r"\%03o" % i, chr(i))), True) + self.assertEqual(bool(regex.match(r"\%03o0" % i, chr(i) + "0")), + True) + self.assertEqual(bool(regex.match(r"\%03o8" % i, chr(i) + "8")), + True) + self.assertEqual(bool(regex.match(r"\x%02x" % i, chr(i))), True) + self.assertEqual(bool(regex.match(r"\x%02x0" % i, chr(i) + "0")), + True) + self.assertEqual(bool(regex.match(r"\x%02xz" % i, chr(i) + "z")), + True) + + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.match(r"\911", "")) + + def 
test_sre_character_class_literals(self): + for i in [0, 8, 16, 32, 64, 127, 128, 255]: + self.assertEqual(bool(regex.match(r"[\%03o]" % i, chr(i))), True) + self.assertEqual(bool(regex.match(r"[\%03o0]" % i, chr(i))), True) + self.assertEqual(bool(regex.match(r"[\%03o8]" % i, chr(i))), True) + self.assertEqual(bool(regex.match(r"[\x%02x]" % i, chr(i))), True) + self.assertEqual(bool(regex.match(r"[\x%02x0]" % i, chr(i))), True) + self.assertEqual(bool(regex.match(r"[\x%02xz]" % i, chr(i))), True) + + self.assertRaisesRegex(regex.error, self.BAD_OCTAL_ESCAPE, lambda: + regex.match(r"[\911]", "")) + + def test_bug_113254(self): + self.assertEqual(regex.match(r'(a)|(b)', 'b').start(1), -1) + self.assertEqual(regex.match(r'(a)|(b)', 'b').end(1), -1) + self.assertEqual(regex.match(r'(a)|(b)', 'b').span(1), (-1, -1)) + + def test_bug_527371(self): + # Bug described in patches 527371/672491. + self.assertEqual(regex.match(r'(a)?a','a').lastindex, None) + self.assertEqual(regex.match(r'(a)(b)?b','ab').lastindex, 1) + self.assertEqual(regex.match(r'(?Pa)(?Pb)?b','ab').lastgroup, + 'a') + self.assertEqual(regex.match("(?Pa(b))", "ab").lastgroup, 'a') + self.assertEqual(regex.match("((a))", "a").lastindex, 1) + + def test_bug_545855(self): + # Bug 545855 -- This pattern failed to cause a compile error as it + # should, instead provoking a TypeError. + self.assertRaisesRegex(regex.error, self.BAD_SET, lambda: + regex.compile('foo[a-')) + + def test_bug_418626(self): + # Bugs 418626 at al. -- Testing Greg Chapman's addition of op code + # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of + # pattern '*?' on a long string. + self.assertEqual(regex.match('.*?c', 10000 * 'ab' + 'cd').end(0), + 20001) + self.assertEqual(regex.match('.*?cd', 5000 * 'ab' + 'c' + 5000 * 'ab' + + 'cde').end(0), 20003) + self.assertEqual(regex.match('.*?cd', 20000 * 'abc' + 'de').end(0), + 60001) + # Non-simple '*?' 
still used to hit the recursion limit, before the + # non-recursive scheme was implemented. + self.assertEqual(regex.search('(a|b)*?c', 10000 * 'ab' + 'cd').end(0), + 20001) + + def test_bug_612074(self): + pat = "[" + regex.escape("\u2039") + "]" + self.assertEqual(regex.compile(pat) and 1, 1) + + def test_stack_overflow(self): + # Nasty cases that used to overflow the straightforward recursive + # implementation of repeated groups. + self.assertEqual(regex.match('(x)*', 50000 * 'x')[1], 'x') + self.assertEqual(regex.match('(x)*y', 50000 * 'x' + 'y')[1], 'x') + self.assertEqual(regex.match('(x)*?y', 50000 * 'x' + 'y')[1], 'x') + + def test_scanner(self): + def s_ident(scanner, token): return token + def s_operator(scanner, token): return "op%s" % token + def s_float(scanner, token): return float(token) + def s_int(scanner, token): return int(token) + + scanner = regex.Scanner([(r"[a-zA-Z_]\w*", s_ident), (r"\d+\.\d*", + s_float), (r"\d+", s_int), (r"=|\+|-|\*|/", s_operator), (r"\s+", + None), ]) + + self.assertEqual(repr(type(scanner.scanner.scanner("").pattern)), + self.PATTERN_CLASS) + + self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"), (['sum', + 'op=', 3, 'op*', 'foo', 'op+', 312.5, 'op+', 'bar'], '')) + + def test_bug_448951(self): + # Bug 448951 (similar to 429357, but with single char match). + # (Also test greedy matches.) + for op in '', '?', '*': + self.assertEqual(regex.match(r'((.%s):)?z' % op, 'z')[:], ('z', + None, None)) + self.assertEqual(regex.match(r'((.%s):)?z' % op, 'a:z')[:], ('a:z', + 'a:', 'a')) + + def test_bug_725106(self): + # Capturing groups in alternatives in repeats. 
+ self.assertEqual(regex.match('^((a)|b)*', 'abc')[:], ('ab', 'b', 'a')) + self.assertEqual(regex.match('^(([ab])|c)*', 'abc')[:], ('abc', 'c', + 'b')) + self.assertEqual(regex.match('^((d)|[ab])*', 'abc')[:], ('ab', 'b', + None)) + self.assertEqual(regex.match('^((a)c|[ab])*', 'abc')[:], ('ab', 'b', + None)) + self.assertEqual(regex.match('^((a)|b)*?c', 'abc')[:], ('abc', 'b', + 'a')) + self.assertEqual(regex.match('^(([ab])|c)*?d', 'abcd')[:], ('abcd', + 'c', 'b')) + self.assertEqual(regex.match('^((d)|[ab])*?c', 'abc')[:], ('abc', 'b', + None)) + self.assertEqual(regex.match('^((a)c|[ab])*?c', 'abc')[:], ('abc', 'b', + None)) + + def test_bug_725149(self): + # Mark_stack_base restoring before restoring marks. + self.assertEqual(regex.match('(a)(?:(?=(b)*)c)*', 'abb')[:], ('a', 'a', + None)) + self.assertEqual(regex.match('(a)((?!(b)*))*', 'abb')[:], ('a', 'a', + None, None)) + + def test_bug_764548(self): + # Bug 764548, regex.compile() barfs on str/unicode subclasses. + class my_unicode(str): pass + pat = regex.compile(my_unicode("abc")) + self.assertEqual(pat.match("xyz"), None) + + def test_finditer(self): + it = regex.finditer(r":+", "a:b::c:::d") + self.assertEqual([item[0] for item in it], [':', '::', ':::']) + + def test_bug_926075(self): + if regex.compile('bug_926075') is regex.compile(b'bug_926075'): + self.fail() + + def test_bug_931848(self): + pattern = "[\u002E\u3002\uFF0E\uFF61]" + self.assertEqual(regex.compile(pattern).split("a.b.c"), ['a', 'b', + 'c']) + + def test_bug_581080(self): + it = regex.finditer(r"\s", "a b") + self.assertEqual(next(it).span(), (1, 2)) + self.assertRaises(StopIteration, lambda: next(it)) + + scanner = regex.compile(r"\s").scanner("a b") + self.assertEqual(scanner.search().span(), (1, 2)) + self.assertEqual(scanner.search(), None) + + def test_bug_817234(self): + it = regex.finditer(r".*", "asdf") + self.assertEqual(next(it).span(), (0, 4)) + self.assertEqual(next(it).span(), (4, 4)) + self.assertRaises(StopIteration, 
lambda: next(it)) + + def test_empty_array(self): + # SF buf 1647541. + import array + for typecode in 'bBuhHiIlLfd': + a = array.array(typecode) + self.assertEqual(regex.compile(b"bla").match(a), None) + self.assertEqual(regex.compile(b"").match(a)[1 : ], ()) + + def test_inline_flags(self): + # Bug #1700. + upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Below + lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Below + + p = regex.compile(upper_char, regex.I | regex.U) + self.assertEqual(bool(p.match(lower_char)), True) + + p = regex.compile(lower_char, regex.I | regex.U) + self.assertEqual(bool(p.match(upper_char)), True) + + p = regex.compile('(?i)' + upper_char, regex.U) + self.assertEqual(bool(p.match(lower_char)), True) + + p = regex.compile('(?i)' + lower_char, regex.U) + self.assertEqual(bool(p.match(upper_char)), True) + + p = regex.compile('(?iu)' + upper_char) + self.assertEqual(bool(p.match(lower_char)), True) + + p = regex.compile('(?iu)' + lower_char) + self.assertEqual(bool(p.match(upper_char)), True) + + self.assertEqual(bool(regex.match(r"(?i)a", "A")), True) + self.assertEqual(bool(regex.match(r"a(?i)", "A")), True) + self.assertEqual(bool(regex.match(r"(?iV1)a", "A")), True) + self.assertEqual(regex.match(r"a(?iV1)", "A"), None) + + def test_dollar_matches_twice(self): + # $ matches the end of string, and just before the terminating \n. + pattern = regex.compile('$') + self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#') + self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#') + self.assertEqual(pattern.sub('#', '\n'), '#\n#') + + pattern = regex.compile('$', regex.MULTILINE) + self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#') + self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#') + self.assertEqual(pattern.sub('#', '\n'), '#\n#') + + def test_bytes_str_mixing(self): + # Mixing str and bytes is disallowed. 
+ pat = regex.compile('.') + bpat = regex.compile(b'.') + self.assertRaisesRegex(TypeError, self.STR_PAT_ON_BYTES, lambda: + pat.match(b'b')) + self.assertRaisesRegex(TypeError, self.BYTES_PAT_ON_STR, lambda: + bpat.match('b')) + self.assertRaisesRegex(TypeError, self.STR_PAT_BYTES_TEMPL, lambda: + pat.sub(b'b', 'c')) + self.assertRaisesRegex(TypeError, self.STR_PAT_ON_BYTES, lambda: + pat.sub('b', b'c')) + self.assertRaisesRegex(TypeError, self.STR_PAT_ON_BYTES, lambda: + pat.sub(b'b', b'c')) + self.assertRaisesRegex(TypeError, self.BYTES_PAT_ON_STR, lambda: + bpat.sub(b'b', 'c')) + self.assertRaisesRegex(TypeError, self.BYTES_PAT_STR_TEMPL, lambda: + bpat.sub('b', b'c')) + self.assertRaisesRegex(TypeError, self.BYTES_PAT_ON_STR, lambda: + bpat.sub('b', 'c')) + + self.assertRaisesRegex(ValueError, self.BYTES_PAT_UNI_FLAG, lambda: + regex.compile(br'\w', regex.UNICODE)) + self.assertRaisesRegex(ValueError, self.BYTES_PAT_UNI_FLAG, lambda: + regex.compile(br'(?u)\w')) + self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda: + regex.compile(r'\w', regex.UNICODE | regex.ASCII)) + self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda: + regex.compile(r'(?u)\w', regex.ASCII)) + self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda: + regex.compile(r'(?a)\w', regex.UNICODE)) + self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda: + regex.compile(r'(?au)\w')) + + def test_ascii_and_unicode_flag(self): + # String patterns. 
+ for flags in (0, regex.UNICODE): + pat = regex.compile('\xc0', flags | regex.IGNORECASE) + self.assertEqual(bool(pat.match('\xe0')), True) + pat = regex.compile(r'\w', flags) + self.assertEqual(bool(pat.match('\xe0')), True) + + pat = regex.compile('\xc0', regex.ASCII | regex.IGNORECASE) + self.assertEqual(pat.match('\xe0'), None) + pat = regex.compile('(?a)\xc0', regex.IGNORECASE) + self.assertEqual(pat.match('\xe0'), None) + pat = regex.compile(r'\w', regex.ASCII) + self.assertEqual(pat.match('\xe0'), None) + pat = regex.compile(r'(?a)\w') + self.assertEqual(pat.match('\xe0'), None) + + # Bytes patterns. + for flags in (0, regex.ASCII): + pat = regex.compile(b'\xc0', flags | regex.IGNORECASE) + self.assertEqual(pat.match(b'\xe0'), None) + pat = regex.compile(br'\w') + self.assertEqual(pat.match(b'\xe0'), None) + + self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda: + regex.compile(r'(?au)\w')) + + def test_subscripting_match(self): + m = regex.match(r'(?\w)', 'xy') + if not m: + self.fail("Failed: expected match but returned None") + elif not m or m[0] != m.group(0) or m[1] != m.group(1): + self.fail("Failed") + if not m: + self.fail("Failed: expected match but returned None") + elif m[:] != ('x', 'x'): + self.fail("Failed: expected \"('x', 'x')\" but got {} instead".format(ascii(m[:]))) + + def test_new_named_groups(self): + m0 = regex.match(r'(?P\w)', 'x') + m1 = regex.match(r'(?\w)', 'x') + if not (m0 and m1 and m0[:] == m1[:]): + self.fail("Failed") + + def test_properties(self): + self.assertEqual(regex.match(b'(?ai)\xC0', b'\xE0'), None) + self.assertEqual(regex.match(br'(?ai)\xC0', b'\xE0'), None) + self.assertEqual(regex.match(br'(?a)\w', b'\xE0'), None) + self.assertEqual(bool(regex.match(r'\w', '\xE0')), True) + + # Dropped the following test. It's not possible to determine what the + # correct result should be in the general case. 
+# self.assertEqual(bool(regex.match(br'(?L)\w', b'\xE0')), +# b'\xE0'.isalnum()) + + self.assertEqual(bool(regex.match(br'(?L)\d', b'0')), True) + self.assertEqual(bool(regex.match(br'(?L)\s', b' ')), True) + self.assertEqual(bool(regex.match(br'(?L)\w', b'a')), True) + self.assertEqual(regex.match(br'(?L)\d', b'?'), None) + self.assertEqual(regex.match(br'(?L)\s', b'?'), None) + self.assertEqual(regex.match(br'(?L)\w', b'?'), None) + + self.assertEqual(regex.match(br'(?L)\D', b'0'), None) + self.assertEqual(regex.match(br'(?L)\S', b' '), None) + self.assertEqual(regex.match(br'(?L)\W', b'a'), None) + self.assertEqual(bool(regex.match(br'(?L)\D', b'?')), True) + self.assertEqual(bool(regex.match(br'(?L)\S', b'?')), True) + self.assertEqual(bool(regex.match(br'(?L)\W', b'?')), True) + + self.assertEqual(bool(regex.match(r'\p{Cyrillic}', + '\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'(?i)\p{Cyrillic}', + '\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'\p{IsCyrillic}', + '\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'\p{Script=Cyrillic}', + '\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'\p{InCyrillic}', + '\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'\p{Block=Cyrillic}', + '\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'[[:Cyrillic:]]', + '\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'[[:IsCyrillic:]]', + '\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'[[:Script=Cyrillic:]]', + '\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'[[:InCyrillic:]]', + '\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'[[:Block=Cyrillic:]]', + '\N{CYRILLIC CAPITAL LETTER A}')), True) + + self.assertEqual(bool(regex.match(r'\P{Cyrillic}', + '\N{LATIN CAPITAL LETTER A}')), True) + 
self.assertEqual(bool(regex.match(r'\P{IsCyrillic}', + '\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'\P{Script=Cyrillic}', + '\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'\P{InCyrillic}', + '\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'\P{Block=Cyrillic}', + '\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'\p{^Cyrillic}', + '\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'\p{^IsCyrillic}', + '\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'\p{^Script=Cyrillic}', + '\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'\p{^InCyrillic}', + '\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'\p{^Block=Cyrillic}', + '\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'[[:^Cyrillic:]]', + '\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'[[:^IsCyrillic:]]', + '\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'[[:^Script=Cyrillic:]]', + '\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'[[:^InCyrillic:]]', + '\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(r'[[:^Block=Cyrillic:]]', + '\N{LATIN CAPITAL LETTER A}')), True) + + self.assertEqual(bool(regex.match(r'\d', '0')), True) + self.assertEqual(bool(regex.match(r'\s', ' ')), True) + self.assertEqual(bool(regex.match(r'\w', 'A')), True) + self.assertEqual(regex.match(r"\d", "?"), None) + self.assertEqual(regex.match(r"\s", "?"), None) + self.assertEqual(regex.match(r"\w", "?"), None) + self.assertEqual(regex.match(r"\D", "0"), None) + self.assertEqual(regex.match(r"\S", " "), None) + self.assertEqual(regex.match(r"\W", "A"), None) + self.assertEqual(bool(regex.match(r'\D', '?')), True) + self.assertEqual(bool(regex.match(r'\S', '?')), True) + self.assertEqual(bool(regex.match(r'\W', 
'?')), True) + + self.assertEqual(bool(regex.match(r'\p{L}', 'A')), True) + self.assertEqual(bool(regex.match(r'\p{L}', 'a')), True) + self.assertEqual(bool(regex.match(r'\p{Lu}', 'A')), True) + self.assertEqual(bool(regex.match(r'\p{Ll}', 'a')), True) + + self.assertEqual(bool(regex.match(r'(?i)a', 'a')), True) + self.assertEqual(bool(regex.match(r'(?i)a', 'A')), True) + + self.assertEqual(bool(regex.match(r'\w', '0')), True) + self.assertEqual(bool(regex.match(r'\w', 'a')), True) + self.assertEqual(bool(regex.match(r'\w', '_')), True) + + self.assertEqual(regex.match(r"\X", "\xE0").span(), (0, 1)) + self.assertEqual(regex.match(r"\X", "a\u0300").span(), (0, 2)) + self.assertEqual(regex.findall(r"\X", + "a\xE0a\u0300e\xE9e\u0301"), ['a', '\xe0', 'a\u0300', 'e', + '\xe9', 'e\u0301']) + self.assertEqual(regex.findall(r"\X{3}", + "a\xE0a\u0300e\xE9e\u0301"), ['a\xe0a\u0300', 'e\xe9e\u0301']) + self.assertEqual(regex.findall(r"\X", "\r\r\n\u0301A\u0301"), + ['\r', '\r\n', '\u0301', 'A\u0301']) + + self.assertEqual(bool(regex.match(r'\p{Ll}', 'a')), True) + + chars_u = "-09AZaz_\u0393\u03b3" + chars_b = b"-09AZaz_" + word_set = set("Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc".split()) + + tests = [ + (r"\w", chars_u, "09AZaz_\u0393\u03b3"), + (r"[[:word:]]", chars_u, "09AZaz_\u0393\u03b3"), + (r"\W", chars_u, "-"), + (r"[[:^word:]]", chars_u, "-"), + (r"\d", chars_u, "09"), + (r"[[:digit:]]", chars_u, "09"), + (r"\D", chars_u, "-AZaz_\u0393\u03b3"), + (r"[[:^digit:]]", chars_u, "-AZaz_\u0393\u03b3"), + (r"[[:alpha:]]", chars_u, "AZaz\u0393\u03b3"), + (r"[[:^alpha:]]", chars_u, "-09_"), + (r"[[:alnum:]]", chars_u, "09AZaz\u0393\u03b3"), + (r"[[:^alnum:]]", chars_u, "-_"), + (r"[[:xdigit:]]", chars_u, "09Aa"), + (r"[[:^xdigit:]]", chars_u, "-Zz_\u0393\u03b3"), + (r"\p{InBasicLatin}", "a\xE1", "a"), + (r"\P{InBasicLatin}", "a\xE1", "\xE1"), + (r"(?i)\p{InBasicLatin}", "a\xE1", "a"), + (r"(?i)\P{InBasicLatin}", "a\xE1", "\xE1"), + + (br"(?L)\w", chars_b, b"09AZaz_"), + 
(br"(?L)[[:word:]]", chars_b, b"09AZaz_"), + (br"(?L)\W", chars_b, b"-"), + (br"(?L)[[:^word:]]", chars_b, b"-"), + (br"(?L)\d", chars_b, b"09"), + (br"(?L)[[:digit:]]", chars_b, b"09"), + (br"(?L)\D", chars_b, b"-AZaz_"), + (br"(?L)[[:^digit:]]", chars_b, b"-AZaz_"), + (br"(?L)[[:alpha:]]", chars_b, b"AZaz"), + (br"(?L)[[:^alpha:]]", chars_b, b"-09_"), + (br"(?L)[[:alnum:]]", chars_b, b"09AZaz"), + (br"(?L)[[:^alnum:]]", chars_b, b"-_"), + (br"(?L)[[:xdigit:]]", chars_b, b"09Aa"), + (br"(?L)[[:^xdigit:]]", chars_b, b"-Zz_"), + + (br"(?a)\w", chars_b, b"09AZaz_"), + (br"(?a)[[:word:]]", chars_b, b"09AZaz_"), + (br"(?a)\W", chars_b, b"-"), + (br"(?a)[[:^word:]]", chars_b, b"-"), + (br"(?a)\d", chars_b, b"09"), + (br"(?a)[[:digit:]]", chars_b, b"09"), + (br"(?a)\D", chars_b, b"-AZaz_"), + (br"(?a)[[:^digit:]]", chars_b, b"-AZaz_"), + (br"(?a)[[:alpha:]]", chars_b, b"AZaz"), + (br"(?a)[[:^alpha:]]", chars_b, b"-09_"), + (br"(?a)[[:alnum:]]", chars_b, b"09AZaz"), + (br"(?a)[[:^alnum:]]", chars_b, b"-_"), + (br"(?a)[[:xdigit:]]", chars_b, b"09Aa"), + (br"(?a)[[:^xdigit:]]", chars_b, b"-Zz_"), + ] + for pattern, chars, expected in tests: + try: + if chars[ : 0].join(regex.findall(pattern, chars)) != expected: + self.fail("Failed: {}".format(pattern)) + except Exception as e: + self.fail("Failed: {} raised {}".format(pattern, ascii(e))) + + self.assertEqual(bool(regex.match(r"\p{NumericValue=0}", "0")), + True) + self.assertEqual(bool(regex.match(r"\p{NumericValue=1/2}", + "\N{VULGAR FRACTION ONE HALF}")), True) + self.assertEqual(bool(regex.match(r"\p{NumericValue=0.5}", + "\N{VULGAR FRACTION ONE HALF}")), True) + + def test_word_class(self): + self.assertEqual(regex.findall(r"\w+", + " \u0939\u093f\u0928\u094d\u0926\u0940,"), + ['\u0939\u093f\u0928\u094d\u0926\u0940']) + self.assertEqual(regex.findall(r"\W+", + " \u0939\u093f\u0928\u094d\u0926\u0940,"), [' ', ',']) + self.assertEqual(regex.split(r"(?V1)\b", + " \u0939\u093f\u0928\u094d\u0926\u0940,"), [' ', + 
'\u0939\u093f\u0928\u094d\u0926\u0940', ',']) + self.assertEqual(regex.split(r"(?V1)\B", + " \u0939\u093f\u0928\u094d\u0926\u0940,"), ['', ' \u0939', + '\u093f', '\u0928', '\u094d', '\u0926', '\u0940,', '']) + + def test_search_anchor(self): + self.assertEqual(regex.findall(r"\G\w{2}", "abcd ef"), ['ab', 'cd']) + + def test_search_reverse(self): + self.assertEqual(regex.findall(r"(?r).", "abc"), ['c', 'b', 'a']) + self.assertEqual(regex.findall(r"(?r).", "abc", overlapped=True), ['c', + 'b', 'a']) + self.assertEqual(regex.findall(r"(?r)..", "abcde"), ['de', 'bc']) + self.assertEqual(regex.findall(r"(?r)..", "abcde", overlapped=True), + ['de', 'cd', 'bc', 'ab']) + self.assertEqual(regex.findall(r"(?r)(.)(-)(.)", "a-b-c", + overlapped=True), [("b", "-", "c"), ("a", "-", "b")]) + + self.assertEqual([m[0] for m in regex.finditer(r"(?r).", "abc")], ['c', + 'b', 'a']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde", + overlapped=True)], ['de', 'cd', 'bc', 'ab']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r).", "abc")], ['c', + 'b', 'a']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde", + overlapped=True)], ['de', 'cd', 'bc', 'ab']) + + self.assertEqual(regex.findall(r"^|\w+", "foo bar"), ['', 'foo', + 'bar']) + self.assertEqual(regex.findall(r"(?V1)^|\w+", "foo bar"), ['', 'foo', + 'bar']) + self.assertEqual(regex.findall(r"(?r)^|\w+", "foo bar"), ['bar', 'foo', + '']) + self.assertEqual(regex.findall(r"(?rV1)^|\w+", "foo bar"), ['bar', + 'foo', '']) + + self.assertEqual([m[0] for m in regex.finditer(r"^|\w+", "foo bar")], + ['', 'foo', 'bar']) + self.assertEqual([m[0] for m in regex.finditer(r"(?V1)^|\w+", + "foo bar")], ['', 'foo', 'bar']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r)^|\w+", + "foo bar")], ['bar', 'foo', '']) + self.assertEqual([m[0] for m in regex.finditer(r"(?rV1)^|\w+", + "foo bar")], ['bar', 'foo', '']) + + self.assertEqual(regex.findall(r"\G\w{2}", "abcd ef"), ['ab', 'cd']) + 
self.assertEqual(regex.findall(r".{2}(?<=\G.*)", "abcd"), ['ab', 'cd']) + self.assertEqual(regex.findall(r"(?r)\G\w{2}", "abcd ef"), []) + self.assertEqual(regex.findall(r"(?r)\w{2}\G", "abcd ef"), ['ef']) + + self.assertEqual(regex.findall(r"q*", "qqwe"), ['qq', '', '', '']) + self.assertEqual(regex.findall(r"(?V1)q*", "qqwe"), ['qq', '', '', '']) + self.assertEqual(regex.findall(r"(?r)q*", "qqwe"), ['', '', 'qq', '']) + self.assertEqual(regex.findall(r"(?rV1)q*", "qqwe"), ['', '', 'qq', + '']) + + self.assertEqual(regex.findall(".", "abcd", pos=1, endpos=3), ['b', + 'c']) + self.assertEqual(regex.findall(".", "abcd", pos=1, endpos=-1), ['b', + 'c']) + self.assertEqual([m[0] for m in regex.finditer(".", "abcd", pos=1, + endpos=3)], ['b', 'c']) + self.assertEqual([m[0] for m in regex.finditer(".", "abcd", pos=1, + endpos=-1)], ['b', 'c']) + + self.assertEqual([m[0] for m in regex.finditer("(?r).", "abcd", pos=1, + endpos=3)], ['c', 'b']) + self.assertEqual([m[0] for m in regex.finditer("(?r).", "abcd", pos=1, + endpos=-1)], ['c', 'b']) + self.assertEqual(regex.findall("(?r).", "abcd", pos=1, endpos=3), ['c', + 'b']) + self.assertEqual(regex.findall("(?r).", "abcd", pos=1, endpos=-1), + ['c', 'b']) + + self.assertEqual(regex.findall(r"[ab]", "aB", regex.I), ['a', 'B']) + self.assertEqual(regex.findall(r"(?r)[ab]", "aB", regex.I), ['B', 'a']) + + self.assertEqual(regex.findall(r"(?r).{2}", "abc"), ['bc']) + self.assertEqual(regex.findall(r"(?r).{2}", "abc", overlapped=True), + ['bc', 'ab']) + self.assertEqual(regex.findall(r"(\w+) (\w+)", + "first second third fourth fifth"), [('first', 'second'), ('third', + 'fourth')]) + self.assertEqual(regex.findall(r"(?r)(\w+) (\w+)", + "first second third fourth fifth"), [('fourth', 'fifth'), ('second', + 'third')]) + + self.assertEqual([m[0] for m in regex.finditer(r"(?r).{2}", "abc")], + ['bc']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r).{2}", "abc", + overlapped=True)], ['bc', 'ab']) + self.assertEqual([m[0] for 
m in regex.finditer(r"(\w+) (\w+)", + "first second third fourth fifth")], ['first second', + 'third fourth']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r)(\w+) (\w+)", + "first second third fourth fifth")], ['fourth fifth', + 'second third']) + + self.assertEqual(regex.search("abcdef", "abcdef").span(), (0, 6)) + self.assertEqual(regex.search("(?r)abcdef", "abcdef").span(), (0, 6)) + self.assertEqual(regex.search("(?i)abcdef", "ABCDEF").span(), (0, 6)) + self.assertEqual(regex.search("(?ir)abcdef", "ABCDEF").span(), (0, 6)) + + self.assertEqual(regex.sub(r"(.)", r"\1", "abc"), 'abc') + self.assertEqual(regex.sub(r"(?r)(.)", r"\1", "abc"), 'abc') + + def test_atomic(self): + # Issue 433030. + self.assertEqual(regex.search(r"(?>a*)a", "aa"), None) + + def test_possessive(self): + # Single-character non-possessive. + self.assertEqual(regex.search(r"a?a", "a").span(), (0, 1)) + self.assertEqual(regex.search(r"a*a", "aaa").span(), (0, 3)) + self.assertEqual(regex.search(r"a+a", "aaa").span(), (0, 3)) + self.assertEqual(regex.search(r"a{1,3}a", "aaa").span(), (0, 3)) + + # Multiple-character non-possessive. + self.assertEqual(regex.search(r"(?:ab)?ab", "ab").span(), (0, 2)) + self.assertEqual(regex.search(r"(?:ab)*ab", "ababab").span(), (0, 6)) + self.assertEqual(regex.search(r"(?:ab)+ab", "ababab").span(), (0, 6)) + self.assertEqual(regex.search(r"(?:ab){1,3}ab", "ababab").span(), (0, + 6)) + + # Single-character possessive. + self.assertEqual(regex.search(r"a?+a", "a"), None) + self.assertEqual(regex.search(r"a*+a", "aaa"), None) + self.assertEqual(regex.search(r"a++a", "aaa"), None) + self.assertEqual(regex.search(r"a{1,3}+a", "aaa"), None) + + # Multiple-character possessive. 
+ self.assertEqual(regex.search(r"(?:ab)?+ab", "ab"), None) + self.assertEqual(regex.search(r"(?:ab)*+ab", "ababab"), None) + self.assertEqual(regex.search(r"(?:ab)++ab", "ababab"), None) + self.assertEqual(regex.search(r"(?:ab){1,3}+ab", "ababab"), None) + + def test_zerowidth(self): + # Issue 3262. + if sys.version_info >= (3, 7, 0): + self.assertEqual(regex.split(r"\b", "a b"), ['', 'a', ' ', 'b', + '']) + else: + self.assertEqual(regex.split(r"\b", "a b"), ['a b']) + self.assertEqual(regex.split(r"(?V1)\b", "a b"), ['', 'a', ' ', 'b', + '']) + + # Issue 1647489. + self.assertEqual(regex.findall(r"^|\w+", "foo bar"), ['', 'foo', + 'bar']) + self.assertEqual([m[0] for m in regex.finditer(r"^|\w+", "foo bar")], + ['', 'foo', 'bar']) + self.assertEqual(regex.findall(r"(?r)^|\w+", "foo bar"), ['bar', + 'foo', '']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r)^|\w+", + "foo bar")], ['bar', 'foo', '']) + self.assertEqual(regex.findall(r"(?V1)^|\w+", "foo bar"), ['', 'foo', + 'bar']) + self.assertEqual([m[0] for m in regex.finditer(r"(?V1)^|\w+", + "foo bar")], ['', 'foo', 'bar']) + self.assertEqual(regex.findall(r"(?rV1)^|\w+", "foo bar"), ['bar', + 'foo', '']) + self.assertEqual([m[0] for m in regex.finditer(r"(?rV1)^|\w+", + "foo bar")], ['bar', 'foo', '']) + + if sys.version_info >= (3, 7, 0): + self.assertEqual(regex.split("", "xaxbxc"), ['', 'x', 'a', 'x', + 'b', 'x', 'c', '']) + self.assertEqual([m for m in regex.splititer("", "xaxbxc")], ['', + 'x', 'a', 'x', 'b', 'x', 'c', '']) + else: + self.assertEqual(regex.split("", "xaxbxc"), ['xaxbxc']) + self.assertEqual([m for m in regex.splititer("", "xaxbxc")], + ['xaxbxc']) + + if sys.version_info >= (3, 7, 0): + self.assertEqual(regex.split("(?r)", "xaxbxc"), ['', 'c', 'x', 'b', + 'x', 'a', 'x', '']) + self.assertEqual([m for m in regex.splititer("(?r)", "xaxbxc")], + ['', 'c', 'x', 'b', 'x', 'a', 'x', '']) + else: + self.assertEqual(regex.split("(?r)", "xaxbxc"), ['xaxbxc']) + self.assertEqual([m for m 
in regex.splititer("(?r)", "xaxbxc")], + ['xaxbxc']) + + self.assertEqual(regex.split("(?V1)", "xaxbxc"), ['', 'x', 'a', 'x', + 'b', 'x', 'c', '']) + self.assertEqual([m for m in regex.splititer("(?V1)", "xaxbxc")], ['', + 'x', 'a', 'x', 'b', 'x', 'c', '']) + + self.assertEqual(regex.split("(?rV1)", "xaxbxc"), ['', 'c', 'x', 'b', + 'x', 'a', 'x', '']) + self.assertEqual([m for m in regex.splititer("(?rV1)", "xaxbxc")], ['', + 'c', 'x', 'b', 'x', 'a', 'x', '']) + + def test_scoped_and_inline_flags(self): + # Issues 433028, 433024, 433027. + self.assertEqual(regex.search(r"(?i)Ab", "ab").span(), (0, 2)) + self.assertEqual(regex.search(r"(?i:A)b", "ab").span(), (0, 2)) + self.assertEqual(regex.search(r"A(?i)b", "ab").span(), (0, 2)) + self.assertEqual(regex.search(r"A(?iV1)b", "ab"), None) + + self.assertRaisesRegex(regex.error, self.CANT_TURN_OFF, lambda: + regex.search(r"(?V0-i)Ab", "ab", flags=regex.I)) + + self.assertEqual(regex.search(r"(?V0)Ab", "ab"), None) + self.assertEqual(regex.search(r"(?V1)Ab", "ab"), None) + self.assertEqual(regex.search(r"(?V1-i)Ab", "ab", flags=regex.I), None) + self.assertEqual(regex.search(r"(?-i:A)b", "ab", flags=regex.I), None) + self.assertEqual(regex.search(r"A(?V1-i)b", "ab", + flags=regex.I).span(), (0, 2)) + + def test_repeated_repeats(self): + # Issue 2537. + self.assertEqual(regex.search(r"(?:a+)+", "aaa").span(), (0, 3)) + self.assertEqual(regex.search(r"(?:(?:ab)+c)+", "abcabc").span(), (0, + 6)) + + # Hg issue 286. 
+ self.assertEqual(regex.search(r"(?:a+){2,}", "aaa").span(), (0, 3)) + + def test_lookbehind(self): + self.assertEqual(regex.search(r"123(?<=a\d+)", "a123").span(), (1, 4)) + self.assertEqual(regex.search(r"123(?<=a\d+)", "b123"), None) + self.assertEqual(regex.search(r"123(?= (3, 7, 0): + self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "xy"), + 'y-x-') + else: + self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "xy"), + 'y-x') + self.assertEqual(regex.sub(r"(?V1)(x)?(y)?", r"\2-\1", "xy"), 'y-x-') + if sys.version_info >= (3, 7, 0): + self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "x"), '-x-') + else: + self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "x"), '-x') + self.assertEqual(regex.sub(r"(?V1)(x)?(y)?", r"\2-\1", "x"), '-x-') + if sys.version_info >= (3, 7, 0): + self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "y"), 'y--') + else: + self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "y"), 'y-') + self.assertEqual(regex.sub(r"(?V1)(x)?(y)?", r"\2-\1", "y"), 'y--') + + def test_bug_10328 (self): + # Issue 10328. 
+ pat = regex.compile(r'(?mV0)(?P[ \t]+\r*$)|(?P(?<=[^\n])\Z)') + if sys.version_info >= (3, 7, 0): + self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>', + 'foobar '), ('foobar', 2)) + else: + self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>', + 'foobar '), ('foobar', 1)) + self.assertEqual([m.group() for m in pat.finditer('foobar ')], [' ', + '']) + pat = regex.compile(r'(?mV1)(?P[ \t]+\r*$)|(?P(?<=[^\n])\Z)') + self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>', + 'foobar '), ('foobar', 2)) + self.assertEqual([m.group() for m in pat.finditer('foobar ')], [' ', + '']) + + def test_overlapped(self): + self.assertEqual(regex.findall(r"..", "abcde"), ['ab', 'cd']) + self.assertEqual(regex.findall(r"..", "abcde", overlapped=True), ['ab', + 'bc', 'cd', 'de']) + self.assertEqual(regex.findall(r"(?r)..", "abcde"), ['de', 'bc']) + self.assertEqual(regex.findall(r"(?r)..", "abcde", overlapped=True), + ['de', 'cd', 'bc', 'ab']) + self.assertEqual(regex.findall(r"(.)(-)(.)", "a-b-c", overlapped=True), + [("a", "-", "b"), ("b", "-", "c")]) + + self.assertEqual([m[0] for m in regex.finditer(r"..", "abcde")], ['ab', + 'cd']) + self.assertEqual([m[0] for m in regex.finditer(r"..", "abcde", + overlapped=True)], ['ab', 'bc', 'cd', 'de']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde")], + ['de', 'bc']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde", + overlapped=True)], ['de', 'cd', 'bc', 'ab']) + + self.assertEqual([m.groups() for m in regex.finditer(r"(.)(-)(.)", + "a-b-c", overlapped=True)], [("a", "-", "b"), ("b", "-", "c")]) + self.assertEqual([m.groups() for m in regex.finditer(r"(?r)(.)(-)(.)", + "a-b-c", overlapped=True)], [("b", "-", "c"), ("a", "-", "b")]) + + def test_splititer(self): + self.assertEqual(regex.split(r",", "a,b,,c,"), ['a', 'b', '', 'c', '']) + self.assertEqual([m for m in regex.splititer(r",", "a,b,,c,")], ['a', + 'b', '', 'c', '']) + + def test_grapheme(self): + 
self.assertEqual(regex.match(r"\X", "\xE0").span(), (0, 1)) + self.assertEqual(regex.match(r"\X", "a\u0300").span(), (0, 2)) + + self.assertEqual(regex.findall(r"\X", + "a\xE0a\u0300e\xE9e\u0301"), ['a', '\xe0', 'a\u0300', 'e', + '\xe9', 'e\u0301']) + self.assertEqual(regex.findall(r"\X{3}", + "a\xE0a\u0300e\xE9e\u0301"), ['a\xe0a\u0300', 'e\xe9e\u0301']) + self.assertEqual(regex.findall(r"\X", "\r\r\n\u0301A\u0301"), + ['\r', '\r\n', '\u0301', 'A\u0301']) + + def test_word_boundary(self): + text = 'The quick ("brown") fox can\'t jump 32.3 feet, right?' + self.assertEqual(regex.split(r'(?V1)\b', text), ['', 'The', ' ', + 'quick', ' ("', 'brown', '") ', 'fox', ' ', 'can', "'", 't', + ' ', 'jump', ' ', '32', '.', '3', ' ', 'feet', ', ', + 'right', '?']) + self.assertEqual(regex.split(r'(?V1w)\b', text), ['', 'The', ' ', + 'quick', ' ', '(', '"', 'brown', '"', ')', ' ', 'fox', ' ', + "can't", ' ', 'jump', ' ', '32.3', ' ', 'feet', ',', ' ', + 'right', '?', '']) + + text = "The fox" + self.assertEqual(regex.split(r'(?V1)\b', text), ['', 'The', ' ', + 'fox', '']) + self.assertEqual(regex.split(r'(?V1w)\b', text), ['', 'The', ' ', + 'fox', '']) + + text = "can't aujourd'hui l'objectif" + self.assertEqual(regex.split(r'(?V1)\b', text), ['', 'can', "'", + 't', ' ', 'aujourd', "'", 'hui', ' ', 'l', "'", 'objectif', + '']) + self.assertEqual(regex.split(r'(?V1w)\b', text), ['', "can't", ' ', + "aujourd'hui", ' ', "l'objectif", '']) + + def test_line_boundary(self): + self.assertEqual(regex.findall(r".+", "Line 1\nLine 2\n"), ["Line 1", + "Line 2"]) + self.assertEqual(regex.findall(r".+", "Line 1\rLine 2\r"), + ["Line 1\rLine 2\r"]) + self.assertEqual(regex.findall(r".+", "Line 1\r\nLine 2\r\n"), + ["Line 1\r", "Line 2\r"]) + self.assertEqual(regex.findall(r"(?w).+", "Line 1\nLine 2\n"), + ["Line 1", "Line 2"]) + self.assertEqual(regex.findall(r"(?w).+", "Line 1\rLine 2\r"), + ["Line 1", "Line 2"]) + self.assertEqual(regex.findall(r"(?w).+", "Line 1\r\nLine 2\r\n"), + ["Line 
1", "Line 2"]) + + self.assertEqual(regex.search(r"^abc", "abc").start(), 0) + self.assertEqual(regex.search(r"^abc", "\nabc"), None) + self.assertEqual(regex.search(r"^abc", "\rabc"), None) + self.assertEqual(regex.search(r"(?w)^abc", "abc").start(), 0) + self.assertEqual(regex.search(r"(?w)^abc", "\nabc"), None) + self.assertEqual(regex.search(r"(?w)^abc", "\rabc"), None) + + self.assertEqual(regex.search(r"abc$", "abc").start(), 0) + self.assertEqual(regex.search(r"abc$", "abc\n").start(), 0) + self.assertEqual(regex.search(r"abc$", "abc\r"), None) + self.assertEqual(regex.search(r"(?w)abc$", "abc").start(), 0) + self.assertEqual(regex.search(r"(?w)abc$", "abc\n").start(), 0) + self.assertEqual(regex.search(r"(?w)abc$", "abc\r").start(), 0) + + self.assertEqual(regex.search(r"(?m)^abc", "abc").start(), 0) + self.assertEqual(regex.search(r"(?m)^abc", "\nabc").start(), 1) + self.assertEqual(regex.search(r"(?m)^abc", "\rabc"), None) + self.assertEqual(regex.search(r"(?mw)^abc", "abc").start(), 0) + self.assertEqual(regex.search(r"(?mw)^abc", "\nabc").start(), 1) + self.assertEqual(regex.search(r"(?mw)^abc", "\rabc").start(), 1) + + self.assertEqual(regex.search(r"(?m)abc$", "abc").start(), 0) + self.assertEqual(regex.search(r"(?m)abc$", "abc\n").start(), 0) + self.assertEqual(regex.search(r"(?m)abc$", "abc\r"), None) + self.assertEqual(regex.search(r"(?mw)abc$", "abc").start(), 0) + self.assertEqual(regex.search(r"(?mw)abc$", "abc\n").start(), 0) + self.assertEqual(regex.search(r"(?mw)abc$", "abc\r").start(), 0) + + def test_branch_reset(self): + self.assertEqual(regex.match(r"(?:(a)|(b))(c)", "ac").groups(), ('a', + None, 'c')) + self.assertEqual(regex.match(r"(?:(a)|(b))(c)", "bc").groups(), (None, + 'b', 'c')) + self.assertEqual(regex.match(r"(?:(?a)|(?b))(?c)", + "ac").groups(), ('a', None, 'c')) + self.assertEqual(regex.match(r"(?:(?a)|(?b))(?c)", + "bc").groups(), (None, 'b', 'c')) + + self.assertEqual(regex.match(r"(?a)(?:(?b)|(?c))(?d)", + "abd").groups(), 
('a', 'b', None, 'd')) + self.assertEqual(regex.match(r"(?a)(?:(?b)|(?c))(?d)", + "acd").groups(), ('a', None, 'c', 'd')) + self.assertEqual(regex.match(r"(a)(?:(b)|(c))(d)", "abd").groups(), + ('a', 'b', None, 'd')) + + self.assertEqual(regex.match(r"(a)(?:(b)|(c))(d)", "acd").groups(), + ('a', None, 'c', 'd')) + self.assertEqual(regex.match(r"(a)(?|(b)|(b))(d)", "abd").groups(), + ('a', 'b', 'd')) + self.assertEqual(regex.match(r"(?|(?a)|(?b))(c)", "ac").groups(), + ('a', None, 'c')) + self.assertEqual(regex.match(r"(?|(?a)|(?b))(c)", "bc").groups(), + (None, 'b', 'c')) + self.assertEqual(regex.match(r"(?|(?a)|(?b))(c)", "ac").groups(), + ('a', 'c')) + + self.assertEqual(regex.match(r"(?|(?a)|(?b))(c)", "bc").groups(), + ('b', 'c')) + + self.assertEqual(regex.match(r"(?|(?a)(?b)|(?c)(?d))(e)", + "abe").groups(), ('a', 'b', 'e')) + self.assertEqual(regex.match(r"(?|(?a)(?b)|(?c)(?d))(e)", + "cde").groups(), ('d', 'c', 'e')) + self.assertEqual(regex.match(r"(?|(?a)(?b)|(?c)(d))(e)", + "abe").groups(), ('a', 'b', 'e')) + self.assertEqual(regex.match(r"(?|(?a)(?b)|(?c)(d))(e)", + "cde").groups(), ('d', 'c', 'e')) + self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(d))(e)", + "abe").groups(), ('a', 'b', 'e')) + self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(d))(e)", + "cde").groups(), ('c', 'd', 'e')) + + # Hg issue 87: Allow duplicate names of groups + self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(?d))(e)", + "abe").groups(), ("a", "b", "e")) + self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(?d))(e)", + "abe").capturesdict(), {"a": ["a"], "b": ["b"]}) + self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(?d))(e)", + "cde").groups(), ("d", None, "e")) + self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(?d))(e)", + "cde").capturesdict(), {"a": ["c", "d"], "b": []}) + + def test_set(self): + self.assertEqual(regex.match(r"[a]", "a").span(), (0, 1)) + self.assertEqual(regex.match(r"(?i)[a]", "A").span(), (0, 1)) + self.assertEqual(regex.match(r"[a-b]", r"a").span(), (0, 1)) + 
self.assertEqual(regex.match(r"(?i)[a-b]", r"A").span(), (0, 1)) + + self.assertEqual(regex.sub(r"(?V0)([][])", r"-", "a[b]c"), "a-b-c") + + self.assertEqual(regex.findall(r"[\p{Alpha}]", "a0"), ["a"]) + self.assertEqual(regex.findall(r"(?i)[\p{Alpha}]", "A0"), ["A"]) + + self.assertEqual(regex.findall(r"[a\p{Alpha}]", "ab0"), ["a", "b"]) + self.assertEqual(regex.findall(r"[a\P{Alpha}]", "ab0"), ["a", "0"]) + self.assertEqual(regex.findall(r"(?i)[a\p{Alpha}]", "ab0"), ["a", + "b"]) + self.assertEqual(regex.findall(r"(?i)[a\P{Alpha}]", "ab0"), ["a", + "0"]) + + self.assertEqual(regex.findall(r"[a-b\p{Alpha}]", "abC0"), ["a", + "b", "C"]) + self.assertEqual(regex.findall(r"(?i)[a-b\p{Alpha}]", "AbC0"), ["A", + "b", "C"]) + + self.assertEqual(regex.findall(r"[\p{Alpha}]", "a0"), ["a"]) + self.assertEqual(regex.findall(r"[\P{Alpha}]", "a0"), ["0"]) + self.assertEqual(regex.findall(r"[^\p{Alpha}]", "a0"), ["0"]) + self.assertEqual(regex.findall(r"[^\P{Alpha}]", "a0"), ["a"]) + + self.assertEqual("".join(regex.findall(r"[^\d-h]", "a^b12c-h")), + 'a^bc') + self.assertEqual("".join(regex.findall(r"[^\dh]", "a^b12c-h")), + 'a^bc-') + self.assertEqual("".join(regex.findall(r"[^h\s\db]", "a^b 12c-h")), + 'a^c-') + self.assertEqual("".join(regex.findall(r"[^b\w]", "a b")), ' ') + self.assertEqual("".join(regex.findall(r"[^b\S]", "a b")), ' ') + self.assertEqual("".join(regex.findall(r"[^8\d]", "a 1b2")), 'a b') + + all_chars = "".join(chr(c) for c in range(0x100)) + self.assertEqual(len(regex.findall(r"\p{ASCII}", all_chars)), 128) + self.assertEqual(len(regex.findall(r"\p{Letter}", all_chars)), + 117) + self.assertEqual(len(regex.findall(r"\p{Digit}", all_chars)), 10) + + # Set operators + self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}&&\p{Letter}]", + all_chars)), 52) + self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}&&\p{Alnum}&&\p{Letter}]", + all_chars)), 52) + self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}&&\p{Alnum}&&\p{Digit}]", + all_chars)), 10) + 
self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}&&\p{Cc}]", + all_chars)), 33) + self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}&&\p{Graph}]", + all_chars)), 94) + self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}--\p{Cc}]", + all_chars)), 95) + self.assertEqual(len(regex.findall(r"[\p{Letter}\p{Digit}]", + all_chars)), 127) + self.assertEqual(len(regex.findall(r"(?V1)[\p{Letter}||\p{Digit}]", + all_chars)), 127) + self.assertEqual(len(regex.findall(r"\p{HexDigit}", all_chars)), + 22) + self.assertEqual(len(regex.findall(r"(?V1)[\p{HexDigit}~~\p{Digit}]", + all_chars)), 12) + self.assertEqual(len(regex.findall(r"(?V1)[\p{Digit}~~\p{HexDigit}]", + all_chars)), 12) + + self.assertEqual(repr(type(regex.compile(r"(?V0)([][-])"))), + self.PATTERN_CLASS) + self.assertEqual(regex.findall(r"(?V1)[[a-z]--[aei]]", "abc"), ["b", + "c"]) + self.assertEqual(regex.findall(r"(?iV1)[[a-z]--[aei]]", "abc"), ["b", + "c"]) + self.assertEqual(regex.findall(r"(?V1)[\w--a]","abc"), ["b", "c"]) + self.assertEqual(regex.findall(r"(?iV1)[\w--a]","abc"), ["b", "c"]) + + def test_various(self): + tests = [ + # Test ?P< and ?P= extensions. + ('(?Pa)', '', '', regex.error, self.BAD_GROUP_NAME), # Begins with a digit. + ('(?Pa)', '', '', regex.error, self.BAD_GROUP_NAME), # Begins with an illegal char. + ('(?Pa)', '', '', regex.error, self.BAD_GROUP_NAME), # Begins with an illegal char. + + # Same tests, for the ?P= form. + ('(?Pa)(?P=foo_123', 'aa', '', regex.error, + self.MISSING_RPAREN), + ('(?Pa)(?P=1)', 'aa', '1', ascii('a')), + ('(?Pa)(?P=0)', 'aa', '', regex.error, + self.BAD_GROUP_NAME), + ('(?Pa)(?P=-1)', 'aa', '', regex.error, + self.BAD_GROUP_NAME), + ('(?Pa)(?P=!)', 'aa', '', regex.error, + self.BAD_GROUP_NAME), + ('(?Pa)(?P=foo_124)', 'aa', '', regex.error, + self.UNKNOWN_GROUP), # Backref to undefined group. + + ('(?Pa)', 'a', '1', ascii('a')), + ('(?Pa)(?P=foo_123)', 'aa', '1', ascii('a')), + + # Mal-formed \g in pattern treated as literal for compatibility. 
+ (r'(?a)\ga)\g<1>', 'aa', '1', ascii('a')), + (r'(?a)\g', 'aa', '', ascii(None)), + (r'(?a)\g', 'aa', '', regex.error, + self.UNKNOWN_GROUP), # Backref to undefined group. + + ('(?a)', 'a', '1', ascii('a')), + (r'(?a)\g', 'aa', '1', ascii('a')), + + # Test octal escapes. + ('\\1', 'a', '', regex.error, self.INVALID_GROUP_REF), # Backreference. + ('[\\1]', '\1', '0', "'\\x01'"), # Character. + ('\\09', chr(0) + '9', '0', ascii(chr(0) + '9')), + ('\\141', 'a', '0', ascii('a')), + ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', + '0,11', ascii(('abcdefghijklk9', 'k'))), + + # Test \0 is handled everywhere. + (r'\0', '\0', '0', ascii('\0')), + (r'[\0a]', '\0', '0', ascii('\0')), + (r'[a\0]', '\0', '0', ascii('\0')), + (r'[^a\0]', '\0', '', ascii(None)), + + # Test various letter escapes. + (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', '0', + ascii('\a\b\f\n\r\t\v')), + (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', '0', + ascii('\a\b\f\n\r\t\v')), + (r'\xff', '\377', '0', ascii(chr(255))), + + # New \x semantics. + (r'\x00ffffffffffffff', '\377', '', ascii(None)), + (r'\x00f', '\017', '', ascii(None)), + (r'\x00fe', '\376', '', ascii(None)), + + (r'\x00ff', '\377', '', ascii(None)), + (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', '0', ascii('\t\n\v\r\f\ag')), + ('\t\n\v\r\f\a\\g', '\t\n\v\r\f\ag', '0', ascii('\t\n\v\r\f\ag')), + (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', '0', ascii(chr(9) + chr(10) + + chr(11) + chr(13) + chr(12) + chr(7))), + (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', '0', + ascii('\t\n\v\r\f\b')), + + (r"^\w+=(\\[\000-\277]|[^\n\\])*", + "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", '0', + ascii("SRC=eval.c g.c blah blah blah \\\\")), + + # Test that . only matches \n in DOTALL mode. 
+ ('a.b', 'acb', '0', ascii('acb')), + ('a.b', 'a\nb', '', ascii(None)), + ('a.*b', 'acc\nccb', '', ascii(None)), + ('a.{4,5}b', 'acc\nccb', '', ascii(None)), + ('a.b', 'a\rb', '0', ascii('a\rb')), + # The new behaviour is that the inline flag affects only what follows. + ('a.b(?s)', 'a\nb', '0', ascii('a\nb')), + ('a.b(?sV1)', 'a\nb', '', ascii(None)), + ('(?s)a.b', 'a\nb', '0', ascii('a\nb')), + ('a.*(?s)b', 'acc\nccb', '0', ascii('acc\nccb')), + ('a.*(?sV1)b', 'acc\nccb', '', ascii(None)), + ('(?s)a.*b', 'acc\nccb', '0', ascii('acc\nccb')), + ('(?s)a.{4,5}b', 'acc\nccb', '0', ascii('acc\nccb')), + + (')', '', '', regex.error, self.TRAILING_CHARS), # Unmatched right bracket. + ('', '', '0', "''"), # Empty pattern. + ('abc', 'abc', '0', ascii('abc')), + ('abc', 'xbc', '', ascii(None)), + ('abc', 'axc', '', ascii(None)), + ('abc', 'abx', '', ascii(None)), + ('abc', 'xabcy', '0', ascii('abc')), + ('abc', 'ababc', '0', ascii('abc')), + ('ab*c', 'abc', '0', ascii('abc')), + ('ab*bc', 'abc', '0', ascii('abc')), + + ('ab*bc', 'abbc', '0', ascii('abbc')), + ('ab*bc', 'abbbbc', '0', ascii('abbbbc')), + ('ab+bc', 'abbc', '0', ascii('abbc')), + ('ab+bc', 'abc', '', ascii(None)), + ('ab+bc', 'abq', '', ascii(None)), + ('ab+bc', 'abbbbc', '0', ascii('abbbbc')), + ('ab?bc', 'abbc', '0', ascii('abbc')), + ('ab?bc', 'abc', '0', ascii('abc')), + ('ab?bc', 'abbbbc', '', ascii(None)), + ('ab?c', 'abc', '0', ascii('abc')), + + ('^abc$', 'abc', '0', ascii('abc')), + ('^abc$', 'abcc', '', ascii(None)), + ('^abc', 'abcc', '0', ascii('abc')), + ('^abc$', 'aabc', '', ascii(None)), + ('abc$', 'aabc', '0', ascii('abc')), + ('^', 'abc', '0', ascii('')), + ('$', 'abc', '0', ascii('')), + ('a.c', 'abc', '0', ascii('abc')), + ('a.c', 'axc', '0', ascii('axc')), + ('a.*c', 'axyzc', '0', ascii('axyzc')), + + ('a.*c', 'axyzd', '', ascii(None)), + ('a[bc]d', 'abc', '', ascii(None)), + ('a[bc]d', 'abd', '0', ascii('abd')), + ('a[b-d]e', 'abd', '', ascii(None)), + ('a[b-d]e', 'ace', '0', 
ascii('ace')), + ('a[b-d]', 'aac', '0', ascii('ac')), + ('a[-b]', 'a-', '0', ascii('a-')), + ('a[\\-b]', 'a-', '0', ascii('a-')), + ('a[b-]', 'a-', '0', ascii('a-')), + ('a[]b', '-', '', regex.error, self.BAD_SET), + + ('a[', '-', '', regex.error, self.BAD_SET), + ('a\\', '-', '', regex.error, self.BAD_ESCAPE), + ('abc)', '-', '', regex.error, self.TRAILING_CHARS), + ('(abc', '-', '', regex.error, self.MISSING_RPAREN), + ('a]', 'a]', '0', ascii('a]')), + ('a[]]b', 'a]b', '0', ascii('a]b')), + ('a[]]b', 'a]b', '0', ascii('a]b')), + ('a[^bc]d', 'aed', '0', ascii('aed')), + ('a[^bc]d', 'abd', '', ascii(None)), + ('a[^-b]c', 'adc', '0', ascii('adc')), + + ('a[^-b]c', 'a-c', '', ascii(None)), + ('a[^]b]c', 'a]c', '', ascii(None)), + ('a[^]b]c', 'adc', '0', ascii('adc')), + ('\\ba\\b', 'a-', '0', ascii('a')), + ('\\ba\\b', '-a', '0', ascii('a')), + ('\\ba\\b', '-a-', '0', ascii('a')), + ('\\by\\b', 'xy', '', ascii(None)), + ('\\by\\b', 'yz', '', ascii(None)), + ('\\by\\b', 'xyz', '', ascii(None)), + ('x\\b', 'xyz', '', ascii(None)), + + ('x\\B', 'xyz', '0', ascii('x')), + ('\\Bz', 'xyz', '0', ascii('z')), + ('z\\B', 'xyz', '', ascii(None)), + ('\\Bx', 'xyz', '', ascii(None)), + ('\\Ba\\B', 'a-', '', ascii(None)), + ('\\Ba\\B', '-a', '', ascii(None)), + ('\\Ba\\B', '-a-', '', ascii(None)), + ('\\By\\B', 'xy', '', ascii(None)), + ('\\By\\B', 'yz', '', ascii(None)), + ('\\By\\b', 'xy', '0', ascii('y')), + + ('\\by\\B', 'yz', '0', ascii('y')), + ('\\By\\B', 'xyz', '0', ascii('y')), + ('ab|cd', 'abc', '0', ascii('ab')), + ('ab|cd', 'abcd', '0', ascii('ab')), + ('()ef', 'def', '0,1', ascii(('ef', ''))), + ('$b', 'b', '', ascii(None)), + ('a\\(b', 'a(b', '', ascii(('a(b',))), + ('a\\(*b', 'ab', '0', ascii('ab')), + ('a\\(*b', 'a((b', '0', ascii('a((b')), + ('a\\\\b', 'a\\b', '0', ascii('a\\b')), + + ('((a))', 'abc', '0,1,2', ascii(('a', 'a', 'a'))), + ('(a)b(c)', 'abc', '0,1,2', ascii(('abc', 'a', 'c'))), + ('a+b+c', 'aabbabc', '0', ascii('abc')), + ('(a+|b)*', 'ab', '0,1', 
ascii(('ab', 'b'))), + ('(a+|b)+', 'ab', '0,1', ascii(('ab', 'b'))), + ('(a+|b)?', 'ab', '0,1', ascii(('a', 'a'))), + (')(', '-', '', regex.error, self.TRAILING_CHARS), + ('[^ab]*', 'cde', '0', ascii('cde')), + ('abc', '', '', ascii(None)), + ('a*', '', '0', ascii('')), + + ('a|b|c|d|e', 'e', '0', ascii('e')), + ('(a|b|c|d|e)f', 'ef', '0,1', ascii(('ef', 'e'))), + ('abcd*efg', 'abcdefg', '0', ascii('abcdefg')), + ('ab*', 'xabyabbbz', '0', ascii('ab')), + ('ab*', 'xayabbbz', '0', ascii('a')), + ('(ab|cd)e', 'abcde', '0,1', ascii(('cde', 'cd'))), + ('[abhgefdc]ij', 'hij', '0', ascii('hij')), + ('^(ab|cd)e', 'abcde', '', ascii(None)), + ('(abc|)ef', 'abcdef', '0,1', ascii(('ef', ''))), + ('(a|b)c*d', 'abcd', '0,1', ascii(('bcd', 'b'))), + + ('(ab|ab*)bc', 'abc', '0,1', ascii(('abc', 'a'))), + ('a([bc]*)c*', 'abc', '0,1', ascii(('abc', 'bc'))), + ('a([bc]*)(c*d)', 'abcd', '0,1,2', ascii(('abcd', 'bc', 'd'))), + ('a([bc]+)(c*d)', 'abcd', '0,1,2', ascii(('abcd', 'bc', 'd'))), + ('a([bc]*)(c+d)', 'abcd', '0,1,2', ascii(('abcd', 'b', 'cd'))), + ('a[bcd]*dcdcde', 'adcdcde', '0', ascii('adcdcde')), + ('a[bcd]+dcdcde', 'adcdcde', '', ascii(None)), + ('(ab|a)b*c', 'abc', '0,1', ascii(('abc', 'ab'))), + ('((a)(b)c)(d)', 'abcd', '1,2,3,4', ascii(('abc', 'a', 'b', 'd'))), + ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', '0', ascii('alpha')), + + ('^a(bc+|b[eh])g|.h$', 'abh', '0,1', ascii(('bh', None))), + ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', '0,1,2', ascii(('effgz', + 'effgz', None))), + ('(bc+d$|ef*g.|h?i(j|k))', 'ij', '0,1,2', ascii(('ij', 'ij', + 'j'))), + ('(bc+d$|ef*g.|h?i(j|k))', 'effg', '', ascii(None)), + ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', '', ascii(None)), + ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', '0,1,2', ascii(('effgz', + 'effgz', None))), + ('(((((((((a)))))))))', 'a', '0', ascii('a')), + ('multiple words of text', 'uh-uh', '', ascii(None)), + ('multiple words', 'multiple words, yeah', '0', + ascii('multiple words')), + ('(.*)c(.*)', 'abcde', '0,1,2', ascii(('abcde', 'ab', 'de'))), 
+ + ('\\((.*), (.*)\\)', '(a, b)', '2,1', ascii(('b', 'a'))), + ('[k]', 'ab', '', ascii(None)), + ('a[-]?c', 'ac', '0', ascii('ac')), + ('(abc)\\1', 'abcabc', '1', ascii('abc')), + ('([a-c]*)\\1', 'abcabc', '1', ascii('abc')), + ('^(.+)?B', 'AB', '1', ascii('A')), + ('(a+).\\1$', 'aaaaa', '0,1', ascii(('aaaaa', 'aa'))), + ('^(a+).\\1$', 'aaaa', '', ascii(None)), + ('(abc)\\1', 'abcabc', '0,1', ascii(('abcabc', 'abc'))), + ('([a-c]+)\\1', 'abcabc', '0,1', ascii(('abcabc', 'abc'))), + + ('(a)\\1', 'aa', '0,1', ascii(('aa', 'a'))), + ('(a+)\\1', 'aa', '0,1', ascii(('aa', 'a'))), + ('(a+)+\\1', 'aa', '0,1', ascii(('aa', 'a'))), + ('(a).+\\1', 'aba', '0,1', ascii(('aba', 'a'))), + ('(a)ba*\\1', 'aba', '0,1', ascii(('aba', 'a'))), + ('(aa|a)a\\1$', 'aaa', '0,1', ascii(('aaa', 'a'))), + ('(a|aa)a\\1$', 'aaa', '0,1', ascii(('aaa', 'a'))), + ('(a+)a\\1$', 'aaa', '0,1', ascii(('aaa', 'a'))), + ('([abc]*)\\1', 'abcabc', '0,1', ascii(('abcabc', 'abc'))), + ('(a)(b)c|ab', 'ab', '0,1,2', ascii(('ab', None, None))), + + ('(a)+x', 'aaax', '0,1', ascii(('aaax', 'a'))), + ('([ac])+x', 'aacx', '0,1', ascii(('aacx', 'c'))), + ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', '0,1', + ascii(('d:msgs/tdir/sub1/', 'tdir/'))), + ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', + '0,1,2,3', ascii(('track1.title:TBlah blah blah', 'track1', + 'title', 'Blah blah blah'))), + ('([^N]*N)+', 'abNNxyzN', '0,1', ascii(('abNNxyzN', 'xyzN'))), + ('([^N]*N)+', 'abNNxyz', '0,1', ascii(('abNN', 'N'))), + ('([abc]*)x', 'abcx', '0,1', ascii(('abcx', 'abc'))), + ('([abc]*)x', 'abc', '', ascii(None)), + ('([xyz]*)x', 'abcx', '0,1', ascii(('x', ''))), + ('(a)+b|aac', 'aac', '0,1', ascii(('aac', None))), + + # Test symbolic groups. 
+ ('(?Paaa)a', 'aaaa', '', regex.error, self.BAD_GROUP_NAME), + ('(?Paaa)a', 'aaaa', '0,id', ascii(('aaaa', 'aaa'))), + ('(?Paa)(?P=id)', 'aaaa', '0,id', ascii(('aaaa', 'aa'))), + ('(?Paa)(?P=xd)', 'aaaa', '', regex.error, self.UNKNOWN_GROUP), + + # Character properties. + (r"\g", "g", '0', ascii('g')), + (r"\g<1>", "g", '', regex.error, self.INVALID_GROUP_REF), + (r"(.)\g<1>", "gg", '0', ascii('gg')), + (r"(.)\g<1>", "gg", '', ascii(('gg', 'g'))), + (r"\N", "N", '0', ascii('N')), + (r"\N{LATIN SMALL LETTER A}", "a", '0', ascii('a')), + (r"\p", "p", '0', ascii('p')), + (r"\p{Ll}", "a", '0', ascii('a')), + (r"\P", "P", '0', ascii('P')), + (r"\P{Lu}", "p", '0', ascii('p')), + + # All tests from Perl. + ('abc', 'abc', '0', ascii('abc')), + ('abc', 'xbc', '', ascii(None)), + ('abc', 'axc', '', ascii(None)), + ('abc', 'abx', '', ascii(None)), + ('abc', 'xabcy', '0', ascii('abc')), + ('abc', 'ababc', '0', ascii('abc')), + + ('ab*c', 'abc', '0', ascii('abc')), + ('ab*bc', 'abc', '0', ascii('abc')), + ('ab*bc', 'abbc', '0', ascii('abbc')), + ('ab*bc', 'abbbbc', '0', ascii('abbbbc')), + ('ab{0,}bc', 'abbbbc', '0', ascii('abbbbc')), + ('ab+bc', 'abbc', '0', ascii('abbc')), + ('ab+bc', 'abc', '', ascii(None)), + ('ab+bc', 'abq', '', ascii(None)), + ('ab{1,}bc', 'abq', '', ascii(None)), + ('ab+bc', 'abbbbc', '0', ascii('abbbbc')), + + ('ab{1,}bc', 'abbbbc', '0', ascii('abbbbc')), + ('ab{1,3}bc', 'abbbbc', '0', ascii('abbbbc')), + ('ab{3,4}bc', 'abbbbc', '0', ascii('abbbbc')), + ('ab{4,5}bc', 'abbbbc', '', ascii(None)), + ('ab?bc', 'abbc', '0', ascii('abbc')), + ('ab?bc', 'abc', '0', ascii('abc')), + ('ab{0,1}bc', 'abc', '0', ascii('abc')), + ('ab?bc', 'abbbbc', '', ascii(None)), + ('ab?c', 'abc', '0', ascii('abc')), + ('ab{0,1}c', 'abc', '0', ascii('abc')), + + ('^abc$', 'abc', '0', ascii('abc')), + ('^abc$', 'abcc', '', ascii(None)), + ('^abc', 'abcc', '0', ascii('abc')), + ('^abc$', 'aabc', '', ascii(None)), + ('abc$', 'aabc', '0', ascii('abc')), + ('^', 'abc', '0', 
ascii('')), + ('$', 'abc', '0', ascii('')), + ('a.c', 'abc', '0', ascii('abc')), + ('a.c', 'axc', '0', ascii('axc')), + ('a.*c', 'axyzc', '0', ascii('axyzc')), + + ('a.*c', 'axyzd', '', ascii(None)), + ('a[bc]d', 'abc', '', ascii(None)), + ('a[bc]d', 'abd', '0', ascii('abd')), + ('a[b-d]e', 'abd', '', ascii(None)), + ('a[b-d]e', 'ace', '0', ascii('ace')), + ('a[b-d]', 'aac', '0', ascii('ac')), + ('a[-b]', 'a-', '0', ascii('a-')), + ('a[b-]', 'a-', '0', ascii('a-')), + ('a[b-a]', '-', '', regex.error, self.BAD_CHAR_RANGE), + ('a[]b', '-', '', regex.error, self.BAD_SET), + + ('a[', '-', '', regex.error, self.BAD_SET), + ('a]', 'a]', '0', ascii('a]')), + ('a[]]b', 'a]b', '0', ascii('a]b')), + ('a[^bc]d', 'aed', '0', ascii('aed')), + ('a[^bc]d', 'abd', '', ascii(None)), + ('a[^-b]c', 'adc', '0', ascii('adc')), + ('a[^-b]c', 'a-c', '', ascii(None)), + ('a[^]b]c', 'a]c', '', ascii(None)), + ('a[^]b]c', 'adc', '0', ascii('adc')), + ('ab|cd', 'abc', '0', ascii('ab')), + + ('ab|cd', 'abcd', '0', ascii('ab')), + ('()ef', 'def', '0,1', ascii(('ef', ''))), + ('*a', '-', '', regex.error, self.NOTHING_TO_REPEAT), + ('(*)b', '-', '', regex.error, self.NOTHING_TO_REPEAT), + ('$b', 'b', '', ascii(None)), + ('a\\', '-', '', regex.error, self.BAD_ESCAPE), + ('a\\(b', 'a(b', '', ascii(('a(b',))), + ('a\\(*b', 'ab', '0', ascii('ab')), + ('a\\(*b', 'a((b', '0', ascii('a((b')), + ('a\\\\b', 'a\\b', '0', ascii('a\\b')), + + ('abc)', '-', '', regex.error, self.TRAILING_CHARS), + ('(abc', '-', '', regex.error, self.MISSING_RPAREN), + ('((a))', 'abc', '0,1,2', ascii(('a', 'a', 'a'))), + ('(a)b(c)', 'abc', '0,1,2', ascii(('abc', 'a', 'c'))), + ('a+b+c', 'aabbabc', '0', ascii('abc')), + ('a{1,}b{1,}c', 'aabbabc', '0', ascii('abc')), + ('a**', '-', '', regex.error, self.MULTIPLE_REPEAT), + ('a.+?c', 'abcabc', '0', ascii('abc')), + ('(a+|b)*', 'ab', '0,1', ascii(('ab', 'b'))), + ('(a+|b){0,}', 'ab', '0,1', ascii(('ab', 'b'))), + + ('(a+|b)+', 'ab', '0,1', ascii(('ab', 'b'))), + ('(a+|b){1,}', 
'ab', '0,1', ascii(('ab', 'b'))), + ('(a+|b)?', 'ab', '0,1', ascii(('a', 'a'))), + ('(a+|b){0,1}', 'ab', '0,1', ascii(('a', 'a'))), + (')(', '-', '', regex.error, self.TRAILING_CHARS), + ('[^ab]*', 'cde', '0', ascii('cde')), + ('abc', '', '', ascii(None)), + ('a*', '', '0', ascii('')), + ('([abc])*d', 'abbbcd', '0,1', ascii(('abbbcd', 'c'))), + ('([abc])*bcd', 'abcd', '0,1', ascii(('abcd', 'a'))), + + ('a|b|c|d|e', 'e', '0', ascii('e')), + ('(a|b|c|d|e)f', 'ef', '0,1', ascii(('ef', 'e'))), + ('abcd*efg', 'abcdefg', '0', ascii('abcdefg')), + ('ab*', 'xabyabbbz', '0', ascii('ab')), + ('ab*', 'xayabbbz', '0', ascii('a')), + ('(ab|cd)e', 'abcde', '0,1', ascii(('cde', 'cd'))), + ('[abhgefdc]ij', 'hij', '0', ascii('hij')), + ('^(ab|cd)e', 'abcde', '', ascii(None)), + ('(abc|)ef', 'abcdef', '0,1', ascii(('ef', ''))), + ('(a|b)c*d', 'abcd', '0,1', ascii(('bcd', 'b'))), + + ('(ab|ab*)bc', 'abc', '0,1', ascii(('abc', 'a'))), + ('a([bc]*)c*', 'abc', '0,1', ascii(('abc', 'bc'))), + ('a([bc]*)(c*d)', 'abcd', '0,1,2', ascii(('abcd', 'bc', 'd'))), + ('a([bc]+)(c*d)', 'abcd', '0,1,2', ascii(('abcd', 'bc', 'd'))), + ('a([bc]*)(c+d)', 'abcd', '0,1,2', ascii(('abcd', 'b', 'cd'))), + ('a[bcd]*dcdcde', 'adcdcde', '0', ascii('adcdcde')), + ('a[bcd]+dcdcde', 'adcdcde', '', ascii(None)), + ('(ab|a)b*c', 'abc', '0,1', ascii(('abc', 'ab'))), + ('((a)(b)c)(d)', 'abcd', '1,2,3,4', ascii(('abc', 'a', 'b', 'd'))), + ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', '0', ascii('alpha')), + + ('^a(bc+|b[eh])g|.h$', 'abh', '0,1', ascii(('bh', None))), + ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', '0,1,2', ascii(('effgz', + 'effgz', None))), + ('(bc+d$|ef*g.|h?i(j|k))', 'ij', '0,1,2', ascii(('ij', 'ij', + 'j'))), + ('(bc+d$|ef*g.|h?i(j|k))', 'effg', '', ascii(None)), + ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', '', ascii(None)), + ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', '0,1,2', ascii(('effgz', + 'effgz', None))), + ('((((((((((a))))))))))', 'a', '10', ascii('a')), + ('((((((((((a))))))))))\\10', 'aa', '0', ascii('aa')), + + # 
Python does not have the same rules for \\41 so this is a syntax error + # ('((((((((((a))))))))))\\41', 'aa', '', ascii(None)), + # ('((((((((((a))))))))))\\41', 'a!', '0', ascii('a!')), + ('((((((((((a))))))))))\\41', '', '', regex.error, + self.INVALID_GROUP_REF), + ('(?i)((((((((((a))))))))))\\41', '', '', regex.error, + self.INVALID_GROUP_REF), + + ('(((((((((a)))))))))', 'a', '0', ascii('a')), + ('multiple words of text', 'uh-uh', '', ascii(None)), + ('multiple words', 'multiple words, yeah', '0', + ascii('multiple words')), + ('(.*)c(.*)', 'abcde', '0,1,2', ascii(('abcde', 'ab', 'de'))), + ('\\((.*), (.*)\\)', '(a, b)', '2,1', ascii(('b', 'a'))), + ('[k]', 'ab', '', ascii(None)), + ('a[-]?c', 'ac', '0', ascii('ac')), + ('(abc)\\1', 'abcabc', '1', ascii('abc')), + ('([a-c]*)\\1', 'abcabc', '1', ascii('abc')), + ('(?i)abc', 'ABC', '0', ascii('ABC')), + + ('(?i)abc', 'XBC', '', ascii(None)), + ('(?i)abc', 'AXC', '', ascii(None)), + ('(?i)abc', 'ABX', '', ascii(None)), + ('(?i)abc', 'XABCY', '0', ascii('ABC')), + ('(?i)abc', 'ABABC', '0', ascii('ABC')), + ('(?i)ab*c', 'ABC', '0', ascii('ABC')), + ('(?i)ab*bc', 'ABC', '0', ascii('ABC')), + ('(?i)ab*bc', 'ABBC', '0', ascii('ABBC')), + ('(?i)ab*?bc', 'ABBBBC', '0', ascii('ABBBBC')), + ('(?i)ab{0,}?bc', 'ABBBBC', '0', ascii('ABBBBC')), + + ('(?i)ab+?bc', 'ABBC', '0', ascii('ABBC')), + ('(?i)ab+bc', 'ABC', '', ascii(None)), + ('(?i)ab+bc', 'ABQ', '', ascii(None)), + ('(?i)ab{1,}bc', 'ABQ', '', ascii(None)), + ('(?i)ab+bc', 'ABBBBC', '0', ascii('ABBBBC')), + ('(?i)ab{1,}?bc', 'ABBBBC', '0', ascii('ABBBBC')), + ('(?i)ab{1,3}?bc', 'ABBBBC', '0', ascii('ABBBBC')), + ('(?i)ab{3,4}?bc', 'ABBBBC', '0', ascii('ABBBBC')), + ('(?i)ab{4,5}?bc', 'ABBBBC', '', ascii(None)), + ('(?i)ab??bc', 'ABBC', '0', ascii('ABBC')), + + ('(?i)ab??bc', 'ABC', '0', ascii('ABC')), + ('(?i)ab{0,1}?bc', 'ABC', '0', ascii('ABC')), + ('(?i)ab??bc', 'ABBBBC', '', ascii(None)), + ('(?i)ab??c', 'ABC', '0', ascii('ABC')), + ('(?i)ab{0,1}?c', 'ABC', '0', 
ascii('ABC')), + ('(?i)^abc$', 'ABC', '0', ascii('ABC')), + ('(?i)^abc$', 'ABCC', '', ascii(None)), + ('(?i)^abc', 'ABCC', '0', ascii('ABC')), + ('(?i)^abc$', 'AABC', '', ascii(None)), + ('(?i)abc$', 'AABC', '0', ascii('ABC')), + + ('(?i)^', 'ABC', '0', ascii('')), + ('(?i)$', 'ABC', '0', ascii('')), + ('(?i)a.c', 'ABC', '0', ascii('ABC')), + ('(?i)a.c', 'AXC', '0', ascii('AXC')), + ('(?i)a.*?c', 'AXYZC', '0', ascii('AXYZC')), + ('(?i)a.*c', 'AXYZD', '', ascii(None)), + ('(?i)a[bc]d', 'ABC', '', ascii(None)), + ('(?i)a[bc]d', 'ABD', '0', ascii('ABD')), + ('(?i)a[b-d]e', 'ABD', '', ascii(None)), + ('(?i)a[b-d]e', 'ACE', '0', ascii('ACE')), + + ('(?i)a[b-d]', 'AAC', '0', ascii('AC')), + ('(?i)a[-b]', 'A-', '0', ascii('A-')), + ('(?i)a[b-]', 'A-', '0', ascii('A-')), + ('(?i)a[b-a]', '-', '', regex.error, self.BAD_CHAR_RANGE), + ('(?i)a[]b', '-', '', regex.error, self.BAD_SET), + ('(?i)a[', '-', '', regex.error, self.BAD_SET), + ('(?i)a]', 'A]', '0', ascii('A]')), + ('(?i)a[]]b', 'A]B', '0', ascii('A]B')), + ('(?i)a[^bc]d', 'AED', '0', ascii('AED')), + ('(?i)a[^bc]d', 'ABD', '', ascii(None)), + + ('(?i)a[^-b]c', 'ADC', '0', ascii('ADC')), + ('(?i)a[^-b]c', 'A-C', '', ascii(None)), + ('(?i)a[^]b]c', 'A]C', '', ascii(None)), + ('(?i)a[^]b]c', 'ADC', '0', ascii('ADC')), + ('(?i)ab|cd', 'ABC', '0', ascii('AB')), + ('(?i)ab|cd', 'ABCD', '0', ascii('AB')), + ('(?i)()ef', 'DEF', '0,1', ascii(('EF', ''))), + ('(?i)*a', '-', '', regex.error, self.NOTHING_TO_REPEAT), + ('(?i)(*)b', '-', '', regex.error, self.NOTHING_TO_REPEAT), + ('(?i)$b', 'B', '', ascii(None)), + + ('(?i)a\\', '-', '', regex.error, self.BAD_ESCAPE), + ('(?i)a\\(b', 'A(B', '', ascii(('A(B',))), + ('(?i)a\\(*b', 'AB', '0', ascii('AB')), + ('(?i)a\\(*b', 'A((B', '0', ascii('A((B')), + ('(?i)a\\\\b', 'A\\B', '0', ascii('A\\B')), + ('(?i)abc)', '-', '', regex.error, self.TRAILING_CHARS), + ('(?i)(abc', '-', '', regex.error, self.MISSING_RPAREN), + ('(?i)((a))', 'ABC', '0,1,2', ascii(('A', 'A', 'A'))), + 
('(?i)(a)b(c)', 'ABC', '0,1,2', ascii(('ABC', 'A', 'C'))), + ('(?i)a+b+c', 'AABBABC', '0', ascii('ABC')), + + ('(?i)a{1,}b{1,}c', 'AABBABC', '0', ascii('ABC')), + ('(?i)a**', '-', '', regex.error, self.MULTIPLE_REPEAT), + ('(?i)a.+?c', 'ABCABC', '0', ascii('ABC')), + ('(?i)a.*?c', 'ABCABC', '0', ascii('ABC')), + ('(?i)a.{0,5}?c', 'ABCABC', '0', ascii('ABC')), + ('(?i)(a+|b)*', 'AB', '0,1', ascii(('AB', 'B'))), + ('(?i)(a+|b){0,}', 'AB', '0,1', ascii(('AB', 'B'))), + ('(?i)(a+|b)+', 'AB', '0,1', ascii(('AB', 'B'))), + ('(?i)(a+|b){1,}', 'AB', '0,1', ascii(('AB', 'B'))), + ('(?i)(a+|b)?', 'AB', '0,1', ascii(('A', 'A'))), + + ('(?i)(a+|b){0,1}', 'AB', '0,1', ascii(('A', 'A'))), + ('(?i)(a+|b){0,1}?', 'AB', '0,1', ascii(('', None))), + ('(?i))(', '-', '', regex.error, self.TRAILING_CHARS), + ('(?i)[^ab]*', 'CDE', '0', ascii('CDE')), + ('(?i)abc', '', '', ascii(None)), + ('(?i)a*', '', '0', ascii('')), + ('(?i)([abc])*d', 'ABBBCD', '0,1', ascii(('ABBBCD', 'C'))), + ('(?i)([abc])*bcd', 'ABCD', '0,1', ascii(('ABCD', 'A'))), + ('(?i)a|b|c|d|e', 'E', '0', ascii('E')), + ('(?i)(a|b|c|d|e)f', 'EF', '0,1', ascii(('EF', 'E'))), + + ('(?i)abcd*efg', 'ABCDEFG', '0', ascii('ABCDEFG')), + ('(?i)ab*', 'XABYABBBZ', '0', ascii('AB')), + ('(?i)ab*', 'XAYABBBZ', '0', ascii('A')), + ('(?i)(ab|cd)e', 'ABCDE', '0,1', ascii(('CDE', 'CD'))), + ('(?i)[abhgefdc]ij', 'HIJ', '0', ascii('HIJ')), + ('(?i)^(ab|cd)e', 'ABCDE', '', ascii(None)), + ('(?i)(abc|)ef', 'ABCDEF', '0,1', ascii(('EF', ''))), + ('(?i)(a|b)c*d', 'ABCD', '0,1', ascii(('BCD', 'B'))), + ('(?i)(ab|ab*)bc', 'ABC', '0,1', ascii(('ABC', 'A'))), + ('(?i)a([bc]*)c*', 'ABC', '0,1', ascii(('ABC', 'BC'))), + + ('(?i)a([bc]*)(c*d)', 'ABCD', '0,1,2', ascii(('ABCD', 'BC', 'D'))), + ('(?i)a([bc]+)(c*d)', 'ABCD', '0,1,2', ascii(('ABCD', 'BC', 'D'))), + ('(?i)a([bc]*)(c+d)', 'ABCD', '0,1,2', ascii(('ABCD', 'B', 'CD'))), + ('(?i)a[bcd]*dcdcde', 'ADCDCDE', '0', ascii('ADCDCDE')), + ('(?i)a[bcd]+dcdcde', 'ADCDCDE', '', ascii(None)), + 
('(?i)(ab|a)b*c', 'ABC', '0,1', ascii(('ABC', 'AB'))), + ('(?i)((a)(b)c)(d)', 'ABCD', '1,2,3,4', ascii(('ABC', 'A', 'B', + 'D'))), + ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', '0', ascii('ALPHA')), + ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', '0,1', ascii(('BH', None))), + ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', '0,1,2', ascii(('EFFGZ', + 'EFFGZ', None))), + + ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', '0,1,2', ascii(('IJ', 'IJ', + 'J'))), + ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', '', ascii(None)), + ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', '', ascii(None)), + ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', '0,1,2', ascii(('EFFGZ', + 'EFFGZ', None))), + ('(?i)((((((((((a))))))))))', 'A', '10', ascii('A')), + ('(?i)((((((((((a))))))))))\\10', 'AA', '0', ascii('AA')), + #('(?i)((((((((((a))))))))))\\41', 'AA', '', ascii(None)), + #('(?i)((((((((((a))))))))))\\41', 'A!', '0', ascii('A!')), + ('(?i)(((((((((a)))))))))', 'A', '0', ascii('A')), + ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', '1', + ascii('A')), + ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', '1', + ascii('C')), + ('(?i)multiple words of text', 'UH-UH', '', ascii(None)), + + ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', '0', + ascii('MULTIPLE WORDS')), + ('(?i)(.*)c(.*)', 'ABCDE', '0,1,2', ascii(('ABCDE', 'AB', 'DE'))), + ('(?i)\\((.*), (.*)\\)', '(A, B)', '2,1', ascii(('B', 'A'))), + ('(?i)[k]', 'AB', '', ascii(None)), + # ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', ascii(ABCD-$&-\\ABCD)), + # ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', ascii(BC-$1-\\BC)), + ('(?i)a[-]?c', 'AC', '0', ascii('AC')), + ('(?i)(abc)\\1', 'ABCABC', '1', ascii('ABC')), + ('(?i)([a-c]*)\\1', 'ABCABC', '1', ascii('ABC')), + ('a(?!b).', 'abad', '0', ascii('ad')), + ('a(?=d).', 'abad', '0', ascii('ad')), + ('a(?=c|d).', 'abad', '0', ascii('ad')), + + ('a(?:b|c|d)(.)', 'ace', '1', ascii('e')), + ('a(?:b|c|d)*(.)', 'ace', '1', ascii('e')), + ('a(?:b|c|d)+?(.)', 'ace', '1', ascii('e')), + 
('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', '1,2', ascii(('c', 'e'))), + + # Lookbehind: split by : but not if it is escaped by -. + ('(?]*?b', 'a>b', '', ascii(None)), + # Bug 490573: minimizing repeat problem. + (r'^a*?$', 'foo', '', ascii(None)), + # Bug 470582: nested groups problem. + (r'^((a)c)?(ab)$', 'ab', '1,2,3', ascii((None, None, 'ab'))), + # Another minimizing repeat problem (capturing groups in assertions). + ('^([ab]*?)(?=(b)?)c', 'abc', '1,2', ascii(('ab', None))), + ('^([ab]*?)(?!(b))c', 'abc', '1,2', ascii(('ab', None))), + ('^([ab]*?)(?(.){0,2})d", "abcd").captures(1), + ['b', 'c']) + self.assertEqual(regex.search(r"(.)+", "a").captures(1), ['a']) + + def test_guards(self): + m = regex.search(r"(X.*?Y\s*){3}(X\s*)+AB:", + "XY\nX Y\nX Y\nXY\nXX AB:") + self.assertEqual(m.span(0, 1, 2), ((3, 21), (12, 15), (16, 18))) + + m = regex.search(r"(X.*?Y\s*){3,}(X\s*)+AB:", + "XY\nX Y\nX Y\nXY\nXX AB:") + self.assertEqual(m.span(0, 1, 2), ((0, 21), (12, 15), (16, 18))) + + m = regex.search(r'\d{4}(\s*\w)?\W*((?!\d)\w){2}', "9999XX") + self.assertEqual(m.span(0, 1, 2), ((0, 6), (-1, -1), (5, 6))) + + m = regex.search(r'A\s*?.*?(\n+.*?\s*?){0,2}\(X', 'A\n1\nS\n1 (X') + self.assertEqual(m.span(0, 1), ((0, 10), (5, 8))) + + m = regex.search(r'Derde\s*:', 'aaaaaa:\nDerde:') + self.assertEqual(m.span(), (8, 14)) + m = regex.search(r'Derde\s*:', 'aaaaa:\nDerde:') + self.assertEqual(m.span(), (7, 13)) + + def test_turkic(self): + # Turkish has dotted and dotless I/i. 
+ pairs = "I=i;I=\u0131;i=\u0130" + + all_chars = set() + matching = set() + for pair in pairs.split(";"): + ch1, ch2 = pair.split("=") + all_chars.update((ch1, ch2)) + matching.add((ch1, ch1)) + matching.add((ch1, ch2)) + matching.add((ch2, ch1)) + matching.add((ch2, ch2)) + + for ch1 in all_chars: + for ch2 in all_chars: + m = regex.match(r"(?i)\A" + ch1 + r"\Z", ch2) + if m: + if (ch1, ch2) not in matching: + self.fail("{} matching {}".format(ascii(ch1), + ascii(ch2))) + else: + if (ch1, ch2) in matching: + self.fail("{} not matching {}".format(ascii(ch1), + ascii(ch2))) + + def test_named_lists(self): + options = ["one", "two", "three"] + self.assertEqual(regex.match(r"333\L444", "333one444", + bar=options).group(), "333one444") + self.assertEqual(regex.match(r"(?i)333\L444", "333TWO444", + bar=options).group(), "333TWO444") + self.assertEqual(regex.match(r"333\L444", "333four444", + bar=options), None) + + options = [b"one", b"two", b"three"] + self.assertEqual(regex.match(br"333\L444", b"333one444", + bar=options).group(), b"333one444") + self.assertEqual(regex.match(br"(?i)333\L444", b"333TWO444", + bar=options).group(), b"333TWO444") + self.assertEqual(regex.match(br"333\L444", b"333four444", + bar=options), None) + + self.assertEqual(repr(type(regex.compile(r"3\L4\L+5", + bar=["one", "two", "three"]))), self.PATTERN_CLASS) + + self.assertEqual(regex.findall(r"^\L", "solid QWERT", + options=set(['good', 'brilliant', '+s\\ol[i}d'])), []) + self.assertEqual(regex.findall(r"^\L", "+solid QWERT", + options=set(['good', 'brilliant', '+solid'])), ['+solid']) + + options = ["STRASSE"] + self.assertEqual(regex.match(r"(?fi)\L", + "stra\N{LATIN SMALL LETTER SHARP S}e", words=options).span(), (0, + 6)) + + options = ["STRASSE", "stress"] + self.assertEqual(regex.match(r"(?fi)\L", + "stra\N{LATIN SMALL LETTER SHARP S}e", words=options).span(), (0, + 6)) + + options = ["stra\N{LATIN SMALL LETTER SHARP S}e"] + self.assertEqual(regex.match(r"(?fi)\L", "STRASSE", + 
words=options).span(), (0, 7)) + + options = ["kit"] + self.assertEqual(regex.search(r"(?i)\L", "SKITS", + words=options).span(), (1, 4)) + self.assertEqual(regex.search(r"(?i)\L", + "SK\N{LATIN CAPITAL LETTER I WITH DOT ABOVE}TS", + words=options).span(), (1, 4)) + + self.assertEqual(regex.search(r"(?fi)\b(\w+) +\1\b", + " stra\N{LATIN SMALL LETTER SHARP S}e STRASSE ").span(), (1, 15)) + self.assertEqual(regex.search(r"(?fi)\b(\w+) +\1\b", + " STRASSE stra\N{LATIN SMALL LETTER SHARP S}e ").span(), (1, 15)) + + self.assertEqual(regex.search(r"^\L$", "", options=[]).span(), + (0, 0)) + + def test_fuzzy(self): + # Some tests borrowed from TRE library tests. + self.assertEqual(repr(type(regex.compile('(fou){s,e<=1}'))), + self.PATTERN_CLASS) + self.assertEqual(repr(type(regex.compile('(fuu){s}'))), + self.PATTERN_CLASS) + self.assertEqual(repr(type(regex.compile('(fuu){s,e}'))), + self.PATTERN_CLASS) + self.assertEqual(repr(type(regex.compile('(anaconda){1i+1d<1,s<=1}'))), + self.PATTERN_CLASS) + self.assertEqual(repr(type(regex.compile('(anaconda){1i+1d<1,s<=1,e<=10}'))), + self.PATTERN_CLASS) + self.assertEqual(repr(type(regex.compile('(anaconda){s<=1,e<=1,1i+1d<1}'))), + self.PATTERN_CLASS) + + text = 'molasses anaconda foo bar baz smith anderson ' + self.assertEqual(regex.search('(znacnda){s<=1,e<=3,1i+1d<1}', text), + None) + self.assertEqual(regex.search('(znacnda){s<=1,e<=3,1i+1d<2}', + text).span(0, 1), ((9, 17), (9, 17))) + self.assertEqual(regex.search('(ananda){1i+1d<2}', text), None) + self.assertEqual(regex.search(r"(?:\bznacnda){e<=2}", text)[0], + "anaconda") + self.assertEqual(regex.search(r"(?:\bnacnda){e<=2}", text)[0], + "anaconda") + + text = 'anaconda foo bar baz smith anderson' + self.assertEqual(regex.search('(fuu){i<=3,d<=3,e<=5}', text).span(0, + 1), ((0, 0), (0, 0))) + self.assertEqual(regex.search('(?b)(fuu){i<=3,d<=3,e<=5}', + text).span(0, 1), ((9, 10), (9, 10))) + self.assertEqual(regex.search('(fuu){i<=2,d<=2,e<=5}', text).span(0, + 1), 
((7, 10), (7, 10))) + self.assertEqual(regex.search('(?e)(fuu){i<=2,d<=2,e<=5}', + text).span(0, 1), ((9, 10), (9, 10))) + self.assertEqual(regex.search('(fuu){i<=3,d<=3,e}', text).span(0, 1), + ((0, 0), (0, 0))) + self.assertEqual(regex.search('(?b)(fuu){i<=3,d<=3,e}', text).span(0, + 1), ((9, 10), (9, 10))) + + self.assertEqual(repr(type(regex.compile('(approximate){s<=3,1i+1d<3}'))), + self.PATTERN_CLASS) + + # No cost limit. + self.assertEqual(regex.search('(foobar){e}', + 'xirefoabralfobarxie').span(0, 1), ((0, 6), (0, 6))) + self.assertEqual(regex.search('(?e)(foobar){e}', + 'xirefoabralfobarxie').span(0, 1), ((0, 3), (0, 3))) + self.assertEqual(regex.search('(?b)(foobar){e}', + 'xirefoabralfobarxie').span(0, 1), ((11, 16), (11, 16))) + + # At most two errors. + self.assertEqual(regex.search('(foobar){e<=2}', + 'xirefoabrzlfd').span(0, 1), ((4, 9), (4, 9))) + self.assertEqual(regex.search('(foobar){e<=2}', 'xirefoabzlfd'), None) + + # At most two inserts or substitutions and max two errors total. + self.assertEqual(regex.search('(foobar){i<=2,s<=2,e<=2}', + 'oobargoobaploowap').span(0, 1), ((5, 11), (5, 11))) + + # Find best whole word match for "foobar". + self.assertEqual(regex.search('\\b(foobar){e}\\b', 'zfoobarz').span(0, + 1), ((0, 8), (0, 8))) + self.assertEqual(regex.search('\\b(foobar){e}\\b', + 'boing zfoobarz goobar woop').span(0, 1), ((0, 6), (0, 6))) + self.assertEqual(regex.search('(?b)\\b(foobar){e}\\b', + 'boing zfoobarz goobar woop').span(0, 1), ((15, 21), (15, 21))) + + # Match whole string, allow only 1 error. 
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobar').span(0, 1), + ((0, 6), (0, 6))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoobar').span(0, + 1), ((0, 7), (0, 7))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobarx').span(0, + 1), ((0, 7), (0, 7))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'fooxbar').span(0, + 1), ((0, 7), (0, 7))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'foxbar').span(0, 1), + ((0, 6), (0, 6))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'xoobar').span(0, 1), + ((0, 6), (0, 6))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobax').span(0, 1), + ((0, 6), (0, 6))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'oobar').span(0, 1), + ((0, 5), (0, 5))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'fobar').span(0, 1), + ((0, 5), (0, 5))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'fooba').span(0, 1), + ((0, 5), (0, 5))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoobarx'), None) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobarxx'), None) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'xxfoobar'), None) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoxbar'), None) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'foxbarx'), None) + + # At most one insert, two deletes, and three substitutions. + # Additionally, deletes cost two and substitutes one, and total + # cost must be less than 4. + self.assertEqual(regex.search('(foobar){i<=1,d<=2,s<=3,2d+1s<4}', + '3oifaowefbaoraofuiebofasebfaobfaorfeoaro').span(0, 1), ((6, 13), (6, + 13))) + self.assertEqual(regex.search('(?b)(foobar){i<=1,d<=2,s<=3,2d+1s<4}', + '3oifaowefbaoraofuiebofasebfaobfaorfeoaro').span(0, 1), ((34, 39), + (34, 39))) + + # Partially fuzzy matches. 
+ self.assertEqual(regex.search('foo(bar){e<=1}zap', 'foobarzap').span(0, + 1), ((0, 9), (3, 6))) + self.assertEqual(regex.search('foo(bar){e<=1}zap', 'fobarzap'), None) + self.assertEqual(regex.search('foo(bar){e<=1}zap', 'foobrzap').span(0, + 1), ((0, 8), (3, 5))) + + text = ('www.cnn.com 64.236.16.20\nwww.slashdot.org 66.35.250.150\n' + 'For useful information, use www.slashdot.org\nthis is demo data!\n') + self.assertEqual(regex.search(r'(?s)^.*(dot.org){e}.*$', text).span(0, + 1), ((0, 120), (120, 120))) + self.assertEqual(regex.search(r'(?es)^.*(dot.org){e}.*$', text).span(0, + 1), ((0, 120), (93, 100))) + self.assertEqual(regex.search(r'^.*(dot.org){e}.*$', text).span(0, 1), + ((0, 119), (24, 101))) + + # Behaviour is unexpected, but arguably not wrong. It first finds the + # best match, then the best in what follows, etc. + self.assertEqual(regex.findall(r"\b\L{e<=1}\b", + " book cot dog desk ", words="cat dog".split()), ["cot", "dog"]) + self.assertEqual(regex.findall(r"\b\L{e<=1}\b", + " book dog cot desk ", words="cat dog".split()), [" dog", "cot"]) + self.assertEqual(regex.findall(r"(?e)\b\L{e<=1}\b", + " book dog cot desk ", words="cat dog".split()), ["dog", "cot"]) + self.assertEqual(regex.findall(r"(?r)\b\L{e<=1}\b", + " book cot dog desk ", words="cat dog".split()), ["dog ", "cot"]) + self.assertEqual(regex.findall(r"(?er)\b\L{e<=1}\b", + " book cot dog desk ", words="cat dog".split()), ["dog", "cot"]) + self.assertEqual(regex.findall(r"(?r)\b\L{e<=1}\b", + " book dog cot desk ", words="cat dog".split()), ["cot", "dog"]) + self.assertEqual(regex.findall(br"\b\L{e<=1}\b", + b" book cot dog desk ", words=b"cat dog".split()), [b"cot", b"dog"]) + self.assertEqual(regex.findall(br"\b\L{e<=1}\b", + b" book dog cot desk ", words=b"cat dog".split()), [b" dog", b"cot"]) + self.assertEqual(regex.findall(br"(?e)\b\L{e<=1}\b", + b" book dog cot desk ", words=b"cat dog".split()), [b"dog", b"cot"]) + self.assertEqual(regex.findall(br"(?r)\b\L{e<=1}\b", + b" book 
cot dog desk ", words=b"cat dog".split()), [b"dog ", b"cot"]) + self.assertEqual(regex.findall(br"(?er)\b\L{e<=1}\b", + b" book cot dog desk ", words=b"cat dog".split()), [b"dog", b"cot"]) + self.assertEqual(regex.findall(br"(?r)\b\L{e<=1}\b", + b" book dog cot desk ", words=b"cat dog".split()), [b"cot", b"dog"]) + + self.assertEqual(regex.search(r"(\w+) (\1{e<=1})", "foo fou").groups(), + ("foo", "fou")) + self.assertEqual(regex.search(r"(?r)(\2{e<=1}) (\w+)", + "foo fou").groups(), ("foo", "fou")) + self.assertEqual(regex.search(br"(\w+) (\1{e<=1})", + b"foo fou").groups(), (b"foo", b"fou")) + + self.assertEqual(regex.findall(r"(?:(?:QR)+){e}", "abcde"), ["abcde", + ""]) + self.assertEqual(regex.findall(r"(?:Q+){e}", "abc"), ["abc", ""]) + + # Hg issue 41: = for fuzzy matches + self.assertEqual(regex.match(r"(?:service detection){0[^()]+)|(?R))*\)", "(ab(cd)ef)")[ + : ], ("(ab(cd)ef)", "ef")) + self.assertEqual(regex.search(r"\(((?>[^()]+)|(?R))*\)", + "(ab(cd)ef)").captures(1), ["ab", "cd", "(cd)", "ef"]) + + self.assertEqual(regex.search(r"(?r)\(((?R)|(?>[^()]+))*\)", + "(ab(cd)ef)")[ : ], ("(ab(cd)ef)", "ab")) + self.assertEqual(regex.search(r"(?r)\(((?R)|(?>[^()]+))*\)", + "(ab(cd)ef)").captures(1), ["ef", "cd", "(cd)", "ab"]) + + self.assertEqual(regex.search(r"\(([^()]+|(?R))*\)", + "some text (a(b(c)d)e) more text")[ : ], ("(a(b(c)d)e)", "e")) + + self.assertEqual(regex.search(r"(?r)\(((?R)|[^()]+)*\)", + "some text (a(b(c)d)e) more text")[ : ], ("(a(b(c)d)e)", "a")) + + self.assertEqual(regex.search(r"(foo(\(((?:(?>[^()]+)|(?2))*)\)))", + "foo(bar(baz)+baz(bop))")[ : ], ("foo(bar(baz)+baz(bop))", + "foo(bar(baz)+baz(bop))", "(bar(baz)+baz(bop))", + "bar(baz)+baz(bop)")) + + self.assertEqual(regex.search(r"(?r)(foo(\(((?:(?2)|(?>[^()]+))*)\)))", + "foo(bar(baz)+baz(bop))")[ : ], ("foo(bar(baz)+baz(bop))", + "foo(bar(baz)+baz(bop))", "(bar(baz)+baz(bop))", + "bar(baz)+baz(bop)")) + + rgx = 
regex.compile(r"""^\s*(<\s*([a-zA-Z:]+)(?:\s*[a-zA-Z:]*\s*=\s*(?:'[^']*'|"[^"]*"))*\s*(/\s*)?>(?:[^<>]*|(?1))*(?(3)|<\s*/\s*\2\s*>))\s*$""") + self.assertEqual(bool(rgx.search('')), True) + self.assertEqual(bool(rgx.search('')), False) + self.assertEqual(bool(rgx.search('')), True) + self.assertEqual(bool(rgx.search('')), False) + self.assertEqual(bool(rgx.search('')), False) + + self.assertEqual(bool(rgx.search('')), False) + self.assertEqual(bool(rgx.search('')), True) + self.assertEqual(bool(rgx.search('< fooo / >')), True) + # The next regex should and does match. Perl 5.14 agrees. + #self.assertEqual(bool(rgx.search('foo')), False) + self.assertEqual(bool(rgx.search('foo')), False) + + self.assertEqual(bool(rgx.search('foo')), True) + self.assertEqual(bool(rgx.search('foo')), True) + self.assertEqual(bool(rgx.search('')), True) + + def test_copy(self): + # PatternObjects are immutable, therefore there's no need to clone them. + r = regex.compile("a") + self.assertTrue(copy.copy(r) is r) + self.assertTrue(copy.deepcopy(r) is r) + + # MatchObjects are normally mutable because the target string can be + # detached. However, after the target string has been detached, a + # MatchObject becomes immutable, so there's no need to clone it. + m = r.match("a") + self.assertTrue(copy.copy(m) is not m) + self.assertTrue(copy.deepcopy(m) is not m) + + self.assertTrue(m.string is not None) + m2 = copy.copy(m) + m2.detach_string() + self.assertTrue(m.string is not None) + self.assertTrue(m2.string is None) + + # The following behaviour matches that of the re module. + it = regex.finditer(".", "ab") + it2 = copy.copy(it) + self.assertEqual(next(it).group(), "a") + self.assertEqual(next(it2).group(), "b") + + # The following behaviour matches that of the re module. 
+ it = regex.finditer(".", "ab") + it2 = copy.deepcopy(it) + self.assertEqual(next(it).group(), "a") + self.assertEqual(next(it2).group(), "b") + + # The following behaviour is designed to match that of copying 'finditer'. + it = regex.splititer(" ", "a b") + it2 = copy.copy(it) + self.assertEqual(next(it), "a") + self.assertEqual(next(it2), "b") + + # The following behaviour is designed to match that of copying 'finditer'. + it = regex.splititer(" ", "a b") + it2 = copy.deepcopy(it) + self.assertEqual(next(it), "a") + self.assertEqual(next(it2), "b") + + def test_format(self): + self.assertEqual(regex.subf(r"(\w+) (\w+)", "{0} => {2} {1}", + "foo bar"), "foo bar => bar foo") + self.assertEqual(regex.subf(r"(?\w+) (?\w+)", + "{word2} {word1}", "foo bar"), "bar foo") + + self.assertEqual(regex.subfn(r"(\w+) (\w+)", "{0} => {2} {1}", + "foo bar"), ("foo bar => bar foo", 1)) + self.assertEqual(regex.subfn(r"(?\w+) (?\w+)", + "{word2} {word1}", "foo bar"), ("bar foo", 1)) + + self.assertEqual(regex.match(r"(\w+) (\w+)", + "foo bar").expandf("{0} => {2} {1}"), "foo bar => bar foo") + + def test_fullmatch(self): + self.assertEqual(bool(regex.fullmatch(r"abc", "abc")), True) + self.assertEqual(bool(regex.fullmatch(r"abc", "abcx")), False) + self.assertEqual(bool(regex.fullmatch(r"abc", "abcx", endpos=3)), True) + + self.assertEqual(bool(regex.fullmatch(r"abc", "xabc", pos=1)), True) + self.assertEqual(bool(regex.fullmatch(r"abc", "xabcy", pos=1)), False) + self.assertEqual(bool(regex.fullmatch(r"abc", "xabcy", pos=1, + endpos=4)), True) + + self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "abc")), True) + self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "abcx")), False) + self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "abcx", endpos=3)), + True) + + self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "xabc", pos=1)), + True) + self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "xabcy", pos=1)), + False) + self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "xabcy", pos=1, 
+ endpos=4)), True) + + def test_issue_18468(self): + self.assertTypedEqual(regex.sub('y', 'a', 'xyz'), 'xaz') + self.assertTypedEqual(regex.sub('y', StrSubclass('a'), + StrSubclass('xyz')), 'xaz') + self.assertTypedEqual(regex.sub(b'y', b'a', b'xyz'), b'xaz') + self.assertTypedEqual(regex.sub(b'y', BytesSubclass(b'a'), + BytesSubclass(b'xyz')), b'xaz') + self.assertTypedEqual(regex.sub(b'y', bytearray(b'a'), + bytearray(b'xyz')), b'xaz') + self.assertTypedEqual(regex.sub(b'y', memoryview(b'a'), + memoryview(b'xyz')), b'xaz') + + for string in ":a:b::c", StrSubclass(":a:b::c"): + self.assertTypedEqual(regex.split(":", string), ['', 'a', 'b', '', + 'c']) + if sys.version_info >= (3, 7, 0): + self.assertTypedEqual(regex.split(":*", string), ['', '', 'a', + '', 'b', '', 'c', '']) + self.assertTypedEqual(regex.split("(:*)", string), ['', ':', + '', '', 'a', ':', '', '', 'b', '::', '', '', 'c', '', '']) + else: + self.assertTypedEqual(regex.split(":*", string), ['', 'a', 'b', + 'c']) + self.assertTypedEqual(regex.split("(:*)", string), ['', ':', + 'a', ':', 'b', '::', 'c']) + + for string in (b":a:b::c", BytesSubclass(b":a:b::c"), + bytearray(b":a:b::c"), memoryview(b":a:b::c")): + self.assertTypedEqual(regex.split(b":", string), [b'', b'a', b'b', + b'', b'c']) + if sys.version_info >= (3, 7, 0): + self.assertTypedEqual(regex.split(b":*", string), [b'', b'', + b'a', b'', b'b', b'', b'c', b'']) + self.assertTypedEqual(regex.split(b"(:*)", string), [b'', b':', + b'', b'', b'a', b':', b'', b'', b'b', b'::', b'', b'', b'c', + b'', b'']) + else: + self.assertTypedEqual(regex.split(b":*", string), [b'', b'a', + b'b', b'c']) + self.assertTypedEqual(regex.split(b"(:*)", string), [b'', b':', + b'a', b':', b'b', b'::', b'c']) + + for string in "a:b::c:::d", StrSubclass("a:b::c:::d"): + self.assertTypedEqual(regex.findall(":+", string), [":", "::", + ":::"]) + self.assertTypedEqual(regex.findall("(:+)", string), [":", "::", + ":::"]) + 
self.assertTypedEqual(regex.findall("(:)(:*)", string), [(":", ""), + (":", ":"), (":", "::")]) + + for string in (b"a:b::c:::d", BytesSubclass(b"a:b::c:::d"), + bytearray(b"a:b::c:::d"), memoryview(b"a:b::c:::d")): + self.assertTypedEqual(regex.findall(b":+", string), [b":", b"::", + b":::"]) + self.assertTypedEqual(regex.findall(b"(:+)", string), [b":", b"::", + b":::"]) + self.assertTypedEqual(regex.findall(b"(:)(:*)", string), [(b":", + b""), (b":", b":"), (b":", b"::")]) + + for string in 'a', StrSubclass('a'): + self.assertEqual(regex.match('a', string).groups(), ()) + self.assertEqual(regex.match('(a)', string).groups(), ('a',)) + self.assertEqual(regex.match('(a)', string).group(0), 'a') + self.assertEqual(regex.match('(a)', string).group(1), 'a') + self.assertEqual(regex.match('(a)', string).group(1, 1), ('a', + 'a')) + + for string in (b'a', BytesSubclass(b'a'), bytearray(b'a'), + memoryview(b'a')): + self.assertEqual(regex.match(b'a', string).groups(), ()) + self.assertEqual(regex.match(b'(a)', string).groups(), (b'a',)) + self.assertEqual(regex.match(b'(a)', string).group(0), b'a') + self.assertEqual(regex.match(b'(a)', string).group(1), b'a') + self.assertEqual(regex.match(b'(a)', string).group(1, 1), (b'a', + b'a')) + + def test_partial(self): + self.assertEqual(regex.match('ab', 'a', partial=True).partial, True) + self.assertEqual(regex.match('ab', 'a', partial=True).span(), (0, 1)) + self.assertEqual(regex.match(r'cats', 'cat', partial=True).partial, + True) + self.assertEqual(regex.match(r'cats', 'cat', partial=True).span(), (0, + 3)) + self.assertEqual(regex.match(r'cats', 'catch', partial=True), None) + self.assertEqual(regex.match(r'abc\w{3}', 'abcdef', + partial=True).partial, False) + self.assertEqual(regex.match(r'abc\w{3}', 'abcdef', + partial=True).span(), (0, 6)) + self.assertEqual(regex.match(r'abc\w{3}', 'abcde', + partial=True).partial, True) + self.assertEqual(regex.match(r'abc\w{3}', 'abcde', + partial=True).span(), (0, 5)) + + 
self.assertEqual(regex.match(r'\d{4}$', '1234', partial=True).partial, + False) + + self.assertEqual(regex.match(r'\L', 'post', partial=True, + words=['post']).partial, False) + self.assertEqual(regex.match(r'\L', 'post', partial=True, + words=['post']).span(), (0, 4)) + self.assertEqual(regex.match(r'\L', 'pos', partial=True, + words=['post']).partial, True) + self.assertEqual(regex.match(r'\L', 'pos', partial=True, + words=['post']).span(), (0, 3)) + + self.assertEqual(regex.match(r'(?fi)\L', 'POST', partial=True, + words=['po\uFB06']).partial, False) + self.assertEqual(regex.match(r'(?fi)\L', 'POST', partial=True, + words=['po\uFB06']).span(), (0, 4)) + self.assertEqual(regex.match(r'(?fi)\L', 'POS', partial=True, + words=['po\uFB06']).partial, True) + self.assertEqual(regex.match(r'(?fi)\L', 'POS', partial=True, + words=['po\uFB06']).span(), (0, 3)) + self.assertEqual(regex.match(r'(?fi)\L', 'po\uFB06', + partial=True, words=['POS']), None) + + self.assertEqual(regex.match(r'[a-z]*4R$', 'a', partial=True).span(), + (0, 1)) + self.assertEqual(regex.match(r'[a-z]*4R$', 'ab', partial=True).span(), + (0, 2)) + self.assertEqual(regex.match(r'[a-z]*4R$', 'ab4', partial=True).span(), + (0, 3)) + self.assertEqual(regex.match(r'[a-z]*4R$', 'a4', partial=True).span(), + (0, 2)) + self.assertEqual(regex.match(r'[a-z]*4R$', 'a4R', partial=True).span(), + (0, 3)) + self.assertEqual(regex.match(r'[a-z]*4R$', '4a', partial=True), None) + self.assertEqual(regex.match(r'[a-z]*4R$', 'a44', partial=True), None) + + def test_hg_bugs(self): + # Hg issue 28: regex.compile("(?>b)") causes "TypeError: 'Character' + # object is not subscriptable" + self.assertEqual(bool(regex.compile("(?>b)", flags=regex.V1)), True) + + # Hg issue 29: regex.compile("^((?>\w+)|(?>\s+))*$") causes + # "TypeError: 'GreedyRepeat' object is not iterable" + self.assertEqual(bool(regex.compile(r"^((?>\w+)|(?>\s+))*$", + flags=regex.V1)), True) + + # Hg issue 31: atomic and normal groups in recursive patterns 
+ self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)", + "a(bcd(e)f)g(h)"), ['(bcd(e)f)', '(h)']) + self.assertEqual(regex.findall(r"\((?:(?:[^()]+)|(?R))*\)", + "a(bcd(e)f)g(h)"), ['(bcd(e)f)', '(h)']) + self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)", + "a(b(cd)e)f)g)h"), ['(b(cd)e)']) + self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)", + "a(bc(d(e)f)gh"), ['(d(e)f)']) + self.assertEqual(regex.findall(r"(?r)\((?:(?>[^()]+)|(?R))*\)", + "a(bc(d(e)f)gh"), ['(d(e)f)']) + self.assertEqual([m.group() for m in + regex.finditer(r"\((?:[^()]*+|(?0))*\)", "a(b(c(de)fg)h")], + ['(c(de)fg)']) + + # Hg issue 32: regex.search("a(bc)d", "abcd", regex.I|regex.V1) returns + # None + self.assertEqual(regex.search("a(bc)d", "abcd", regex.I | + regex.V1).group(0), "abcd") + + # Hg issue 33: regex.search("([\da-f:]+)$", "E", regex.I|regex.V1) + # returns None + self.assertEqual(regex.search(r"([\da-f:]+)$", "E", regex.I | + regex.V1).group(0), "E") + self.assertEqual(regex.search(r"([\da-f:]+)$", "e", regex.I | + regex.V1).group(0), "e") + + # Hg issue 34: regex.search("^(?=ab(de))(abd)(e)", "abde").groups() + # returns (None, 'abd', 'e') instead of ('de', 'abd', 'e') + self.assertEqual(regex.search("^(?=ab(de))(abd)(e)", "abde").groups(), + ('de', 'abd', 'e')) + + # Hg issue 35: regex.compile("\ ", regex.X) causes "_regex_core.error: + # bad escape" + self.assertEqual(bool(regex.match(r"\ ", " ", flags=regex.X)), True) + + # Hg issue 36: regex.search("^(a|)\1{2}b", "b") returns None + self.assertEqual(regex.search(r"^(a|)\1{2}b", "b").group(0, 1), ('b', + '')) + + # Hg issue 37: regex.search("^(a){0,0}", "abc").group(0,1) returns + # ('a', 'a') instead of ('', None) + self.assertEqual(regex.search("^(a){0,0}", "abc").group(0, 1), ('', + None)) + + # Hg issue 38: regex.search("(?>.*/)b", "a/b") returns None + self.assertEqual(regex.search("(?>.*/)b", "a/b").group(0), "a/b") + + # Hg issue 39: regex.search("((?i)blah)\\s+\\1", "blah BLAH") doesn't + # 
return None + self.assertEqual(regex.search(r"(?V0)((?i)blah)\s+\1", + "blah BLAH").group(0, 1), ("blah BLAH", "blah")) + self.assertEqual(regex.search(r"(?V1)((?i)blah)\s+\1", "blah BLAH"), + None) + + # Hg issue 40: regex.search("(\()?[^()]+(?(1)\)|)", "(abcd").group(0) + # returns "bcd" instead of "abcd" + self.assertEqual(regex.search(r"(\()?[^()]+(?(1)\)|)", + "(abcd").group(0), "abcd") + + # Hg issue 42: regex.search("(a*)*", "a", flags=regex.V1).span(1) + # returns (0, 1) instead of (1, 1) + self.assertEqual(regex.search("(a*)*", "a").span(1), (1, 1)) + self.assertEqual(regex.search("(a*)*", "aa").span(1), (2, 2)) + self.assertEqual(regex.search("(a*)*", "aaa").span(1), (3, 3)) + + # Hg issue 43: regex.compile("a(?#xxx)*") causes "_regex_core.error: + # nothing to repeat" + self.assertEqual(regex.search("a(?#xxx)*", "aaa").group(), "aaa") + + # Hg issue 44: regex.compile("(?=abc){3}abc") causes + # "_regex_core.error: nothing to repeat" + self.assertEqual(regex.search("(?=abc){3}abc", "abcabcabc").span(), (0, + 3)) + + # Hg issue 45: regex.compile("^(?:a(?:(?:))+)+") causes + # "_regex_core.error: nothing to repeat" + self.assertEqual(regex.search("^(?:a(?:(?:))+)+", "a").span(), (0, 1)) + self.assertEqual(regex.search("^(?:a(?:(?:))+)+", "aa").span(), (0, 2)) + + # Hg issue 46: regex.compile("a(?x: b c )d") causes + # "_regex_core.error: missing )" + self.assertEqual(regex.search("a(?x: b c )d", "abcd").group(0), "abcd") + + # Hg issue 47: regex.compile("a#comment\n*", flags=regex.X) causes + # "_regex_core.error: nothing to repeat" + self.assertEqual(regex.search("a#comment\n*", "aaa", + flags=regex.X).group(0), "aaa") + + # Hg issue 48: regex.search("(a(?(1)\\1)){4}", "a"*10, + # flags=regex.V1).group(0,1) returns ('aaaaa', 'a') instead of ('aaaaaaaaaa', 'aaaa') + self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){1}", + "aaaaaaaaaa").span(0, 1), ((0, 1), (0, 1))) + self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){2}", + "aaaaaaaaaa").span(0, 1), ((0, 3), 
(1, 3))) + self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){3}", + "aaaaaaaaaa").span(0, 1), ((0, 6), (3, 6))) + self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){4}", + "aaaaaaaaaa").span(0, 1), ((0, 10), (6, 10))) + + # Hg issue 49: regex.search("(a)(?<=b(?1))", "baz", regex.V1) returns + # None incorrectly + self.assertEqual(regex.search("(?V1)(a)(?<=b(?1))", "baz").group(0), + "a") + + # Hg issue 50: not all keywords are found by named list with + # overlapping keywords when full Unicode casefolding is required + self.assertEqual(regex.findall(r'(?fi)\L', + 'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05', + keywords=['post','pos']), ['POST', 'Post', 'post', 'po\u017Ft', + 'po\uFB06', 'po\uFB05']) + self.assertEqual(regex.findall(r'(?fi)pos|post', + 'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), ['POS', + 'Pos', 'pos', 'po\u017F', 'po\uFB06', 'po\uFB05']) + self.assertEqual(regex.findall(r'(?fi)post|pos', + 'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), ['POST', + 'Post', 'post', 'po\u017Ft', 'po\uFB06', 'po\uFB05']) + self.assertEqual(regex.findall(r'(?fi)post|another', + 'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), ['POST', + 'Post', 'post', 'po\u017Ft', 'po\uFB06', 'po\uFB05']) + + # Hg issue 51: regex.search("((a)(?1)|(?2))", "a", flags=regex.V1) + # returns None incorrectly + self.assertEqual(regex.search("(?V1)((a)(?1)|(?2))", "a").group(0, 1, + 2), ('a', 'a', None)) + + # Hg issue 52: regex.search("(\\1xx|){6}", "xx", + # flags=regex.V1).span(0,1) returns incorrect value + self.assertEqual(regex.search(r"(?V1)(\1xx|){6}", "xx").span(0, 1), + ((0, 2), (2, 2))) + + # Hg issue 53: regex.search("(a|)+", "a") causes MemoryError + self.assertEqual(regex.search("(a|)+", "a").group(0, 1), ("a", "")) + + # Hg issue 54: regex.search("(a|)*\\d", "a"*80) causes MemoryError + self.assertEqual(regex.search(r"(a|)*\d", "a" * 80), None) + + # Hg issue 55: regex.search("^(?:a?b?)*$", "ac") take a very long time. 
+ self.assertEqual(regex.search("^(?:a?b?)*$", "ac"), None) + + # Hg issue 58: bad named character escape sequences like "\\N{1}" + # treats as "N" + self.assertRaisesRegex(regex.error, self.UNDEF_CHAR_NAME, lambda: + regex.compile("\\N{1}")) + + # Hg issue 59: regex.search("\\Z", "a\na\n") returns None incorrectly + self.assertEqual(regex.search("\\Z", "a\na\n").span(0), (4, 4)) + + # Hg issue 60: regex.search("(q1|.)*(q2|.)*(x(a|bc)*y){2,}", "xayxay") + # returns None incorrectly + self.assertEqual(regex.search("(q1|.)*(q2|.)*(x(a|bc)*y){2,}", + "xayxay").group(0), "xayxay") + + # Hg issue 61: regex.search("[^a]", "A", regex.I).group(0) returns '' + # incorrectly + self.assertEqual(regex.search("(?i)[^a]", "A"), None) + + # Hg issue 63: regex.search("[[:ascii:]]", "\N{KELVIN SIGN}", + # flags=regex.I|regex.V1) doesn't return None + self.assertEqual(regex.search("(?i)[[:ascii:]]", "\N{KELVIN SIGN}"), + None) + + # Hg issue 66: regex.search("((a|b(?1)c){3,5})", "baaaaca", + # flags=regex.V1).groups() returns ('baaaac', 'baaaac') instead of ('aaaa', 'a') + self.assertEqual(regex.search("((a|b(?1)c){3,5})", "baaaaca").group(0, + 1, 2), ('aaaa', 'aaaa', 'a')) + + # Hg issue 71: non-greedy quantifier in lookbehind + self.assertEqual(regex.findall(r"(?<=:\S+ )\w+", ":9 abc :10 def"), + ['abc', 'def']) + self.assertEqual(regex.findall(r"(?<=:\S* )\w+", ":9 abc :10 def"), + ['abc', 'def']) + self.assertEqual(regex.findall(r"(?<=:\S+? )\w+", ":9 abc :10 def"), + ['abc', 'def']) + self.assertEqual(regex.findall(r"(?<=:\S*? )\w+", ":9 abc :10 def"), + ['abc', 'def']) + + # Hg issue 73: conditional patterns + self.assertEqual(regex.search(r"(?:fe)?male", "female").group(), + "female") + self.assertEqual([m.group() for m in + regex.finditer(r"(fe)?male: h(?(1)(er)|(is)) (\w+)", + "female: her dog; male: his cat. 
asdsasda")], ['female: her dog', + 'male: his cat']) + + # Hg issue 78: "Captures"doesn't work for recursive calls + self.assertEqual(regex.search(r'(?\((?:[^()]++|(?&rec))*\))', + 'aaa(((1+0)+1)+1)bbb').captures('rec'), ['(1+0)', '((1+0)+1)', + '(((1+0)+1)+1)']) + + # Hg issue 80: Escape characters throws an exception + self.assertRaisesRegex(regex.error, self.BAD_ESCAPE, lambda: + regex.sub('x', '\\', 'x'), ) + + # Hg issue 82: error range does not work + fz = "(CAGCCTCCCATTTCAGAATATACATCC){1a(?b))', "ab").spans("x"), [(1, + 2), (0, 2)]) + + # Hg issue 91: match.expand is extremely slow + # Check that the replacement cache works. + self.assertEqual(regex.sub(r'(-)', lambda m: m.expand(r'x'), 'a-b-c'), + 'axbxc') + + # Hg issue 94: Python crashes when executing regex updates + # pattern.findall + rx = regex.compile(r'\bt(est){i<2}', flags=regex.V1) + self.assertEqual(rx.search("Some text"), None) + self.assertEqual(rx.findall("Some text"), []) + + # Hg issue 95: 'pos' for regex.error + self.assertRaisesRegex(regex.error, self.MULTIPLE_REPEAT, lambda: + regex.compile(r'.???')) + + # Hg issue 97: behaviour of regex.escape's special_only is wrong + # + # Hg issue 244: Make `special_only=True` the default in + # `regex.escape()` + self.assertEqual(regex.escape('foo!?', special_only=False), 'foo\\!\\?') + self.assertEqual(regex.escape('foo!?', special_only=True), 'foo!\\?') + self.assertEqual(regex.escape('foo!?'), 'foo!\\?') + + self.assertEqual(regex.escape(b'foo!?', special_only=False), b'foo\\!\\?') + self.assertEqual(regex.escape(b'foo!?', special_only=True), + b'foo!\\?') + self.assertEqual(regex.escape(b'foo!?'), b'foo!\\?') + + # Hg issue 100: strange results from regex.search + self.assertEqual(regex.search('^([^z]*(?:WWWi|W))?$', + 'WWWi').groups(), ('WWWi', )) + self.assertEqual(regex.search('^([^z]*(?:WWWi|w))?$', + 'WWWi').groups(), ('WWWi', )) + self.assertEqual(regex.search('^([^z]*?(?:WWWi|W))?$', + 'WWWi').groups(), ('WWWi', )) + + # Hg issue 101: 
findall() broken (seems like memory corruption) + pat = regex.compile(r'xxx', flags=regex.FULLCASE | regex.UNICODE) + self.assertEqual([x.group() for x in pat.finditer('yxxx')], ['xxx']) + self.assertEqual(pat.findall('yxxx'), ['xxx']) + + raw = 'yxxx' + self.assertEqual([x.group() for x in pat.finditer(raw)], ['xxx']) + self.assertEqual(pat.findall(raw), ['xxx']) + + pat = regex.compile(r'xxx', flags=regex.FULLCASE | regex.IGNORECASE | + regex.UNICODE) + self.assertEqual([x.group() for x in pat.finditer('yxxx')], ['xxx']) + self.assertEqual(pat.findall('yxxx'), ['xxx']) + + raw = 'yxxx' + self.assertEqual([x.group() for x in pat.finditer(raw)], ['xxx']) + self.assertEqual(pat.findall(raw), ['xxx']) + + # Hg issue 106: * operator not working correctly with sub() + if sys.version_info >= (3, 7, 0): + self.assertEqual(regex.sub('(?V0).*', 'x', 'test'), 'xx') + else: + self.assertEqual(regex.sub('(?V0).*', 'x', 'test'), 'x') + self.assertEqual(regex.sub('(?V1).*', 'x', 'test'), 'xx') + + if sys.version_info >= (3, 7, 0): + self.assertEqual(regex.sub('(?V0).*?', '|', 'test'), '|||||||||') + else: + self.assertEqual(regex.sub('(?V0).*?', '|', 'test'), '|t|e|s|t|') + self.assertEqual(regex.sub('(?V1).*?', '|', 'test'), '|||||||||') + + # Hg issue 112: re: OK, but regex: SystemError + self.assertEqual(regex.sub(r'^(@)\n(?!.*?@)(.*)', + r'\1\n==========\n\2', '@\n', flags=regex.DOTALL), '@\n==========\n') + + # Hg issue 109: Edit distance of fuzzy match + self.assertEqual(regex.match(r'(?:cats|cat){e<=1}', + 'caz').fuzzy_counts, (1, 0, 0)) + self.assertEqual(regex.match(r'(?e)(?:cats|cat){e<=1}', + 'caz').fuzzy_counts, (1, 0, 0)) + self.assertEqual(regex.match(r'(?b)(?:cats|cat){e<=1}', + 'caz').fuzzy_counts, (1, 0, 0)) + + self.assertEqual(regex.match(r'(?:cat){e<=1}', 'caz').fuzzy_counts, + (1, 0, 0)) + self.assertEqual(regex.match(r'(?e)(?:cat){e<=1}', + 'caz').fuzzy_counts, (1, 0, 0)) + self.assertEqual(regex.match(r'(?b)(?:cat){e<=1}', + 'caz').fuzzy_counts, (1, 0, 
0)) + + self.assertEqual(regex.match(r'(?:cats){e<=2}', 'c ats').fuzzy_counts, + (1, 1, 0)) + self.assertEqual(regex.match(r'(?e)(?:cats){e<=2}', + 'c ats').fuzzy_counts, (0, 1, 0)) + self.assertEqual(regex.match(r'(?b)(?:cats){e<=2}', + 'c ats').fuzzy_counts, (0, 1, 0)) + + self.assertEqual(regex.match(r'(?:cats){e<=2}', + 'c a ts').fuzzy_counts, (0, 2, 0)) + self.assertEqual(regex.match(r'(?e)(?:cats){e<=2}', + 'c a ts').fuzzy_counts, (0, 2, 0)) + self.assertEqual(regex.match(r'(?b)(?:cats){e<=2}', + 'c a ts').fuzzy_counts, (0, 2, 0)) + + self.assertEqual(regex.match(r'(?:cats){e<=1}', 'c ats').fuzzy_counts, + (0, 1, 0)) + self.assertEqual(regex.match(r'(?e)(?:cats){e<=1}', + 'c ats').fuzzy_counts, (0, 1, 0)) + self.assertEqual(regex.match(r'(?b)(?:cats){e<=1}', + 'c ats').fuzzy_counts, (0, 1, 0)) + + # Hg issue 115: Infinite loop when processing backreferences + self.assertEqual(regex.findall(r'\bof ([a-z]+) of \1\b', + 'To make use of one of these modules'), []) + + # Hg issue 125: Reference to entire match (\g<0>) in + # Pattern.sub() doesn't work as of 2014.09.22 release. + self.assertEqual(regex.sub(r'x', r'\g<0>', 'x'), 'x') + + # Unreported issue: no such builtin as 'ascii' in Python 2. + self.assertEqual(bool(regex.match(r'a', 'a', regex.DEBUG)), True) + + # Hg issue 131: nested sets behaviour + self.assertEqual(regex.findall(r'(?V1)[[b-e]--cd]', 'abcdef'), ['b', + 'e']) + self.assertEqual(regex.findall(r'(?V1)[b-e--cd]', 'abcdef'), ['b', + 'e']) + self.assertEqual(regex.findall(r'(?V1)[[bcde]--cd]', 'abcdef'), ['b', + 'e']) + self.assertEqual(regex.findall(r'(?V1)[bcde--cd]', 'abcdef'), ['b', + 'e']) + + # Hg issue 132: index out of range on null property \p{} + self.assertRaisesRegex(regex.error, '^unknown property at position 4$', + lambda: regex.compile(r'\p{}')) + + # Issue 23692. 
+ self.assertEqual(regex.match('(?:()|(?(1)()|z)){2}(?(2)a|z)', + 'a').group(0, 1, 2), ('a', '', '')) + self.assertEqual(regex.match('(?:()|(?(1)()|z)){0,2}(?(2)a|z)', + 'a').group(0, 1, 2), ('a', '', '')) + + # Hg issue 137: Posix character class :punct: does not seem to be + # supported. + + # Posix compatibility as recommended here: + # http://www.unicode.org/reports/tr18/#Compatibility_Properties + + # Posix in Unicode. + chars = ''.join(chr(c) for c in range(0x10000)) + + self.assertEqual(ascii(''.join(regex.findall(r'''[[:alnum:]]+''', + chars))), ascii(''.join(regex.findall(r'''[\p{Alpha}\p{PosixDigit}]+''', + chars)))) + self.assertEqual(ascii(''.join(regex.findall(r'''[[:alpha:]]+''', + chars))), ascii(''.join(regex.findall(r'''\p{Alpha}+''', + chars)))) + self.assertEqual(ascii(''.join(regex.findall(r'''[[:ascii:]]+''', + chars))), ascii(''.join(regex.findall(r'''[\p{InBasicLatin}]+''', + chars)))) + self.assertEqual(ascii(''.join(regex.findall(r'''[[:blank:]]+''', + chars))), ascii(''.join(regex.findall(r'''[\p{gc=Space_Separator}\t]+''', + chars)))) + self.assertEqual(ascii(''.join(regex.findall(r'''[[:cntrl:]]+''', + chars))), ascii(''.join(regex.findall(r'''\p{gc=Control}+''', chars)))) + self.assertEqual(ascii(''.join(regex.findall(r'''[[:digit:]]+''', + chars))), ascii(''.join(regex.findall(r'''[0-9]+''', chars)))) + self.assertEqual(ascii(''.join(regex.findall(r'''[[:graph:]]+''', + chars))), ascii(''.join(regex.findall(r'''[^\p{Space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]+''', + chars)))) + self.assertEqual(ascii(''.join(regex.findall(r'''[[:lower:]]+''', + chars))), ascii(''.join(regex.findall(r'''\p{Lower}+''', + chars)))) + self.assertEqual(ascii(''.join(regex.findall(r'''[[:print:]]+''', + chars))), ascii(''.join(regex.findall(r'''(?V1)[\p{Graph}\p{Blank}--\p{Cntrl}]+''', chars)))) + self.assertEqual(ascii(''.join(regex.findall(r'''[[:punct:]]+''', + chars))), + 
ascii(''.join(regex.findall(r'''(?V1)[\p{gc=Punctuation}\p{gc=Symbol}--\p{Alpha}]+''', + chars)))) + self.assertEqual(ascii(''.join(regex.findall(r'''[[:space:]]+''', + chars))), ascii(''.join(regex.findall(r'''\p{Whitespace}+''', + chars)))) + self.assertEqual(ascii(''.join(regex.findall(r'''[[:upper:]]+''', + chars))), ascii(''.join(regex.findall(r'''\p{Upper}+''', + chars)))) + self.assertEqual(ascii(''.join(regex.findall(r'''[[:word:]]+''', + chars))), ascii(''.join(regex.findall(r'''[\p{Alpha}\p{gc=Mark}\p{Digit}\p{gc=Connector_Punctuation}\p{Join_Control}]+''', + chars)))) + self.assertEqual(ascii(''.join(regex.findall(r'''[[:xdigit:]]+''', + chars))), ascii(''.join(regex.findall(r'''[0-9A-Fa-f]+''', + chars)))) + + # Posix in ASCII. + chars = bytes(range(0x100)) + + self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:alnum:]]+''', + chars))), ascii(b''.join(regex.findall(br'''(?a)[\p{Alpha}\p{PosixDigit}]+''', + chars)))) + self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:alpha:]]+''', + chars))), ascii(b''.join(regex.findall(br'''(?a)\p{Alpha}+''', chars)))) + self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:ascii:]]+''', + chars))), ascii(b''.join(regex.findall(br'''(?a)[\x00-\x7F]+''', chars)))) + self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:blank:]]+''', + chars))), ascii(b''.join(regex.findall(br'''(?a)[\p{gc=Space_Separator}\t]+''', + chars)))) + self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:cntrl:]]+''', + chars))), ascii(b''.join(regex.findall(br'''(?a)\p{gc=Control}+''', + chars)))) + self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:digit:]]+''', + chars))), ascii(b''.join(regex.findall(br'''(?a)[0-9]+''', chars)))) + self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:graph:]]+''', + chars))), ascii(b''.join(regex.findall(br'''(?a)[^\p{Space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]+''', chars)))) + self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:lower:]]+''', + 
chars))), ascii(b''.join(regex.findall(br'''(?a)\p{Lower}+''', chars)))) + self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:print:]]+''', + chars))), ascii(b''.join(regex.findall(br'''(?aV1)[\p{Graph}\p{Blank}--\p{Cntrl}]+''', chars)))) + self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:punct:]]+''', + chars))), ascii(b''.join(regex.findall(br'''(?aV1)[\p{gc=Punctuation}\p{gc=Symbol}--\p{Alpha}]+''', + chars)))) + self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:space:]]+''', + chars))), ascii(b''.join(regex.findall(br'''(?a)\p{Whitespace}+''', chars)))) + self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:upper:]]+''', + chars))), ascii(b''.join(regex.findall(br'''(?a)\p{Upper}+''', chars)))) + self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:word:]]+''', + chars))), ascii(b''.join(regex.findall(br'''(?a)[\p{Alpha}\p{gc=Mark}\p{Digit}\p{gc=Connector_Punctuation}\p{Join_Control}]+''', chars)))) + self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:xdigit:]]+''', + chars))), ascii(b''.join(regex.findall(br'''(?a)[0-9A-Fa-f]+''', chars)))) + + # Hg issue 138: grapheme anchored search not working properly. + self.assertEqual(ascii(regex.search(r'\X$', 'ab\u2103').group()), + ascii('\u2103')) + + # Hg issue 139: Regular expression with multiple wildcards where first + # should match empty string does not always work. + self.assertEqual(regex.search("([^L]*)([^R]*R)", "LtR").groups(), ('', + 'LtR')) + + # Hg issue 140: Replace with REVERSE and groups has unexpected + # behavior. + self.assertEqual(regex.sub(r'(.)', r'x\1y', 'ab'), 'xayxby') + self.assertEqual(regex.sub(r'(?r)(.)', r'x\1y', 'ab'), 'xayxby') + self.assertEqual(regex.subf(r'(.)', 'x{1}y', 'ab'), 'xayxby') + self.assertEqual(regex.subf(r'(?r)(.)', 'x{1}y', 'ab'), 'xayxby') + + # Hg issue 141: Crash on a certain partial match. 
+ self.assertEqual(regex.fullmatch('(a)*abc', 'ab', + partial=True).span(), (0, 2)) + self.assertEqual(regex.fullmatch('(a)*abc', 'ab', + partial=True).partial, True) + + # Hg issue 143: Partial matches have incorrect span if prefix is '.' + # wildcard. + self.assertEqual(regex.search('OXRG', 'OOGOX', partial=True).span(), + (3, 5)) + self.assertEqual(regex.search('.XRG', 'OOGOX', partial=True).span(), + (3, 5)) + self.assertEqual(regex.search('.{1,3}XRG', 'OOGOX', + partial=True).span(), (1, 5)) + + # Hg issue 144: Latest version problem with matching 'R|R'. + self.assertEqual(regex.match('R|R', 'R').span(), (0, 1)) + + # Hg issue 146: Forced-fail (?!) works improperly in conditional. + self.assertEqual(regex.match(r'(.)(?(1)(?!))', 'xy'), None) + + # Groups cleared after failure. + self.assertEqual(regex.findall(r'(y)?(\d)(?(1)\b\B)', 'ax1y2z3b'), + [('', '1'), ('', '2'), ('', '3')]) + self.assertEqual(regex.findall(r'(y)?+(\d)(?(1)\b\B)', 'ax1y2z3b'), + [('', '1'), ('', '2'), ('', '3')]) + + # Hg issue 147: Fuzzy match can return match points beyond buffer end. + self.assertEqual([m.span() for m in regex.finditer(r'(?i)(?:error){e}', + 'regex failure')], [(0, 5), (5, 10), (10, 13), (13, 13)]) + self.assertEqual([m.span() for m in + regex.finditer(r'(?fi)(?:error){e}', 'regex failure')], [(0, 5), (5, + 10), (10, 13), (13, 13)]) + + # Hg issue 150: Have an option for POSIX-compatible longest match of + # alternates. + self.assertEqual(regex.search(r'(?p)\d+(\w(\d*)?|[eE]([+-]\d+))', + '10b12')[0], '10b12') + self.assertEqual(regex.search(r'(?p)\d+(\w(\d*)?|[eE]([+-]\d+))', + '10E+12')[0], '10E+12') + + self.assertEqual(regex.search(r'(?p)(\w|ae|oe|ue|ss)', 'ae')[0], 'ae') + self.assertEqual(regex.search(r'(?p)one(self)?(selfsufficient)?', + 'oneselfsufficient')[0], 'oneselfsufficient') + + # Hg issue 151: Request: \K. 
+ self.assertEqual(regex.search(r'(ab\Kcd)', 'abcd').group(0, 1), ('cd', + 'abcd')) + self.assertEqual(regex.findall(r'\w\w\K\w\w', 'abcdefgh'), ['cd', + 'gh']) + self.assertEqual(regex.findall(r'(\w\w\K\w\w)', 'abcdefgh'), ['abcd', + 'efgh']) + + self.assertEqual(regex.search(r'(?r)(ab\Kcd)', 'abcd').group(0, 1), + ('ab', 'abcd')) + self.assertEqual(regex.findall(r'(?r)\w\w\K\w\w', 'abcdefgh'), ['ef', + 'ab']) + self.assertEqual(regex.findall(r'(?r)(\w\w\K\w\w)', 'abcdefgh'), + ['efgh', 'abcd']) + + # Hg issue 152: Request: Request: (?(DEFINE)...). + self.assertEqual(regex.search(r'(?(DEFINE)(?\d+)(?\w+))(?&quant) (?&item)', + '5 elephants')[0], '5 elephants') + + # Hg issue 153: Request: (*SKIP). + self.assertEqual(regex.search(r'12(*FAIL)|3', '123')[0], '3') + self.assertEqual(regex.search(r'(?r)12(*FAIL)|3', '123')[0], '3') + + self.assertEqual(regex.search(r'\d+(*PRUNE)\d', '123'), None) + self.assertEqual(regex.search(r'\d+(?=(*PRUNE))\d', '123')[0], '123') + self.assertEqual(regex.search(r'\d+(*PRUNE)bcd|[3d]', '123bcd')[0], + '123bcd') + self.assertEqual(regex.search(r'\d+(*PRUNE)bcd|[3d]', '123zzd')[0], + 'd') + self.assertEqual(regex.search(r'\d+?(*PRUNE)bcd|[3d]', '123bcd')[0], + '3bcd') + self.assertEqual(regex.search(r'\d+?(*PRUNE)bcd|[3d]', '123zzd')[0], + 'd') + self.assertEqual(regex.search(r'\d++(?<=3(*PRUNE))zzd|[4d]$', + '123zzd')[0], '123zzd') + self.assertEqual(regex.search(r'\d++(?<=3(*PRUNE))zzd|[4d]$', + '124zzd')[0], 'd') + self.assertEqual(regex.search(r'\d++(?<=(*PRUNE)3)zzd|[4d]$', + '124zzd')[0], 'd') + self.assertEqual(regex.search(r'\d++(?<=2(*PRUNE)3)zzd|[3d]$', + '124zzd')[0], 'd') + + self.assertEqual(regex.search(r'(?r)\d(*PRUNE)\d+', '123'), None) + self.assertEqual(regex.search(r'(?r)\d(?<=(*PRUNE))\d+', '123')[0], + '123') + self.assertEqual(regex.search(r'(?r)\d+(*PRUNE)bcd|[3d]', + '123bcd')[0], '123bcd') + self.assertEqual(regex.search(r'(?r)\d+(*PRUNE)bcd|[3d]', + '123zzd')[0], 'd') + 
self.assertEqual(regex.search(r'(?r)\d++(?<=3(*PRUNE))zzd|[4d]$', + '123zzd')[0], '123zzd') + self.assertEqual(regex.search(r'(?r)\d++(?<=3(*PRUNE))zzd|[4d]$', + '124zzd')[0], 'd') + self.assertEqual(regex.search(r'(?r)\d++(?<=(*PRUNE)3)zzd|[4d]$', + '124zzd')[0], 'd') + self.assertEqual(regex.search(r'(?r)\d++(?<=2(*PRUNE)3)zzd|[3d]$', + '124zzd')[0], 'd') + + self.assertEqual(regex.search(r'\d+(*SKIP)bcd|[3d]', '123bcd')[0], + '123bcd') + self.assertEqual(regex.search(r'\d+(*SKIP)bcd|[3d]', '123zzd')[0], + 'd') + self.assertEqual(regex.search(r'\d+?(*SKIP)bcd|[3d]', '123bcd')[0], + '3bcd') + self.assertEqual(regex.search(r'\d+?(*SKIP)bcd|[3d]', '123zzd')[0], + 'd') + self.assertEqual(regex.search(r'\d++(?<=3(*SKIP))zzd|[4d]$', + '123zzd')[0], '123zzd') + self.assertEqual(regex.search(r'\d++(?<=3(*SKIP))zzd|[4d]$', + '124zzd')[0], 'd') + self.assertEqual(regex.search(r'\d++(?<=(*SKIP)3)zzd|[4d]$', + '124zzd')[0], 'd') + self.assertEqual(regex.search(r'\d++(?<=2(*SKIP)3)zzd|[3d]$', + '124zzd')[0], 'd') + + self.assertEqual(regex.search(r'(?r)\d+(*SKIP)bcd|[3d]', '123bcd')[0], + '123bcd') + self.assertEqual(regex.search(r'(?r)\d+(*SKIP)bcd|[3d]', '123zzd')[0], + 'd') + self.assertEqual(regex.search(r'(?r)\d++(?<=3(*SKIP))zzd|[4d]$', + '123zzd')[0], '123zzd') + self.assertEqual(regex.search(r'(?r)\d++(?<=3(*SKIP))zzd|[4d]$', + '124zzd')[0], 'd') + self.assertEqual(regex.search(r'(?r)\d++(?<=(*SKIP)3)zzd|[4d]$', + '124zzd')[0], 'd') + self.assertEqual(regex.search(r'(?r)\d++(?<=2(*SKIP)3)zzd|[3d]$', + '124zzd')[0], 'd') + + # Hg issue 154: Segmentation fault 11 when working with an atomic group + text = """June 30, December 31, 2013 2012 +some words follow: +more words and numbers 1,234,567 9,876,542 +more words and numbers 1,234,567 9,876,542""" + self.assertEqual(len(regex.findall(r'(?2014|2013 ?2012)', text)), 1) + + # Hg issue 156: regression on atomic grouping + self.assertEqual(regex.match('1(?>2)', '12').span(), (0, 2)) + + # Hg issue 157: regression: segfault 
on complex lookaround + self.assertEqual(regex.match(r'(?V1w)(?=(?=[^A-Z]*+[A-Z])(?=[^a-z]*+[a-z]))(?=\D*+\d)(?=\p{Alphanumeric}*+\P{Alphanumeric})\A(?s:.){8,255}+\Z', + 'AAaa11!!')[0], 'AAaa11!!') + + # Hg issue 158: Group issue with (?(DEFINE)...) + TEST_REGEX = regex.compile(r'''(?smx) +(?(DEFINE) + (? + ^,[^,]+, + ) +) + +# Group 2 is defined on this line +^,([^,]+), + +(?:(?!(?&subcat)[\r\n]+(?&subcat)).)+ +''') + + TEST_DATA = ''' +,Cat 1, +,Brand 1, +some +thing +,Brand 2, +other +things +,Cat 2, +,Brand, +Some +thing +''' + + self.assertEqual([m.span(1, 2) for m in + TEST_REGEX.finditer(TEST_DATA)], [((-1, -1), (2, 7)), ((-1, -1), (54, + 59))]) + + # Hg issue 161: Unexpected fuzzy match results + self.assertEqual(regex.search('(abcdefgh){e}', + '******abcdefghijklmnopqrtuvwxyz', regex.BESTMATCH).span(), (6, 14)) + self.assertEqual(regex.search('(abcdefghi){e}', + '******abcdefghijklmnopqrtuvwxyz', regex.BESTMATCH).span(), (6, 15)) + + # Hg issue 163: allow lookarounds in conditionals. + self.assertEqual(regex.match(r'(?:(?=\d)\d+\b|\w+)', '123abc').span(), + (0, 6)) + self.assertEqual(regex.match(r'(?(?=\d)\d+\b|\w+)', '123abc'), None) + self.assertEqual(regex.search(r'(?(?<=love\s)you|(?<=hate\s)her)', + "I love you").span(), (7, 10)) + self.assertEqual(regex.findall(r'(?(?<=love\s)you|(?<=hate\s)her)', + "I love you but I don't hate her either"), ['you', 'her']) + + # Hg issue 180: bug of POSIX matching. 
+ self.assertEqual(regex.search(r'(?p)a*(.*?)', 'aaabbb').group(0, 1), + ('aaabbb', 'bbb')) + self.assertEqual(regex.search(r'(?p)a*(.*)', 'aaabbb').group(0, 1), + ('aaabbb', 'bbb')) + self.assertEqual(regex.sub(r'(?p)a*(.*?)', r'\1', 'aaabbb'), 'bbb') + self.assertEqual(regex.sub(r'(?p)a*(.*)', r'\1', 'aaabbb'), 'bbb') + + # Hg issue 192: Named lists reverse matching doesn't work with + # IGNORECASE and V1 + self.assertEqual(regex.match(r'(?irV0)\L', '21', kw=['1']).span(), + (1, 2)) + self.assertEqual(regex.match(r'(?irV1)\L', '21', kw=['1']).span(), + (1, 2)) + + # Hg issue 193: Alternation and .REVERSE flag. + self.assertEqual(regex.search('a|b', '111a222').span(), (3, 4)) + self.assertEqual(regex.search('(?r)a|b', '111a222').span(), (3, 4)) + + # Hg issue 194: .FULLCASE and Backreference + self.assertEqual(regex.search(r'(?if)<(CLI)><\1>', + '').span(), (0, 10)) + self.assertEqual(regex.search(r'(?if)<(CLI)><\1>', + '').span(), (0, 10)) + self.assertEqual(regex.search(r'(?ifr)<\1><(CLI)>', + '').span(), (0, 10)) + + # Hg issue 195: Pickle (or otherwise serial) the compiled regex + r = regex.compile(r'\L', options=['foo', 'bar']) + p = pickle.dumps(r) + r = pickle.loads(p) + self.assertEqual(r.match('foo').span(), (0, 3)) + + # Hg issue 196: Fuzzy matching on repeated regex not working as + # expected + self.assertEqual(regex.match('(x{6}){e<=1}', 'xxxxxx', + flags=regex.BESTMATCH).span(), (0, 6)) + self.assertEqual(regex.match('(x{6}){e<=1}', 'xxxxx', + flags=regex.BESTMATCH).span(), (0, 5)) + self.assertEqual(regex.match('(x{6}){e<=1}', 'x', + flags=regex.BESTMATCH), None) + self.assertEqual(regex.match('(?r)(x{6}){e<=1}', 'xxxxxx', + flags=regex.BESTMATCH).span(), (0, 6)) + self.assertEqual(regex.match('(?r)(x{6}){e<=1}', 'xxxxx', + flags=regex.BESTMATCH).span(), (0, 5)) + self.assertEqual(regex.match('(?r)(x{6}){e<=1}', 'x', + flags=regex.BESTMATCH), None) + + # Hg issue 197: ValueError in regex.compile + self.assertRaises(regex.error, lambda: + 
regex.compile(b'00000\\0\\00\\^\50\\00\\U05000000')) + + # Hg issue 198: ValueError in regex.compile + self.assertRaises(regex.error, lambda: regex.compile(b"{e', '22', aa=['121', + '22'])), True) + self.assertEqual(bool(regex.search(r'(?ri)\L', '22', aa=['121', + '22'])), True) + self.assertEqual(bool(regex.search(r'(?fi)\L', '22', aa=['121', + '22'])), True) + self.assertEqual(bool(regex.search(r'(?fri)\L', '22', aa=['121', + '22'])), True) + + # Hg issue 208: Named list, (?ri) flags, Backreference + self.assertEqual(regex.search(r'(?r)\1dog..(?<=(\L))$', 'ccdogcc', + aa=['bcb', 'cc']). span(), (0, 7)) + self.assertEqual(regex.search(r'(?ir)\1dog..(?<=(\L))$', + 'ccdogcc', aa=['bcb', 'cc']). span(), (0, 7)) + + # Hg issue 210: Fuzzy matching and Backreference + self.assertEqual(regex.search(r'(2)(?:\1{5}){e<=1}', + '3222212').span(), (1, 7)) + self.assertEqual(regex.search(r'(\d)(?:\1{5}){e<=1}', + '3222212').span(), (1, 7)) + + # Hg issue 211: Segmentation fault with recursive matches and atomic + # groups + self.assertEqual(regex.match(r'''\A(?P(?>\((?&whole)\)|[+\-]))\Z''', + '((-))').span(), (0, 5)) + self.assertEqual(regex.match(r'''\A(?P(?>\((?&whole)\)|[+\-]))\Z''', + '((-)+)'), None) + + # Hg issue 212: Unexpected matching difference with .*? between re and + # regex + self.assertEqual(regex.match(r"x.*? (.).*\1(.*)\1", + 'x |y| z|').span(), (0, 9)) + self.assertEqual(regex.match(r"\.sr (.*?) 
(.)(.*)\2(.*)\2(.*)", + r'.sr h |||').span(), (0, 35)) + + # Hg issue 213: Segmentation Fault + a = '"\\xF9\\x80\\xAEqdz\\x95L\\xA7\\x89[\\xFE \\x91)\\xF9]\\xDB\'\\x99\\x09=\\x00\\xFD\\x98\\x22\\xDD\\xF1\\xB6\\xC3 Z\\xB6gv\\xA5x\\x93P\\xE1r\\x14\\x8Cv\\x0C\\xC0w\\x15r\\xFFc%" ' + py_regex_pattern = r'''(?P((?>(?"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``)))) (?P((?>(?"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``))))''' + self.assertEqual(bool(regex.search(py_regex_pattern, a)), False) + + # Hg Issue 216: Invalid match when using negative lookbehind and pipe + self.assertEqual(bool(regex.match('foo(?<=foo)', 'foo')), True) + self.assertEqual(bool(regex.match('foo(?.*\!\w*\:.*)|(?P.*))', + '!')), False) + + # Hg issue 220: Misbehavior of group capture with OR operand + self.assertEqual(regex.match(r'\w*(ea)\w*|\w*e(?!a)\w*', + 'easier').groups(), ('ea', )) + + # Hg issue 225: BESTMATCH in fuzzy match not working + self.assertEqual(regex.search('(^1234$){i,d}', '12234', + regex.BESTMATCH).span(), (0, 5)) + self.assertEqual(regex.search('(^1234$){i,d}', '12234', + regex.BESTMATCH).fuzzy_counts, (0, 1, 0)) + + self.assertEqual(regex.search('(^1234$){s,i,d}', '12234', + regex.BESTMATCH).span(), (0, 5)) + self.assertEqual(regex.search('(^1234$){s,i,d}', '12234', + regex.BESTMATCH).fuzzy_counts, (0, 1, 0)) + + # Hg issue 226: Error matching at start of string + self.assertEqual(regex.search('(^123$){s,i,d}', 'xxxxxxxx123', + regex.BESTMATCH).span(), (0, 11)) + self.assertEqual(regex.search('(^123$){s,i,d}', 'xxxxxxxx123', + regex.BESTMATCH).fuzzy_counts, (0, 8, 0)) + + # Hg issue 227: Incorrect behavior for ? operator with UNICODE + + # IGNORECASE + self.assertEqual(regex.search(r'a?yz', 'xxxxyz', flags=regex.FULLCASE | + regex.IGNORECASE).span(), (4, 6)) + + # Hg issue 230: Is it a bug of (?(DEFINE)...) 
+ self.assertEqual(regex.findall(r'(?:(?![a-d]).)+', 'abcdefgh'), + ['efgh']) + self.assertEqual(regex.findall(r'''(?(DEFINE)(?P(?:(?![a-d]).)))(?&mydef)+''', + 'abcdefgh'), ['efgh']) + + # Hg issue 238: Not fully re backward compatible + self.assertEqual(regex.findall(r'((\w{1,3})(\.{2,10})){1,3}', + '"Erm....yes. T..T...Thank you for that."'), [('Erm....', 'Erm', + '....'), ('T...', 'T', '...')]) + self.assertEqual(regex.findall(r'((\w{1,3})(\.{2,10})){3}', + '"Erm....yes. T..T...Thank you for that."'), []) + self.assertEqual(regex.findall(r'((\w{1,3})(\.{2,10})){2}', + '"Erm....yes. T..T...Thank you for that."'), [('T...', 'T', '...')]) + self.assertEqual(regex.findall(r'((\w{1,3})(\.{2,10})){1}', + '"Erm....yes. T..T...Thank you for that."'), [('Erm....', 'Erm', + '....'), ('T..', 'T', '..'), ('T...', 'T', '...')]) + + # Hg issue 247: Unexpected result with fuzzy matching and lookahead + # expression + self.assertEqual(regex.search(r'(?:ESTONIA(?!\w)){e<=1}', + 'ESTONIAN WORKERS').group(), 'ESTONIAN') + self.assertEqual(regex.search(r'(?:ESTONIA(?=\W)){e<=1}', + 'ESTONIAN WORKERS').group(), 'ESTONIAN') + + self.assertEqual(regex.search(r'(?:(?.))(?&func)', + 'abc').groups(), (None, )) + self.assertEqual(regex.search(r'(?(DEFINE)(?.))(?&func)', + 'abc').groupdict(), {'func': None}) + self.assertEqual(regex.search(r'(?(DEFINE)(?.))(?&func)', + 'abc').capturesdict(), {'func': ['a']}) + + self.assertEqual(regex.search(r'(?(DEFINE)(?.))(?=(?&func))', + 'abc').groups(), (None, )) + self.assertEqual(regex.search(r'(?(DEFINE)(?.))(?=(?&func))', + 'abc').groupdict(), {'func': None}) + self.assertEqual(regex.search(r'(?(DEFINE)(?.))(?=(?&func))', + 'abc').capturesdict(), {'func': ['a']}) + + self.assertEqual(regex.search(r'(?(DEFINE)(?.)).(?<=(?&func))', + 'abc').groups(), (None, )) + self.assertEqual(regex.search(r'(?(DEFINE)(?.)).(?<=(?&func))', + 'abc').groupdict(), {'func': None}) + self.assertEqual(regex.search(r'(?(DEFINE)(?.)).(?<=(?&func))', + 
'abc').capturesdict(), {'func': ['a']}) + + # Hg issue 271: Comment logic different between Re and Regex + self.assertEqual(bool(regex.match(r'ab(?#comment\))cd', 'abcd')), True) + + # Hg issue 276: Partial Matches yield incorrect matches and bounds + self.assertEqual(regex.search(r'[a-z]+ [a-z]*?:', 'foo bar', + partial=True).span(), (0, 7)) + self.assertEqual(regex.search(r'(?r):[a-z]*? [a-z]+', 'foo bar', + partial=True).span(), (0, 7)) + + # Hg issue 291: Include Script Extensions as a supported Unicode property + self.assertEqual(bool(regex.match(r'(?u)\p{Script:Beng}', + '\u09EF')), True) + self.assertEqual(bool(regex.match(r'(?u)\p{Script:Bengali}', + '\u09EF')), True) + self.assertEqual(bool(regex.match(r'(?u)\p{Script_Extensions:Bengali}', + '\u09EF')), True) + self.assertEqual(bool(regex.match(r'(?u)\p{Script_Extensions:Beng}', + '\u09EF')), True) + self.assertEqual(bool(regex.match(r'(?u)\p{Script_Extensions:Cakm}', + '\u09EF')), True) + self.assertEqual(bool(regex.match(r'(?u)\p{Script_Extensions:Sylo}', + '\u09EF')), True) + + # Hg issue #293: scx (Script Extensions) property currently matches + # incorrectly + self.assertEqual(bool(regex.match(r'(?u)\p{scx:Latin}', 'P')), True) + self.assertEqual(bool(regex.match(r'(?u)\p{scx:Ahom}', 'P')), False) + self.assertEqual(bool(regex.match(r'(?u)\p{scx:Common}', '4')), True) + self.assertEqual(bool(regex.match(r'(?u)\p{scx:Caucasian_Albanian}', '4')), + False) + self.assertEqual(bool(regex.match(r'(?u)\p{scx:Arabic}', '\u062A')), True) + self.assertEqual(bool(regex.match(r'(?u)\p{scx:Balinese}', '\u062A')), + False) + self.assertEqual(bool(regex.match(r'(?u)\p{scx:Devanagari}', '\u091C')), + True) + self.assertEqual(bool(regex.match(r'(?u)\p{scx:Batak}', '\u091C')), False) + + # Hg issue 296: Group references are not taken into account when group is reporting the last match + self.assertEqual(regex.fullmatch('(?P.)*(?&x)', 'abc').captures('x'), + ['a', 'b', 'c']) + 
self.assertEqual(regex.fullmatch('(?P.)*(?&x)', 'abc').group('x'), + 'b') + + self.assertEqual(regex.fullmatch('(?P.)(?P.)(?P.)', + 'abc').captures('x'), ['a', 'b', 'c']) + self.assertEqual(regex.fullmatch('(?P.)(?P.)(?P.)', + 'abc').group('x'), 'c') + + # Hg issue 299: Partial gives misleading results with "open ended" regexp + self.assertEqual(regex.match('(?:ab)*', 'ab', partial=True).partial, + False) + self.assertEqual(regex.match('(?:ab)*', 'abab', partial=True).partial, + False) + self.assertEqual(regex.match('(?:ab)*?', '', partial=True).partial, + False) + self.assertEqual(regex.match('(?:ab)*+', 'ab', partial=True).partial, + False) + self.assertEqual(regex.match('(?:ab)*+', 'abab', partial=True).partial, + False) + self.assertEqual(regex.match('(?:ab)+', 'ab', partial=True).partial, + False) + self.assertEqual(regex.match('(?:ab)+', 'abab', partial=True).partial, + False) + self.assertEqual(regex.match('(?:ab)+?', 'ab', partial=True).partial, + False) + self.assertEqual(regex.match('(?:ab)++', 'ab', partial=True).partial, + False) + self.assertEqual(regex.match('(?:ab)++', 'abab', partial=True).partial, + False) + + self.assertEqual(regex.match('(?r)(?:ab)*', 'ab', partial=True).partial, + False) + self.assertEqual(regex.match('(?r)(?:ab)*', 'abab', partial=True).partial, + False) + self.assertEqual(regex.match('(?r)(?:ab)*?', '', partial=True).partial, + False) + self.assertEqual(regex.match('(?r)(?:ab)*+', 'ab', partial=True).partial, + False) + self.assertEqual(regex.match('(?r)(?:ab)*+', 'abab', partial=True).partial, + False) + self.assertEqual(regex.match('(?r)(?:ab)+', 'ab', partial=True).partial, + False) + self.assertEqual(regex.match('(?r)(?:ab)+', 'abab', partial=True).partial, + False) + self.assertEqual(regex.match('(?r)(?:ab)+?', 'ab', partial=True).partial, + False) + self.assertEqual(regex.match('(?r)(?:ab)++', 'ab', partial=True).partial, + False) + self.assertEqual(regex.match('(?r)(?:ab)++', 'abab', partial=True).partial, + False) + + 
self.assertEqual(regex.match('a*', '', partial=True).partial, False) + self.assertEqual(regex.match('a*?', '', partial=True).partial, False) + self.assertEqual(regex.match('a*+', '', partial=True).partial, False) + self.assertEqual(regex.match('a+', '', partial=True).partial, True) + self.assertEqual(regex.match('a+?', '', partial=True).partial, True) + self.assertEqual(regex.match('a++', '', partial=True).partial, True) + self.assertEqual(regex.match('a+', 'a', partial=True).partial, False) + self.assertEqual(regex.match('a+?', 'a', partial=True).partial, False) + self.assertEqual(regex.match('a++', 'a', partial=True).partial, False) + + self.assertEqual(regex.match('(?r)a*', '', partial=True).partial, False) + self.assertEqual(regex.match('(?r)a*?', '', partial=True).partial, False) + self.assertEqual(regex.match('(?r)a*+', '', partial=True).partial, False) + self.assertEqual(regex.match('(?r)a+', '', partial=True).partial, True) + self.assertEqual(regex.match('(?r)a+?', '', partial=True).partial, True) + self.assertEqual(regex.match('(?r)a++', '', partial=True).partial, True) + self.assertEqual(regex.match('(?r)a+', 'a', partial=True).partial, False) + self.assertEqual(regex.match('(?r)a+?', 'a', partial=True).partial, False) + self.assertEqual(regex.match('(?r)a++', 'a', partial=True).partial, False) + + self.assertEqual(regex.match(r"(?:\s*\w+'*)+", 'whatever', partial=True).partial, + False) + + # Hg issue 300: segmentation fault + pattern = ('(?PGGCGTCACACTTTGCTATGCCATAGCAT[AG]TTTATCCATAAGA' + 'TTAGCGGATCCTACCTGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCATAACAGAACATATTGA' + 'CTATCCGGTATTACCCGGCATGACAGGAGTAAAA){e<=1}' + '(?P[ACGT]{1059}){e<=2}' + '(?PTAATCGTCTTGTTTGATACACAAGGGTCGCATCTGCGGCCCTTTTGCTTTTTTAAG' + 'TTGTAAGGATATGCCATTCTAGA){e<=0}' + '(?P[ACGT]{18}){e<=0}' + '(?PAGATCGG[CT]AGAGCGTCGTGTAGGGAAAGAGTGTGG){e<=1}') + + text = ('GCACGGCGTCACACTTTGCTATGCCATAGCATATTTATCCATAAGATTAGCGGATCCTACC' + 'TGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCATAACAGAACATATTGACTATCCGGTATTACC' + 
'CGGCATGACAGGAGTAAAAATGGCTATCGACGAAAACAAACAGAAAGCGTTGGCGGCAGCACTGGGC' + 'CAGATTGAGAAACAATTTGGTAAAGGCTCCATCATGCGCCTGGGTGAAGACCGTTCCATGGATGTGG' + 'AAACCATCTCTACCGGTTCGCTTTCACTGGATATCGCGCTTGGGGCAGGTGGTCTGCCGATGGGCCG' + 'TATCGTCGAAATCTACGGACCGGAATCTTCCGGTAAAACCACGCTGACGCTGCAGGTGATCGCCGCA' + 'GCGCAGCGTGAAGGTAAAACCTGTGCGTTTATCGATGCTGAACACGCGCTGGACCCAATCTACGCAC' + 'GTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGCAGGCACTGGA' + 'AATCTGTGACGCCCTGGCGCGTTCTGGCGCAGTAGACGTTATCGTCGTTGACTCCGTGGCGGCACTG' + 'ACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCATATGGGCCTTGCGGCACGTATGATGA' + 'GCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGCAGTCCAACACGCTGCTGATCTTCATCAACCC' + 'CATCCGTATGAAAATTGGTGTGATGTTCGGCAACCCGGAAACCACTTACCGGTGGTAACGCGCTGAA' + 'ATTCTACGCCTCTGTTCGTCTCGACATCCGTTAAATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTG' + 'GGTAGCGAAACCCGCGTGAAAGTGGTGAAGAACAAAATCGCTGCGCCGTTTAAACAGGCTGAATTCC' + 'AGATCCTCTACGGCGAAGGTATCAACTTCTACCCCGAACTGGTTGACCTGGGCGTAAAAGAGAAGCT' + 'GATCGAGAAAGCAGGCGCGTGGTACAGCTACAAAGGTGAGAAGATCGGTCAGGGTAAAGCGAATGCG' + 'ACTGCCTGGCTGAAATTTAACCCGGAAACCGCGAAAGAGATCGAGTGAAAAGTACGTGAGTTGCTGC' + 'TGAGCAACCCGAACTCAACGCCGGATTTCTCTGTAGATGATAGCGAAGGCGTAGCAGAAACTAACGA' + 'AGATTTTTAATCGTCTTGTTTGATACACAAGGGTCGCATCTGCGGCCCTTTTGCTTTTTTAAGTTGT' + 'AAGGATATGCCATTCTAGACAGTTAACACACCAACAAAGATCGGTAGAGCGTCGTGTAGGGAAAGAG' + 'TGTGGTACC') + + m = regex.search(pattern, text, flags=regex.BESTMATCH) + self.assertEqual(m.fuzzy_counts, (0, 1, 0)) + self.assertEqual(m.fuzzy_changes, ([], [1206], [])) + + # Hg issue 306: Fuzzy match parameters not respecting quantifier scope + self.assertEqual(regex.search(r'(?e)(dogf(((oo){e<1})|((00){e<1}))d){e<2}', + 'dogfood').fuzzy_counts, (0, 0, 0)) + self.assertEqual(regex.search(r'(?e)(dogf(((oo){e<1})|((00){e<1}))d){e<2}', + 'dogfoot').fuzzy_counts, (1, 0, 0)) + + # Hg issue 312: \X not matching graphemes with zero-width-joins + self.assertEqual(regex.findall(r'\X', + '\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466'), + 
['\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466']) + + # Hg issue 320: Abnormal performance + self.assertEqual(bool(regex.search(r'(?=a)a', 'a')), True) + self.assertEqual(bool(regex.search(r'(?!b)a', 'a')), True) + + # Hg issue 327: .fullmatch() causes MemoryError + self.assertEqual(regex.fullmatch(r'((\d)*?)*?', '123').span(), (0, 3)) + + # Hg issue 329: Wrong group matches when question mark quantifier is used within a look behind + self.assertEqual(regex.search(r'''(?(DEFINE)(?(?THIS_SHOULD_NOT_MATCHx?)|(?right))).*(?<=(?&mydef).*)''', + 'x right').capturesdict(), {'mydef': ['right'], 'wrong': [], 'right': + ['right']}) + + # Hg issue 338: specifying allowed characters when fuzzy-matching + self.assertEqual(bool(regex.match(r'(?:cat){e<=1:[u]}', 'cut')), True) + self.assertEqual(bool(regex.match(r'(?:cat){e<=1:u}', 'cut')), True) + + # Hg issue 353: fuzzy changes negative indexes + self.assertEqual(regex.search(r'(?be)(AGTGTTCCCCGCGCCAGCGGGGATAAACCG){s<=5,i<=5,d<=5,s+i+d<=10}', + 'TTCCCCGCGCCAGCGGGGATAAACCG').fuzzy_changes, ([], [], [0, 1, 3, 5])) + + # Git issue 364: Contradictory values in fuzzy_counts and fuzzy_changes + self.assertEqual(regex.match(r'(?:bc){e}', 'c').fuzzy_counts, (1, 0, + 1)) + self.assertEqual(regex.match(r'(?:bc){e}', 'c').fuzzy_changes, ([0], + [], [1])) + self.assertEqual(regex.match(r'(?e)(?:bc){e}', 'c').fuzzy_counts, (0, + 0, 1)) + self.assertEqual(regex.match(r'(?e)(?:bc){e}', 'c').fuzzy_changes, + ([], [], [0])) + self.assertEqual(regex.match(r'(?b)(?:bc){e}', 'c').fuzzy_counts, (0, + 0, 1)) + self.assertEqual(regex.match(r'(?b)(?:bc){e}', 'c').fuzzy_changes, + ([], [], [0])) + + # Git issue 370: Confusions about Fuzzy matching behavior + self.assertEqual(regex.match('(?e)(?:^(\\$ )?\\d{1,3}(,\\d{3})*(\\.\\d{2})$){e}', + '$ 10,112.111.12').fuzzy_counts, (6, 0, 5)) + self.assertEqual(regex.match('(?e)(?:^(\\$ )?\\d{1,3}(,\\d{3})*(\\.\\d{2})$){s<=1}', + '$ 10,112.111.12').fuzzy_counts, (1, 0, 0)) + 
self.assertEqual(regex.match('(?e)(?:^(\\$ )?\\d{1,3}(,\\d{3})*(\\.\\d{2})$){s<=1,i<=1,d<=1}', + '$ 10,112.111.12').fuzzy_counts, (1, 0, 0)) + self.assertEqual(regex.match('(?e)(?:^(\\$ )?\\d{1,3}(,\\d{3})*(\\.\\d{2})$){s<=3}', + '$ 10,1a2.111.12').fuzzy_counts, (2, 0, 0)) + self.assertEqual(regex.match('(?e)(?:^(\\$ )?\\d{1,3}(,\\d{3})*(\\.\\d{2})$){s<=2}', + '$ 10,1a2.111.12').fuzzy_counts, (2, 0, 0)) + + self.assertEqual(regex.fullmatch(r'(?e)(?:0?,0(?:,0)?){s<=1,d<=1}', + ',0;0').fuzzy_counts, (1, 0, 0)) + self.assertEqual(regex.fullmatch(r'(?e)(?:0??,0(?:,0)?){s<=1,d<=1}', + ',0;0').fuzzy_counts, (1, 0, 0)) + + # Git issue 371: Specifying character set when fuzzy-matching allows characters not in the set + self.assertEqual(regex.search(r"\b(?e)(?:\d{6,20}){i<=5:[\-\\\/]}\b", + "cat dog starting at 00:01132.000. hello world"), None) + + # Git issue 385: Comments in expressions + self.assertEqual(bool(regex.compile('(?#)')), True) + self.assertEqual(bool(regex.compile('(?x)(?#)')), True) + + # Git issue 394: Unexpected behaviour in fuzzy matching with limited character set with IGNORECASE flag + self.assertEqual(regex.findall(r'(\d+){i<=2:[ab]}', '123X4Y5'), + ['123', '4', '5']) + self.assertEqual(regex.findall(r'(?i)(\d+){i<=2:[ab]}', '123X4Y5'), + ['123', '4', '5']) + + # Git issue 403: Fuzzy matching with wrong distance (unnecessary substitutions) + self.assertEqual(regex.match(r'^(test){e<=5}$', 'terstin', + flags=regex.B).fuzzy_counts, (0, 3, 0)) + + # Git issue 408: regex fails with a quantified backreference but succeeds with repeated backref + self.assertEqual(bool(regex.match(r"(?:(x*)\1\1\1)*x$", "x" * 5)), True) + self.assertEqual(bool(regex.match(r"(?:(x*)\1{3})*x$", "x" * 5)), True) + + # Git issue 415: Fuzzy character restrictions don't apply to insertions at "right edge" + self.assertEqual(regex.match(r't(?:es){s<=1:\d}t', 'te5t').group(), + 'te5t') + self.assertEqual(regex.match(r't(?:es){s<=1:\d}t', 'tezt'), None) + 
self.assertEqual(regex.match(r't(?:es){i<=1:\d}t', 'tes5t').group(), + 'tes5t') + self.assertEqual(regex.match(r't(?:es){i<=1:\d}t', 'teszt'), None) + self.assertEqual(regex.match(r't(?:es){i<=1:\d}t', + 'tes5t').fuzzy_changes, ([], [3], [])) + self.assertEqual(regex.match(r't(es){i<=1,0.*)(?PCTTCC){e<=1}(?P([ACGT]){4,6})(?PCAATACCGACTCCTCACTGTGT){e<=2}(?P([ACGT]){0,6}$)' + + m = regex.match(pattern, sequence, flags=regex.BESTMATCH) + self.assertEqual(m.span(), (0, 50)) + self.assertEqual(m.groupdict(), {'insert': 'TTCAGACGTGTGCT', 'anchor': 'CTTCC', 'umi': 'GATCT', 'sid': 'CAATACCGACTCCTCACTGTGT', 'end': 'GTCT'}) + + m = regex.match(pattern, sequence, flags=regex.ENHANCEMATCH) + self.assertEqual(m.span(), (0, 50)) + self.assertEqual(m.groupdict(), {'insert': 'TTCAGACGTGTGCT', 'anchor': 'CTTCC', 'umi': 'GATCT', 'sid': 'CAATACCGACTCCTCACTGTGT', 'end': 'GTCT'}) + + # Git issue 433: Disagreement between fuzzy_counts and fuzzy_changes + pattern = r'(?P.*)(?PAACACTGG){e<=1}(?P([AT][CG]){5}){e<=2}(?PGTAACCGAAG){e<=2}(?P([ACGT]){0,6}$)' + + sequence = 'GGAAAACACTGGTCTCAGTCTCGTAACCGAAGTGGTCG' + m = regex.match(pattern, sequence, flags=regex.BESTMATCH) + self.assertEqual(m.fuzzy_counts, (0, 0, 0)) + self.assertEqual(m.fuzzy_changes, ([], [], [])) + + sequence = 'GGAAAACACTGGTCTCAGTCTCGTCCCCGAAGTGGTCG' + m = regex.match(pattern, sequence, flags=regex.BESTMATCH) + self.assertEqual(m.fuzzy_counts, (2, 0, 0)) + self.assertEqual(m.fuzzy_changes, ([24, 25], [], [])) + + # Git issue 439: Unmatched groups: sub vs subf + self.assertEqual(regex.sub(r'(test1)|(test2)', r'matched: \1\2', 'test1'), 'matched: test1') + self.assertEqual(regex.subf(r'(test1)|(test2)', r'matched: {1}{2}', 'test1'), 'matched: test1') + self.assertEqual(regex.search(r'(test1)|(test2)', 'matched: test1').expand(r'matched: \1\2'), 'matched: test1'), + self.assertEqual(regex.search(r'(test1)|(test2)', 'matched: test1').expandf(r'matched: {1}{2}'), 'matched: test1') + + # Git issue 442: Fuzzy regex matching 
doesn't seem to test insertions correctly + self.assertEqual(regex.search(r"(?:\bha\b){i:[ ]}", "having"), None) + self.assertEqual(regex.search(r"(?:\bha\b){i:[ ]}", "having", flags=regex.I), None) + + # Git issue 467: Scoped inline flags 'a', 'u' and 'L' affect global flags + self.assertEqual(regex.match(r'(?a:\w)\w', 'd\N{CYRILLIC SMALL LETTER ZHE}').span(), (0, 2)) + self.assertEqual(regex.match(r'(?a:\w)(?u:\w)', 'd\N{CYRILLIC SMALL LETTER ZHE}').span(), (0, 2)) + + def test_fuzzy_ext(self): + self.assertEqual(bool(regex.fullmatch(r'(?r)(?:a){e<=1:[a-z]}', 'e')), + True) + self.assertEqual(bool(regex.fullmatch(r'(?:a){e<=1:[a-z]}', 'e')), + True) + self.assertEqual(bool(regex.fullmatch(r'(?:a){e<=1:[a-z]}', '-')), + False) + self.assertEqual(bool(regex.fullmatch(r'(?r)(?:a){e<=1:[a-z]}', '-')), + False) + + self.assertEqual(bool(regex.fullmatch(r'(?:a){e<=1:[a-z]}', 'ae')), + True) + self.assertEqual(bool(regex.fullmatch(r'(?r)(?:a){e<=1:[a-z]}', + 'ae')), True) + self.assertEqual(bool(regex.fullmatch(r'(?:a){e<=1:[a-z]}', 'a-')), + False) + self.assertEqual(bool(regex.fullmatch(r'(?r)(?:a){e<=1:[a-z]}', + 'a-')), False) + + self.assertEqual(bool(regex.fullmatch(r'(?:ab){e<=1:[a-z]}', 'ae')), + True) + self.assertEqual(bool(regex.fullmatch(r'(?r)(?:ab){e<=1:[a-z]}', + 'ae')), True) + self.assertEqual(bool(regex.fullmatch(r'(?:ab){e<=1:[a-z]}', 'a-')), + False) + self.assertEqual(bool(regex.fullmatch(r'(?r)(?:ab){e<=1:[a-z]}', + 'a-')), False) + + self.assertEqual(bool(regex.fullmatch(r'(a)\1{e<=1:[a-z]}', 'ae')), + True) + self.assertEqual(bool(regex.fullmatch(r'(?r)\1{e<=1:[a-z]}(a)', + 'ea')), True) + self.assertEqual(bool(regex.fullmatch(r'(a)\1{e<=1:[a-z]}', 'a-')), + False) + self.assertEqual(bool(regex.fullmatch(r'(?r)\1{e<=1:[a-z]}(a)', + '-a')), False) + + self.assertEqual(bool(regex.fullmatch(r'(?fiu)(?:\N{LATIN SMALL LETTER SHARP S}){e<=1:[a-z]}', + 'ts')), True) + self.assertEqual(bool(regex.fullmatch(r'(?fiu)(?:\N{LATIN SMALL LETTER SHARP 
S}){e<=1:[a-z]}', + 'st')), True) + self.assertEqual(bool(regex.fullmatch(r'(?firu)(?:\N{LATIN SMALL LETTER SHARP S}){e<=1:[a-z]}', + 'st')), True) + self.assertEqual(bool(regex.fullmatch(r'(?firu)(?:\N{LATIN SMALL LETTER SHARP S}){e<=1:[a-z]}', + 'ts')), True) + self.assertEqual(bool(regex.fullmatch(r'(?fiu)(?:\N{LATIN SMALL LETTER SHARP S}){e<=1:[a-z]}', + '-s')), False) + self.assertEqual(bool(regex.fullmatch(r'(?fiu)(?:\N{LATIN SMALL LETTER SHARP S}){e<=1:[a-z]}', + 's-')), False) + self.assertEqual(bool(regex.fullmatch(r'(?firu)(?:\N{LATIN SMALL LETTER SHARP S}){e<=1:[a-z]}', + 's-')), False) + self.assertEqual(bool(regex.fullmatch(r'(?firu)(?:\N{LATIN SMALL LETTER SHARP S}){e<=1:[a-z]}', + '-s')), False) + + self.assertEqual(bool(regex.fullmatch(r'(?fiu)(\N{LATIN SMALL LETTER SHARP S})\1{e<=1:[a-z]}', + 'ssst')), True) + self.assertEqual(bool(regex.fullmatch(r'(?fiu)(\N{LATIN SMALL LETTER SHARP S})\1{e<=1:[a-z]}', + 'ssts')), True) + self.assertEqual(bool(regex.fullmatch(r'(?firu)\1{e<=1:[a-z]}(\N{LATIN SMALL LETTER SHARP S})', + 'stss')), True) + self.assertEqual(bool(regex.fullmatch(r'(?firu)\1{e<=1:[a-z]}(\N{LATIN SMALL LETTER SHARP S})', + 'tsss')), True) + self.assertEqual(bool(regex.fullmatch(r'(?fiu)(\N{LATIN SMALL LETTER SHARP S})\1{e<=1:[a-z]}', + 'ss-s')), False) + self.assertEqual(bool(regex.fullmatch(r'(?fiu)(\N{LATIN SMALL LETTER SHARP S})\1{e<=1:[a-z]}', + 'sss-')), False) + self.assertEqual(bool(regex.fullmatch(r'(?firu)(\N{LATIN SMALL LETTER SHARP S})\1{e<=1:[a-z]}', + '-s')), False) + self.assertEqual(bool(regex.fullmatch(r'(?firu)(\N{LATIN SMALL LETTER SHARP S})\1{e<=1:[a-z]}', + 's-')), False) + + self.assertEqual(bool(regex.fullmatch(r'(?fiu)(ss)\1{e<=1:[a-z]}', + '\N{LATIN SMALL LETTER SHARP S}ts')), True) + self.assertEqual(bool(regex.fullmatch(r'(?fiu)(ss)\1{e<=1:[a-z]}', + '\N{LATIN SMALL LETTER SHARP S}st')), True) + self.assertEqual(bool(regex.fullmatch(r'(?firu)\1{e<=1:[a-z]}(ss)', + 'st\N{LATIN SMALL LETTER SHARP S}')), True) + 
self.assertEqual(bool(regex.fullmatch(r'(?firu)\1{e<=1:[a-z]}(ss)', + 'ts\N{LATIN SMALL LETTER SHARP S}')), True) + self.assertEqual(bool(regex.fullmatch(r'(?fiu)(ss)\1{e<=1:[a-z]}', + '\N{LATIN SMALL LETTER SHARP S}-s')), False) + self.assertEqual(bool(regex.fullmatch(r'(?fiu)(ss)\1{e<=1:[a-z]}', + '\N{LATIN SMALL LETTER SHARP S}s-')), False) + self.assertEqual(bool(regex.fullmatch(r'(?firu)(ss)\1{e<=1:[a-z]}', + 's-\N{LATIN SMALL LETTER SHARP S}')), False) + self.assertEqual(bool(regex.fullmatch(r'(?firu)(ss)\1{e<=1:[a-z]}', + '-s\N{LATIN SMALL LETTER SHARP S}')), False) + + def test_subscripted_captures(self): + self.assertEqual(regex.match(r'(?P.)+', + 'abc').expandf('{0} {0[0]} {0[-1]}'), 'abc abc abc') + self.assertEqual(regex.match(r'(?P.)+', + 'abc').expandf('{1} {1[0]} {1[1]} {1[2]} {1[-1]} {1[-2]} {1[-3]}'), + 'c a b c c b a') + self.assertEqual(regex.match(r'(?P.)+', + 'abc').expandf('{x} {x[0]} {x[1]} {x[2]} {x[-1]} {x[-2]} {x[-3]}'), + 'c a b c c b a') + + self.assertEqual(regex.subf(r'(?P.)+', r'{0} {0[0]} {0[-1]}', + 'abc'), 'abc abc abc') + self.assertEqual(regex.subf(r'(?P.)+', + '{1} {1[0]} {1[1]} {1[2]} {1[-1]} {1[-2]} {1[-3]}', 'abc'), + 'c a b c c b a') + self.assertEqual(regex.subf(r'(?P.)+', + '{x} {x[0]} {x[1]} {x[2]} {x[-1]} {x[-2]} {x[-3]}', 'abc'), + 'c a b c c b a') + + def test_more_zerowidth(self): + if sys.version_info >= (3, 7, 0): + self.assertEqual(regex.split(r'\b|:+', 'a::bc'), ['', 'a', '', '', + 'bc', '']) + self.assertEqual(regex.sub(r'\b|:+', '-', 'a::bc'), '-a---bc-') + self.assertEqual(regex.findall(r'\b|:+', 'a::bc'), ['', '', '::', + '', '']) + self.assertEqual([m.span() for m in regex.finditer(r'\b|:+', + 'a::bc')], [(0, 0), (1, 1), (1, 3), (3, 3), (5, 5)]) + self.assertEqual([m.span() for m in regex.finditer(r'(?m)^\s*?$', + 'foo\n\n\nbar')], [(4, 4), (4, 5), (5, 5)]) + +def test_main(): + unittest.main(verbosity=2) + +if __name__ == "__main__": + test_main() diff --git 
a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/scikit_image.libs/libgomp-a34b3233.so.1.0.0 b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/scikit_image.libs/libgomp-a34b3233.so.1.0.0 new file mode 100644 index 0000000000000000000000000000000000000000..47dd823472ef987de68b8698424dabe93daa1fc0 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/scikit_image.libs/libgomp-a34b3233.so.1.0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d465c659f45fe97537c0e1009faeb1ea6e40a5642ddffdee9fc1b73c0732421e +size 168192 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0 b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0 new file mode 100644 index 0000000000000000000000000000000000000000..47dd823472ef987de68b8698424dabe93daa1fc0 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d465c659f45fe97537c0e1009faeb1ea6e40a5642ddffdee9fc1b73c0732421e +size 168192 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/scipy.libs/libquadmath-96973f99.so.0.0.0 b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/scipy.libs/libquadmath-96973f99.so.0.0.0 new file mode 100644 index 0000000000000000000000000000000000000000..8407877bef27900f55c2fe0057a751d2d0cd0530 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/scipy.libs/libquadmath-96973f99.so.0.0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97cda85ddb5163e2da6e1edb4e1d6b557833a99a40eda079ae37e5039465b65d +size 247608 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/__init__.py 
b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..941f772a8241561ee10ac7c75babf9a5534e446b --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/__init__.py @@ -0,0 +1,21 @@ +# Import seaborn objects +from .rcmod import * # noqa: F401,F403 +from .utils import * # noqa: F401,F403 +from .palettes import * # noqa: F401,F403 +from .relational import * # noqa: F401,F403 +from .regression import * # noqa: F401,F403 +from .categorical import * # noqa: F401,F403 +from .distributions import * # noqa: F401,F403 +from .matrix import * # noqa: F401,F403 +from .miscplot import * # noqa: F401,F403 +from .axisgrid import * # noqa: F401,F403 +from .widgets import * # noqa: F401,F403 +from .colors import xkcd_rgb, crayons # noqa: F401 +from . import cm # noqa: F401 + +# Capture the original matplotlib rcParams +import matplotlib as mpl +_orig_rc_params = mpl.rcParams.copy() + +# Define the seaborn version +__version__ = "0.11.2" diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/_decorators.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/_decorators.py new file mode 100644 index 0000000000000000000000000000000000000000..d1c24b870b91438c64c8e14ccf28f69e59b0b441 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/_decorators.py @@ -0,0 +1,62 @@ +from inspect import signature, Parameter +from functools import wraps +import warnings + + +# This function was adapted from scikit-learn +# github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/validation.py +def _deprecate_positional_args(f): + """Decorator for methods that issues warnings for positional arguments. + + Using the keyword-only argument syntax in pep 3102, arguments after the + * will issue a warning when passed as a positional argument. 
+ + Parameters + ---------- + f : function + function to check arguments on + + """ + sig = signature(f) + kwonly_args = [] + all_args = [] + + for name, param in sig.parameters.items(): + if param.kind == Parameter.POSITIONAL_OR_KEYWORD: + all_args.append(name) + elif param.kind == Parameter.KEYWORD_ONLY: + kwonly_args.append(name) + + @wraps(f) + def inner_f(*args, **kwargs): + extra_args = len(args) - len(all_args) + if extra_args > 0: + plural = "s" if extra_args > 1 else "" + article = "" if plural else "a " + warnings.warn( + "Pass the following variable{} as {}keyword arg{}: {}. " + "From version 0.12, the only valid positional argument " + "will be `data`, and passing other arguments without an " + "explicit keyword will result in an error or misinterpretation." + .format(plural, article, plural, + ", ".join(kwonly_args[:extra_args])), + FutureWarning + ) + kwargs.update({k: arg for k, arg in zip(sig.parameters, args)}) + return f(**kwargs) + return inner_f + + +def share_init_params_with_map(cls): + """Make cls.map a classmethod with same signature as cls.__init__.""" + map_sig = signature(cls.map) + init_sig = signature(cls.__init__) + + new = [v for k, v in init_sig.parameters.items() if k != "self"] + new.insert(0, map_sig.parameters["cls"]) + cls.map.__signature__ = map_sig.replace(parameters=new) + cls.map.__doc__ = cls.__init__.__doc__ + + cls.map = classmethod(cls.map) + + return cls diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/_docstrings.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/_docstrings.py new file mode 100644 index 0000000000000000000000000000000000000000..2ab210b6ffbf63f21ebee9a4a3d59dcbc94fcb57 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/_docstrings.py @@ -0,0 +1,198 @@ +import re +import pydoc +from .external.docscrape import NumpyDocString + + +class DocstringComponents: + + regexp = 
re.compile(r"\n((\n|.)+)\n\s*", re.MULTILINE) + + def __init__(self, comp_dict, strip_whitespace=True): + """Read entries from a dict, optionally stripping outer whitespace.""" + if strip_whitespace: + entries = {} + for key, val in comp_dict.items(): + m = re.match(self.regexp, val) + if m is None: + entries[key] = val + else: + entries[key] = m.group(1) + else: + entries = comp_dict.copy() + + self.entries = entries + + def __getattr__(self, attr): + """Provide dot access to entries for clean raw docstrings.""" + if attr in self.entries: + return self.entries[attr] + else: + try: + return self.__getattribute__(attr) + except AttributeError as err: + # If Python is run with -OO, it will strip docstrings and our lookup + # from self.entries will fail. We check for __debug__, which is actually + # set to False by -O (it is True for normal execution). + # But we only want to see an error when building the docs; + # not something users should see, so this slight inconsistency is fine. + if __debug__: + raise err + else: + pass + + @classmethod + def from_nested_components(cls, **kwargs): + """Add multiple sub-sets of components.""" + return cls(kwargs, strip_whitespace=False) + + @classmethod + def from_function_params(cls, func): + """Use the numpydoc parser to extract components from existing func.""" + params = NumpyDocString(pydoc.getdoc(func))["Parameters"] + comp_dict = {} + for p in params: + name = p.name + type = p.type + desc = "\n ".join(p.desc) + comp_dict[name] = f"{name} : {type}\n {desc}" + + return cls(comp_dict) + + +# TODO is "vector" the best term here? We mean to imply 1D data with a variety +# of types? + +# TODO now that we can parse numpydoc style strings, do we need to define dicts +# of docstring components, or just write out a docstring? + + +_core_params = dict( + data=""" +data : :class:`pandas.DataFrame`, :class:`numpy.ndarray`, mapping, or sequence + Input data structure. 
Either a long-form collection of vectors that can be + assigned to named variables or a wide-form dataset that will be internally + reshaped. + """, # TODO add link to user guide narrative when exists + xy=""" +x, y : vectors or keys in ``data`` + Variables that specify positions on the x and y axes. + """, + hue=""" +hue : vector or key in ``data`` + Semantic variable that is mapped to determine the color of plot elements. + """, + palette=""" +palette : string, list, dict, or :class:`matplotlib.colors.Colormap` + Method for choosing the colors to use when mapping the ``hue`` semantic. + String values are passed to :func:`color_palette`. List or dict values + imply categorical mapping, while a colormap object implies numeric mapping. + """, # noqa: E501 + hue_order=""" +hue_order : vector of strings + Specify the order of processing and plotting for categorical levels of the + ``hue`` semantic. + """, + hue_norm=""" +hue_norm : tuple or :class:`matplotlib.colors.Normalize` + Either a pair of values that set the normalization range in data units + or an object that will map from data units into a [0, 1] interval. Usage + implies numeric mapping. + """, + color=""" +color : :mod:`matplotlib color ` + Single color specification for when hue mapping is not used. Otherwise, the + plot will try to hook into the matplotlib property cycle. + """, + ax=""" +ax : :class:`matplotlib.axes.Axes` + Pre-existing axes for the plot. Otherwise, call :func:`matplotlib.pyplot.gca` + internally. + """, # noqa: E501 +) + + +_core_returns = dict( + ax=""" +:class:`matplotlib.axes.Axes` + The matplotlib axes containing the plot. + """, + facetgrid=""" +:class:`FacetGrid` + An object managing one or more subplots that correspond to conditional data + subsets with convenient methods for batch-setting of axes attributes. 
+ """, + jointgrid=""" +:class:`JointGrid` + An object managing multiple subplots that correspond to joint and marginal axes + for plotting a bivariate relationship or distribution. + """, + pairgrid=""" +:class:`PairGrid` + An object managing multiple subplots that correspond to joint and marginal axes + for pairwise combinations of multiple variables in a dataset. + """, +) + + +_seealso_blurbs = dict( + + # Relational plots + scatterplot=""" +scatterplot : Plot data using points. + """, + lineplot=""" +lineplot : Plot data using lines. + """, + + # Distribution plots + displot=""" +displot : Figure-level interface to distribution plot functions. + """, + histplot=""" +histplot : Plot a histogram of binned counts with optional normalization or smoothing. + """, + kdeplot=""" +kdeplot : Plot univariate or bivariate distributions using kernel density estimation. + """, + ecdfplot=""" +ecdfplot : Plot empirical cumulative distribution functions. + """, + rugplot=""" +rugplot : Plot a tick at each observation value along the x and/or y axes. + """, + + # Categorical plots + stripplot=""" +stripplot : Plot a categorical scatter with jitter. + """, + swarmplot=""" +swarmplot : Plot a categorical scatter with non-overlapping points. + """, + violinplot=""" +violinplot : Draw an enhanced boxplot using kernel density estimation. + """, + pointplot=""" +pointplot : Plot point estimates and CIs using markers and lines. + """, + + # Multiples + jointplot=""" +jointplot : Draw a bivariate plot with univariate marginal distributions. + """, + pairplot=""" +jointplot : Draw multiple bivariate plots with univariate marginal distributions. + """, + jointgrid=""" +JointGrid : Set up a figure with joint and marginal views on bivariate data. + """, + pairgrid=""" +PairGrid : Set up a figure with joint and marginal views on multiple variables. 
+ """, +) + + +_core_docs = dict( + params=DocstringComponents(_core_params), + returns=DocstringComponents(_core_returns), + seealso=DocstringComponents(_seealso_blurbs), +) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/_statistics.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/_statistics.py new file mode 100644 index 0000000000000000000000000000000000000000..a0acd36f3a41f44b6635b9648e88188cd1813887 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/_statistics.py @@ -0,0 +1,441 @@ +"""Statistical transformations for visualization. + +This module is currently private, but is being written to eventually form part +of the public API. + +The classes should behave roughly in the style of scikit-learn. + +- All data-independent parameters should be passed to the class constructor. +- Each class should impelment a default transformation that is exposed through + __call__. These are currently written for vector arguements, but I think + consuming a whole `plot_data` DataFrame and return it with transformed + variables would make more sense. +- Some class have data-dependent preprocessing that should be cached and used + multiple times (think defining histogram bins off all data and then counting + observations within each bin multiple times per data subsets). These currently + have unique names, but it would be good to have a common name. Not quite + `fit`, but something similar. +- Alternatively, the transform interface could take some information about grouping + variables and do a groupby internally. +- Some classes should define alternate transforms that might make the most sense + with a different function. For example, KDE usually evaluates the distribution + on a regular grid, but it would be useful for it to transform at the actual + datapoints. Then again, this could be controlled by a parameter at the time of + class instantiation. 
+ +""" +from distutils.version import LooseVersion +from numbers import Number +import numpy as np +import scipy as sp +from scipy import stats + +from .utils import _check_argument + + +class KDE: + """Univariate and bivariate kernel density estimator.""" + def __init__( + self, *, + bw_method=None, + bw_adjust=1, + gridsize=200, + cut=3, + clip=None, + cumulative=False, + ): + """Initialize the estimator with its parameters. + + Parameters + ---------- + bw_method : string, scalar, or callable, optional + Method for determining the smoothing bandwidth to use; passed to + :class:`scipy.stats.gaussian_kde`. + bw_adjust : number, optional + Factor that multiplicatively scales the value chosen using + ``bw_method``. Increasing will make the curve smoother. See Notes. + gridsize : int, optional + Number of points on each dimension of the evaluation grid. + cut : number, optional + Factor, multiplied by the smoothing bandwidth, that determines how + far the evaluation grid extends past the extreme datapoints. When + set to 0, truncate the curve at the data limits. + clip : pair of numbers or None, or a pair of such pairs + Do not evaluate the density outside of these limits. + cumulative : bool, optional + If True, estimate a cumulative distribution function. 
+ + """ + if clip is None: + clip = None, None + + self.bw_method = bw_method + self.bw_adjust = bw_adjust + self.gridsize = gridsize + self.cut = cut + self.clip = clip + self.cumulative = cumulative + + self.support = None + + def _define_support_grid(self, x, bw, cut, clip, gridsize): + """Create the grid of evaluation points depending for vector x.""" + clip_lo = -np.inf if clip[0] is None else clip[0] + clip_hi = +np.inf if clip[1] is None else clip[1] + gridmin = max(x.min() - bw * cut, clip_lo) + gridmax = min(x.max() + bw * cut, clip_hi) + return np.linspace(gridmin, gridmax, gridsize) + + def _define_support_univariate(self, x, weights): + """Create a 1D grid of evaluation points.""" + kde = self._fit(x, weights) + bw = np.sqrt(kde.covariance.squeeze()) + grid = self._define_support_grid( + x, bw, self.cut, self.clip, self.gridsize + ) + return grid + + def _define_support_bivariate(self, x1, x2, weights): + """Create a 2D grid of evaluation points.""" + clip = self.clip + if clip[0] is None or np.isscalar(clip[0]): + clip = (clip, clip) + + kde = self._fit([x1, x2], weights) + bw = np.sqrt(np.diag(kde.covariance).squeeze()) + + grid1 = self._define_support_grid( + x1, bw[0], self.cut, clip[0], self.gridsize + ) + grid2 = self._define_support_grid( + x2, bw[1], self.cut, clip[1], self.gridsize + ) + + return grid1, grid2 + + def define_support(self, x1, x2=None, weights=None, cache=True): + """Create the evaluation grid for a given data set.""" + if x2 is None: + support = self._define_support_univariate(x1, weights) + else: + support = self._define_support_bivariate(x1, x2, weights) + + if cache: + self.support = support + + return support + + def _fit(self, fit_data, weights=None): + """Fit the scipy kde while adding bw_adjust logic and version check.""" + fit_kws = {"bw_method": self.bw_method} + if weights is not None: + if LooseVersion(sp.__version__) < "1.2.0": + msg = "Weighted KDE requires scipy >= 1.2.0" + raise RuntimeError(msg) + 
fit_kws["weights"] = weights + + kde = stats.gaussian_kde(fit_data, **fit_kws) + kde.set_bandwidth(kde.factor * self.bw_adjust) + + return kde + + def _eval_univariate(self, x, weights=None): + """Fit and evaluate a univariate on univariate data.""" + support = self.support + if support is None: + support = self.define_support(x, cache=False) + + kde = self._fit(x, weights) + + if self.cumulative: + s_0 = support[0] + density = np.array([ + kde.integrate_box_1d(s_0, s_i) for s_i in support + ]) + else: + density = kde(support) + + return density, support + + def _eval_bivariate(self, x1, x2, weights=None): + """Fit and evaluate a univariate on bivariate data.""" + support = self.support + if support is None: + support = self.define_support(x1, x2, cache=False) + + kde = self._fit([x1, x2], weights) + + if self.cumulative: + + grid1, grid2 = support + density = np.zeros((grid1.size, grid2.size)) + p0 = grid1.min(), grid2.min() + for i, xi in enumerate(grid1): + for j, xj in enumerate(grid2): + density[i, j] = kde.integrate_box(p0, (xi, xj)) + + else: + + xx1, xx2 = np.meshgrid(*support) + density = kde([xx1.ravel(), xx2.ravel()]).reshape(xx1.shape) + + return density, support + + def __call__(self, x1, x2=None, weights=None): + """Fit and evaluate on univariate or bivariate data.""" + if x2 is None: + return self._eval_univariate(x1, weights) + else: + return self._eval_bivariate(x1, x2, weights) + + +class Histogram: + """Univariate and bivariate histogram estimator.""" + def __init__( + self, + stat="count", + bins="auto", + binwidth=None, + binrange=None, + discrete=False, + cumulative=False, + ): + """Initialize the estimator with its parameters. + + Parameters + ---------- + stat : str + Aggregate statistic to compute in each bin. 
+ + - `count`: show the number of observations in each bin + - `frequency`: show the number of observations divided by the bin width + - `probability` or `proportion`: normalize such that bar heights sum to 1 + - `percent`: normalize such that bar heights sum to 100 + - `density`: normalize such that the total area of the histogram equals 1 + + bins : str, number, vector, or a pair of such values + Generic bin parameter that can be the name of a reference rule, + the number of bins, or the breaks of the bins. + Passed to :func:`numpy.histogram_bin_edges`. + binwidth : number or pair of numbers + Width of each bin, overrides ``bins`` but can be used with + ``binrange``. + binrange : pair of numbers or a pair of pairs + Lowest and highest value for bin edges; can be used either + with ``bins`` or ``binwidth``. Defaults to data extremes. + discrete : bool or pair of bools + If True, set ``binwidth`` and ``binrange`` such that bin + edges cover integer values in the dataset. + cumulative : bool + If True, return the cumulative statistic. 
+ + """ + stat_choices = [ + "count", "frequency", "density", "probability", "proportion", "percent", + ] + _check_argument("stat", stat_choices, stat) + + self.stat = stat + self.bins = bins + self.binwidth = binwidth + self.binrange = binrange + self.discrete = discrete + self.cumulative = cumulative + + self.bin_kws = None + + def _define_bin_edges(self, x, weights, bins, binwidth, binrange, discrete): + """Inner function that takes bin parameters as arguments.""" + if binrange is None: + start, stop = x.min(), x.max() + else: + start, stop = binrange + + if discrete: + bin_edges = np.arange(start - .5, stop + 1.5) + elif binwidth is not None: + step = binwidth + bin_edges = np.arange(start, stop + step, step) + else: + bin_edges = np.histogram_bin_edges( + x, bins, binrange, weights, + ) + return bin_edges + + def define_bin_params(self, x1, x2=None, weights=None, cache=True): + """Given data, return numpy.histogram parameters to define bins.""" + if x2 is None: + + bin_edges = self._define_bin_edges( + x1, weights, self.bins, self.binwidth, self.binrange, self.discrete, + ) + + if isinstance(self.bins, (str, Number)): + n_bins = len(bin_edges) - 1 + bin_range = bin_edges.min(), bin_edges.max() + bin_kws = dict(bins=n_bins, range=bin_range) + else: + bin_kws = dict(bins=bin_edges) + + else: + + bin_edges = [] + for i, x in enumerate([x1, x2]): + + # Resolve out whether bin parameters are shared + # or specific to each variable + + bins = self.bins + if not bins or isinstance(bins, (str, Number)): + pass + elif isinstance(bins[i], str): + bins = bins[i] + elif len(bins) == 2: + bins = bins[i] + + binwidth = self.binwidth + if binwidth is None: + pass + elif not isinstance(binwidth, Number): + binwidth = binwidth[i] + + binrange = self.binrange + if binrange is None: + pass + elif not isinstance(binrange[0], Number): + binrange = binrange[i] + + discrete = self.discrete + if not isinstance(discrete, bool): + discrete = discrete[i] + + # Define the bins for this 
variable + + bin_edges.append(self._define_bin_edges( + x, weights, bins, binwidth, binrange, discrete, + )) + + bin_kws = dict(bins=tuple(bin_edges)) + + if cache: + self.bin_kws = bin_kws + + return bin_kws + + def _eval_bivariate(self, x1, x2, weights): + """Inner function for histogram of two variables.""" + bin_kws = self.bin_kws + if bin_kws is None: + bin_kws = self.define_bin_params(x1, x2, cache=False) + + density = self.stat == "density" + + hist, *bin_edges = np.histogram2d( + x1, x2, **bin_kws, weights=weights, density=density + ) + + area = np.outer( + np.diff(bin_edges[0]), + np.diff(bin_edges[1]), + ) + + if self.stat == "probability" or self.stat == "proportion": + hist = hist.astype(float) / hist.sum() + elif self.stat == "percent": + hist = hist.astype(float) / hist.sum() * 100 + elif self.stat == "frequency": + hist = hist.astype(float) / area + + if self.cumulative: + if self.stat in ["density", "frequency"]: + hist = (hist * area).cumsum(axis=0).cumsum(axis=1) + else: + hist = hist.cumsum(axis=0).cumsum(axis=1) + + return hist, bin_edges + + def _eval_univariate(self, x, weights): + """Inner function for histogram of one variable.""" + bin_kws = self.bin_kws + if bin_kws is None: + bin_kws = self.define_bin_params(x, weights=weights, cache=False) + + density = self.stat == "density" + hist, bin_edges = np.histogram( + x, **bin_kws, weights=weights, density=density, + ) + + if self.stat == "probability" or self.stat == "proportion": + hist = hist.astype(float) / hist.sum() + elif self.stat == "percent": + hist = hist.astype(float) / hist.sum() * 100 + elif self.stat == "frequency": + hist = hist.astype(float) / np.diff(bin_edges) + + if self.cumulative: + if self.stat in ["density", "frequency"]: + hist = (hist * np.diff(bin_edges)).cumsum() + else: + hist = hist.cumsum() + + return hist, bin_edges + + def __call__(self, x1, x2=None, weights=None): + """Count the occurrences in each bin, maybe normalize.""" + if x2 is None: + return 
self._eval_univariate(x1, weights) + else: + return self._eval_bivariate(x1, x2, weights) + + +class ECDF: + """Univariate empirical cumulative distribution estimator.""" + def __init__(self, stat="proportion", complementary=False): + """Initialize the class with its paramters + + Parameters + ---------- + stat : {{"proportion", "count"}} + Distribution statistic to compute. + complementary : bool + If True, use the complementary CDF (1 - CDF) + + """ + _check_argument("stat", ["count", "proportion"], stat) + self.stat = stat + self.complementary = complementary + + def _eval_bivariate(self, x1, x2, weights): + """Inner function for ECDF of two variables.""" + raise NotImplementedError("Bivariate ECDF is not implemented") + + def _eval_univariate(self, x, weights): + """Inner function for ECDF of one variable.""" + sorter = x.argsort() + x = x[sorter] + weights = weights[sorter] + y = weights.cumsum() + + if self.stat == "proportion": + y = y / y.max() + + x = np.r_[-np.inf, x] + y = np.r_[0, y] + + if self.complementary: + y = y.max() - y + + return y, x + + def __call__(self, x1, x2=None, weights=None): + """Return proportion or count of observations below each sorted datapoint.""" + x1 = np.asarray(x1) + if weights is None: + weights = np.ones_like(x1) + else: + weights = np.asarray(weights) + + if x2 is None: + return self._eval_univariate(x1, weights) + else: + return self._eval_bivariate(x1, x2, weights) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/_testing.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/_testing.py new file mode 100644 index 0000000000000000000000000000000000000000..20b11ebcb3c13c90ab00c7646b295f04a6691c02 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/_testing.py @@ -0,0 +1,108 @@ +import numpy as np +import matplotlib as mpl +from matplotlib.colors import to_rgb, to_rgba +from numpy.testing import 
assert_array_equal + + +LINE_PROPS = [ + "alpha", + "color", + "linewidth", + "linestyle", + "xydata", + "zorder", +] + +COLLECTION_PROPS = [ + "alpha", + "edgecolor", + "facecolor", + "fill", + "hatch", + "linestyle", + "linewidth", + "paths", + "zorder", +] + +BAR_PROPS = [ + "alpha", + "edgecolor", + "facecolor", + "fill", + "hatch", + "height", + "linestyle", + "linewidth", + "xy", + "zorder", +] + + +def assert_colors_equal(a, b, check_alpha=True): + + def handle_array(x): + + if isinstance(x, np.ndarray): + if x.ndim > 1: + x = np.unique(x, axis=0).squeeze() + if x.ndim > 1: + raise ValueError("Color arrays must be 1 dimensional") + return x + + a = handle_array(a) + b = handle_array(b) + + f = to_rgba if check_alpha else to_rgb + assert f(a) == f(b) + + +def assert_artists_equal(list1, list2, properties): + + assert len(list1) == len(list2) + for a1, a2 in zip(list1, list2): + prop1 = a1.properties() + prop2 = a2.properties() + for key in properties: + v1 = prop1[key] + v2 = prop2[key] + if key == "paths": + for p1, p2 in zip(v1, v2): + assert_array_equal(p1.vertices, p2.vertices) + assert_array_equal(p1.codes, p2.codes) + elif isinstance(v1, np.ndarray): + assert_array_equal(v1, v2) + elif key == "color": + v1 = mpl.colors.to_rgba(v1) + v2 = mpl.colors.to_rgba(v2) + assert v1 == v2 + else: + assert v1 == v2 + + +def assert_legends_equal(leg1, leg2): + + assert leg1.get_title().get_text() == leg2.get_title().get_text() + for t1, t2 in zip(leg1.get_texts(), leg2.get_texts()): + assert t1.get_text() == t2.get_text() + + assert_artists_equal( + leg1.get_patches(), leg2.get_patches(), BAR_PROPS, + ) + assert_artists_equal( + leg1.get_lines(), leg2.get_lines(), LINE_PROPS, + ) + + +def assert_plots_equal(ax1, ax2, labels=True): + + assert_artists_equal(ax1.patches, ax2.patches, BAR_PROPS) + assert_artists_equal(ax1.lines, ax2.lines, LINE_PROPS) + + poly1 = ax1.findobj(mpl.collections.PolyCollection) + poly2 = ax2.findobj(mpl.collections.PolyCollection) + 
assert_artists_equal(poly1, poly2, COLLECTION_PROPS) + + if labels: + assert ax1.get_xlabel() == ax2.get_xlabel() + assert ax1.get_ylabel() == ax2.get_ylabel() diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/apionly.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/apionly.py new file mode 100644 index 0000000000000000000000000000000000000000..1a27045b1caf86cb17537033ee6a332f4f8fac39 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/apionly.py @@ -0,0 +1,9 @@ +import warnings +from seaborn import * # noqa +reset_orig() # noqa + +msg = ( + "As seaborn no longer sets a default style on import, the seaborn.apionly " + "module is deprecated. It will be removed in a future version." +) +warnings.warn(msg, UserWarning) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/axisgrid.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/axisgrid.py new file mode 100644 index 0000000000000000000000000000000000000000..cc9e92945f2885b41ae592661d8ad864343d048f --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/axisgrid.py @@ -0,0 +1,2386 @@ +from itertools import product +from inspect import signature +import warnings +from textwrap import dedent +from distutils.version import LooseVersion + +import numpy as np +import pandas as pd +import matplotlib as mpl +import matplotlib.pyplot as plt + +from ._core import VectorPlotter, variable_type, categorical_order +from . 
import utils +from .utils import _check_argument, adjust_legend_subtitles, _draw_figure +from .palettes import color_palette, blend_palette +from ._decorators import _deprecate_positional_args +from ._docstrings import ( + DocstringComponents, + _core_docs, +) + + +__all__ = ["FacetGrid", "PairGrid", "JointGrid", "pairplot", "jointplot"] + + +_param_docs = DocstringComponents.from_nested_components( + core=_core_docs["params"], +) + + +class _BaseGrid: + """Base class for grids of subplots.""" + + def set(self, **kwargs): + """Set attributes on each subplot Axes.""" + for ax in self.axes.flat: + if ax is not None: # Handle removed axes + ax.set(**kwargs) + return self + + @property + def fig(self): + """DEPRECATED: prefer the `figure` property.""" + # Grid.figure is preferred because it matches the Axes attribute name. + # But as the maintanace burden on having this property is minimal, + # let's be slow about formally deprecating it. For now just note its deprecation + # in the docstring; add a warning in version 0.13, and eventually remove it. + return self._figure + + @property + def figure(self): + """Access the :class:`matplotlib.figure.Figure` object underlying the grid.""" + return self._figure + + def savefig(self, *args, **kwargs): + """ + Save an image of the plot. + + This wraps :meth:`matplotlib.figure.Figure.savefig`, using bbox_inches="tight" + by default. Parameters are passed through to the matplotlib function. + + """ + kwargs = kwargs.copy() + kwargs.setdefault("bbox_inches", "tight") + self.figure.savefig(*args, **kwargs) + + +class Grid(_BaseGrid): + """A grid that can have multiple subplots and an external legend.""" + _margin_titles = False + _legend_out = True + + def __init__(self): + + self._tight_layout_rect = [0, 0, 1, 1] + self._tight_layout_pad = None + + # This attribute is set externally and is a hack to handle newer functions that + # don't add proxy artists onto the Axes. We need an overall cleaner approach. 
+ self._extract_legend_handles = False + + def tight_layout(self, *args, **kwargs): + """Call fig.tight_layout within rect that exclude the legend.""" + kwargs = kwargs.copy() + kwargs.setdefault("rect", self._tight_layout_rect) + if self._tight_layout_pad is not None: + kwargs.setdefault("pad", self._tight_layout_pad) + self._figure.tight_layout(*args, **kwargs) + + def add_legend(self, legend_data=None, title=None, label_order=None, + adjust_subtitles=False, **kwargs): + """Draw a legend, maybe placing it outside axes and resizing the figure. + + Parameters + ---------- + legend_data : dict + Dictionary mapping label names (or two-element tuples where the + second element is a label name) to matplotlib artist handles. The + default reads from ``self._legend_data``. + title : string + Title for the legend. The default reads from ``self._hue_var``. + label_order : list of labels + The order that the legend entries should appear in. The default + reads from ``self.hue_names``. + adjust_subtitles : bool + If True, modify entries with invisible artists to left-align + the labels and set the font size to that of a title. + kwargs : key, value pairings + Other keyword arguments are passed to the underlying legend methods + on the Figure or Axes object. + + Returns + ------- + self : Grid instance + Returns self for easy chaining. 
+ + """ + # Find the data for the legend + if legend_data is None: + legend_data = self._legend_data + if label_order is None: + if self.hue_names is None: + label_order = list(legend_data.keys()) + else: + label_order = list(map(utils.to_utf8, self.hue_names)) + + blank_handle = mpl.patches.Patch(alpha=0, linewidth=0) + handles = [legend_data.get(l, blank_handle) for l in label_order] + title = self._hue_var if title is None else title + if LooseVersion(mpl.__version__) < LooseVersion("3.0"): + try: + title_size = mpl.rcParams["axes.labelsize"] * .85 + except TypeError: # labelsize is something like "large" + title_size = mpl.rcParams["axes.labelsize"] + else: + title_size = mpl.rcParams["legend.title_fontsize"] + + # Unpack nested labels from a hierarchical legend + labels = [] + for entry in label_order: + if isinstance(entry, tuple): + _, label = entry + else: + label = entry + labels.append(label) + + # Set default legend kwargs + kwargs.setdefault("scatterpoints", 1) + + if self._legend_out: + + kwargs.setdefault("frameon", False) + kwargs.setdefault("loc", "center right") + + # Draw a full-figure legend outside the grid + figlegend = self._figure.legend(handles, labels, **kwargs) + + self._legend = figlegend + figlegend.set_title(title, prop={"size": title_size}) + + if adjust_subtitles: + adjust_legend_subtitles(figlegend) + + # Draw the plot to set the bounding boxes correctly + _draw_figure(self._figure) + + # Calculate and set the new width of the figure so the legend fits + legend_width = figlegend.get_window_extent().width / self._figure.dpi + fig_width, fig_height = self._figure.get_size_inches() + self._figure.set_size_inches(fig_width + legend_width, fig_height) + + # Draw the plot again to get the new transformations + _draw_figure(self._figure) + + # Now calculate how much space we need on the right side + legend_width = figlegend.get_window_extent().width / self._figure.dpi + space_needed = legend_width / (fig_width + legend_width) + margin = .04 
if self._margin_titles else .01 + self._space_needed = margin + space_needed + right = 1 - self._space_needed + + # Place the subplot axes to give space for the legend + self._figure.subplots_adjust(right=right) + self._tight_layout_rect[2] = right + + else: + # Draw a legend in the first axis + ax = self.axes.flat[0] + kwargs.setdefault("loc", "best") + + leg = ax.legend(handles, labels, **kwargs) + leg.set_title(title, prop={"size": title_size}) + self._legend = leg + + if adjust_subtitles: + adjust_legend_subtitles(leg) + + return self + + def _update_legend_data(self, ax): + """Extract the legend data from an axes object and save it.""" + data = {} + + # Get data directly from the legend, which is necessary + # for newer functions that don't add labeled proxy artists + if ax.legend_ is not None and self._extract_legend_handles: + handles = ax.legend_.legendHandles + labels = [t.get_text() for t in ax.legend_.texts] + data.update({l: h for h, l in zip(handles, labels)}) + + handles, labels = ax.get_legend_handles_labels() + data.update({l: h for h, l in zip(handles, labels)}) + + self._legend_data.update(data) + + # Now clear the legend + ax.legend_ = None + + def _get_palette(self, data, hue, hue_order, palette): + """Get a list of colors for the hue variable.""" + if hue is None: + palette = color_palette(n_colors=1) + + else: + hue_names = categorical_order(data[hue], hue_order) + n_colors = len(hue_names) + + # By default use either the current color palette or HUSL + if palette is None: + current_palette = utils.get_color_cycle() + if n_colors > len(current_palette): + colors = color_palette("husl", n_colors) + else: + colors = color_palette(n_colors=n_colors) + + # Allow for palette to map from hue variable names + elif isinstance(palette, dict): + color_names = [palette[h] for h in hue_names] + colors = color_palette(color_names, n_colors) + + # Otherwise act as if we just got a list of colors + else: + colors = color_palette(palette, n_colors) + + 
palette = color_palette(colors, n_colors) + + return palette + + @property + def legend(self): + """The :class:`matplotlib.legend.Legend` object, if present.""" + try: + return self._legend + except AttributeError: + return None + + +_facet_docs = dict( + + data=dedent("""\ + data : DataFrame + Tidy ("long-form") dataframe where each column is a variable and each + row is an observation.\ + """), + rowcol=dedent("""\ + row, col : vectors or keys in ``data`` + Variables that define subsets to plot on different facets.\ + """), + rowcol_order=dedent("""\ + {row,col}_order : vector of strings + Specify the order in which levels of the ``row`` and/or ``col`` variables + appear in the grid of subplots.\ + """), + col_wrap=dedent("""\ + col_wrap : int + "Wrap" the column variable at this width, so that the column facets + span multiple rows. Incompatible with a ``row`` facet.\ + """), + share_xy=dedent("""\ + share{x,y} : bool, 'col', or 'row' optional + If true, the facets will share y axes across columns and/or x axes + across rows.\ + """), + height=dedent("""\ + height : scalar + Height (in inches) of each facet. See also: ``aspect``.\ + """), + aspect=dedent("""\ + aspect : scalar + Aspect ratio of each facet, so that ``aspect * height`` gives the width + of each facet in inches.\ + """), + palette=dedent("""\ + palette : palette name, list, or dict + Colors to use for the different levels of the ``hue`` variable. Should + be something that can be interpreted by :func:`color_palette`, or a + dictionary mapping hue levels to matplotlib colors.\ + """), + legend_out=dedent("""\ + legend_out : bool + If ``True``, the figure size will be extended, and the legend will be + drawn outside the plot on the center right.\ + """), + margin_titles=dedent("""\ + margin_titles : bool + If ``True``, the titles for the row variable are drawn to the right of + the last column. 
This option is experimental and may not work in all + cases.\ + """), + facet_kws=dedent("""\ + facet_kws : dict + Additional parameters passed to :class:`FacetGrid`. + """), +) + + +class FacetGrid(Grid): + """Multi-plot grid for plotting conditional relationships.""" + @_deprecate_positional_args + def __init__( + self, data, *, + row=None, col=None, hue=None, col_wrap=None, + sharex=True, sharey=True, height=3, aspect=1, palette=None, + row_order=None, col_order=None, hue_order=None, hue_kws=None, + dropna=False, legend_out=True, despine=True, + margin_titles=False, xlim=None, ylim=None, subplot_kws=None, + gridspec_kws=None, size=None + ): + + super(FacetGrid, self).__init__() + + # Handle deprecations + if size is not None: + height = size + msg = ("The `size` parameter has been renamed to `height`; " + "please update your code.") + warnings.warn(msg, UserWarning) + + # Determine the hue facet layer information + hue_var = hue + if hue is None: + hue_names = None + else: + hue_names = categorical_order(data[hue], hue_order) + + colors = self._get_palette(data, hue, hue_order, palette) + + # Set up the lists of names for the row and column facet variables + if row is None: + row_names = [] + else: + row_names = categorical_order(data[row], row_order) + + if col is None: + col_names = [] + else: + col_names = categorical_order(data[col], col_order) + + # Additional dict of kwarg -> list of values for mapping the hue var + hue_kws = hue_kws if hue_kws is not None else {} + + # Make a boolean mask that is True anywhere there is an NA + # value in one of the faceting variables, but only if dropna is True + none_na = np.zeros(len(data), bool) + if dropna: + row_na = none_na if row is None else data[row].isnull() + col_na = none_na if col is None else data[col].isnull() + hue_na = none_na if hue is None else data[hue].isnull() + not_na = ~(row_na | col_na | hue_na) + else: + not_na = ~none_na + + # Compute the grid shape + ncol = 1 if col is None else len(col_names) 
+ nrow = 1 if row is None else len(row_names) + self._n_facets = ncol * nrow + + self._col_wrap = col_wrap + if col_wrap is not None: + if row is not None: + err = "Cannot use `row` and `col_wrap` together." + raise ValueError(err) + ncol = col_wrap + nrow = int(np.ceil(len(col_names) / col_wrap)) + self._ncol = ncol + self._nrow = nrow + + # Calculate the base figure size + # This can get stretched later by a legend + # TODO this doesn't account for axis labels + figsize = (ncol * height * aspect, nrow * height) + + # Validate some inputs + if col_wrap is not None: + margin_titles = False + + # Build the subplot keyword dictionary + subplot_kws = {} if subplot_kws is None else subplot_kws.copy() + gridspec_kws = {} if gridspec_kws is None else gridspec_kws.copy() + if xlim is not None: + subplot_kws["xlim"] = xlim + if ylim is not None: + subplot_kws["ylim"] = ylim + + # --- Initialize the subplot grid + + # Disable autolayout so legend_out works properly + with mpl.rc_context({"figure.autolayout": False}): + fig = plt.figure(figsize=figsize) + + if col_wrap is None: + + kwargs = dict(squeeze=False, + sharex=sharex, sharey=sharey, + subplot_kw=subplot_kws, + gridspec_kw=gridspec_kws) + + axes = fig.subplots(nrow, ncol, **kwargs) + + if col is None and row is None: + axes_dict = {} + elif col is None: + axes_dict = dict(zip(row_names, axes.flat)) + elif row is None: + axes_dict = dict(zip(col_names, axes.flat)) + else: + facet_product = product(row_names, col_names) + axes_dict = dict(zip(facet_product, axes.flat)) + + else: + + # If wrapping the col variable we need to make the grid ourselves + if gridspec_kws: + warnings.warn("`gridspec_kws` ignored when using `col_wrap`") + + n_axes = len(col_names) + axes = np.empty(n_axes, object) + axes[0] = fig.add_subplot(nrow, ncol, 1, **subplot_kws) + if sharex: + subplot_kws["sharex"] = axes[0] + if sharey: + subplot_kws["sharey"] = axes[0] + for i in range(1, n_axes): + axes[i] = fig.add_subplot(nrow, ncol, i + 1, 
**subplot_kws) + + axes_dict = dict(zip(col_names, axes)) + + # --- Set up the class attributes + + # Attributes that are part of the public API but accessed through + # a property so that Sphinx adds them to the auto class doc + self._figure = fig + self._axes = axes + self._axes_dict = axes_dict + self._legend = None + + # Public attributes that aren't explicitly documented + # (It's not obvious that having them be public was a good idea) + self.data = data + self.row_names = row_names + self.col_names = col_names + self.hue_names = hue_names + self.hue_kws = hue_kws + + # Next the private variables + self._nrow = nrow + self._row_var = row + self._ncol = ncol + self._col_var = col + + self._margin_titles = margin_titles + self._margin_titles_texts = [] + self._col_wrap = col_wrap + self._hue_var = hue_var + self._colors = colors + self._legend_out = legend_out + self._legend_data = {} + self._x_var = None + self._y_var = None + self._dropna = dropna + self._not_na = not_na + + # --- Make the axes look good + + self.tight_layout() + if despine: + self.despine() + + if sharex in [True, 'col']: + for ax in self._not_bottom_axes: + for label in ax.get_xticklabels(): + label.set_visible(False) + ax.xaxis.offsetText.set_visible(False) + ax.xaxis.label.set_visible(False) + + if sharey in [True, 'row']: + for ax in self._not_left_axes: + for label in ax.get_yticklabels(): + label.set_visible(False) + ax.yaxis.offsetText.set_visible(False) + ax.yaxis.label.set_visible(False) + + __init__.__doc__ = dedent("""\ + Initialize the matplotlib figure and FacetGrid object. + + This class maps a dataset onto multiple axes arrayed in a grid of rows + and columns that correspond to *levels* of variables in the dataset. + The plots it produces are often called "lattice", "trellis", or + "small-multiple" graphics. + + It can also represent levels of a third variable with the ``hue`` + parameter, which plots different subsets of data in different colors. 
+ This uses color to resolve elements on a third dimension, but only + draws subsets on top of each other and will not tailor the ``hue`` + parameter for the specific visualization the way that axes-level + functions that accept ``hue`` will. + + The basic workflow is to initialize the :class:`FacetGrid` object with + the dataset and the variables that are used to structure the grid. Then + one or more plotting functions can be applied to each subset by calling + :meth:`FacetGrid.map` or :meth:`FacetGrid.map_dataframe`. Finally, the + plot can be tweaked with other methods to do things like change the + axis labels, use different ticks, or add a legend. See the detailed + code examples below for more information. + + .. warning:: + + When using seaborn functions that infer semantic mappings from a + dataset, care must be taken to synchronize those mappings across + facets (e.g., by defining the ``hue`` mapping with a palette dict or + setting the data type of the variables to ``category``). In most cases, + it will be better to use a figure-level function (e.g. :func:`relplot` + or :func:`catplot`) than to use :class:`FacetGrid` directly. + + See the :ref:`tutorial <grid_tutorial>` for more information. + + Parameters + ---------- + {data} + row, col, hue : strings + Variables that define subsets of the data, which will be drawn on + separate facets in the grid. See the ``{{var}}_order`` parameters to + control the order of levels of this variable. + {col_wrap} + {share_xy} + {height} + {aspect} + {palette} + {{row,col,hue}}_order : lists + Order for the levels of the faceting variables. By default, this + will be the order that the levels appear in ``data`` or, if the + variables are pandas categoricals, the category order. + hue_kws : dictionary of param -> list of values mapping + Other keyword arguments to insert into the plotting call to let + other plot attributes vary across levels of the hue variable (e.g. + the markers in a scatterplot). 
+ {legend_out} + despine : boolean + Remove the top and right spines from the plots. + {margin_titles} + {{x, y}}lim: tuples + Limits for each of the axes on each facet (only relevant when + share{{x, y}} is True). + subplot_kws : dict + Dictionary of keyword arguments passed to matplotlib subplot(s) + methods. + gridspec_kws : dict + Dictionary of keyword arguments passed to + :class:`matplotlib.gridspec.GridSpec` + (via :meth:`matplotlib.figure.Figure.subplots`). + Ignored if ``col_wrap`` is not ``None``. + + See Also + -------- + PairGrid : Subplot grid for plotting pairwise relationships + relplot : Combine a relational plot and a :class:`FacetGrid` + displot : Combine a distribution plot and a :class:`FacetGrid` + catplot : Combine a categorical plot and a :class:`FacetGrid` + lmplot : Combine a regression plot and a :class:`FacetGrid` + + Examples + -------- + + .. note:: + + These examples use seaborn functions to demonstrate some of the + advanced features of the class, but in most cases you will want + to use figure-level functions (e.g. :func:`displot`, :func:`relplot`) + to make the plots shown here. + + .. include:: ../docstrings/FacetGrid.rst + + """).format(**_facet_docs) + + def facet_data(self): + """Generator for name indices and data subsets for each facet. + + Yields + ------ + (i, j, k), data_ijk : tuple of ints, DataFrame + The ints provide an index into the {row, col, hue}_names attribute, + and the dataframe contains a subset of the full data corresponding + to each facet. The generator yields subsets that correspond with + the self.axes.flat iterator, or self.axes[i, j] when `col_wrap` + is None. 
+ + """ + data = self.data + + # Construct masks for the row variable + if self.row_names: + row_masks = [data[self._row_var] == n for n in self.row_names] + else: + row_masks = [np.repeat(True, len(self.data))] + + # Construct masks for the column variable + if self.col_names: + col_masks = [data[self._col_var] == n for n in self.col_names] + else: + col_masks = [np.repeat(True, len(self.data))] + + # Construct masks for the hue variable + if self.hue_names: + hue_masks = [data[self._hue_var] == n for n in self.hue_names] + else: + hue_masks = [np.repeat(True, len(self.data))] + + # Here is the main generator loop + for (i, row), (j, col), (k, hue) in product(enumerate(row_masks), + enumerate(col_masks), + enumerate(hue_masks)): + data_ijk = data[row & col & hue & self._not_na] + yield (i, j, k), data_ijk + + def map(self, func, *args, **kwargs): + """Apply a plotting function to each facet's subset of the data. + + Parameters + ---------- + func : callable + A plotting function that takes data and keyword arguments. It + must plot to the currently active matplotlib Axes and take a + `color` keyword argument. If faceting on the `hue` dimension, + it must also take a `label` keyword argument. + args : strings + Column names in self.data that identify variables with data to + plot. The data for each variable is passed to `func` in the + order the variables are specified in the call. + kwargs : keyword arguments + All keyword arguments are passed to the plotting function. + + Returns + ------- + self : object + Returns self. 
+ + """ + # If color was a keyword argument, grab it here + kw_color = kwargs.pop("color", None) + + # How we use the function depends on where it comes from + func_module = str(getattr(func, "__module__", "")) + + # Check for categorical plots without order information + if func_module == "seaborn.categorical": + if "order" not in kwargs: + warning = ("Using the {} function without specifying " + "`order` is likely to produce an incorrect " + "plot.".format(func.__name__)) + warnings.warn(warning) + if len(args) == 3 and "hue_order" not in kwargs: + warning = ("Using the {} function without specifying " + "`hue_order` is likely to produce an incorrect " + "plot.".format(func.__name__)) + warnings.warn(warning) + + # Iterate over the data subsets + for (row_i, col_j, hue_k), data_ijk in self.facet_data(): + + # If this subset is null, move on + if not data_ijk.values.size: + continue + + # Get the current axis + modify_state = not func_module.startswith("seaborn") + ax = self.facet_axis(row_i, col_j, modify_state) + + # Decide what color to plot with + kwargs["color"] = self._facet_color(hue_k, kw_color) + + # Insert the other hue aesthetics if appropriate + for kw, val_list in self.hue_kws.items(): + kwargs[kw] = val_list[hue_k] + + # Insert a label in the keyword arguments for the legend + if self._hue_var is not None: + kwargs["label"] = utils.to_utf8(self.hue_names[hue_k]) + + # Get the actual data we are going to plot with + plot_data = data_ijk[list(args)] + if self._dropna: + plot_data = plot_data.dropna() + plot_args = [v for k, v in plot_data.iteritems()] + + # Some matplotlib functions don't handle pandas objects correctly + if func_module.startswith("matplotlib"): + plot_args = [v.values for v in plot_args] + + # Draw the plot + self._facet_plot(func, ax, plot_args, kwargs) + + # Finalize the annotations and layout + self._finalize_grid(args[:2]) + + return self + + def map_dataframe(self, func, *args, **kwargs): + """Like ``.map`` but passes args as 
strings and inserts data in kwargs. + + This method is suitable for plotting with functions that accept a + long-form DataFrame as a `data` keyword argument and access the + data in that DataFrame using string variable names. + + Parameters + ---------- + func : callable + A plotting function that takes data and keyword arguments. Unlike + the `map` method, a function used here must "understand" Pandas + objects. It also must plot to the currently active matplotlib Axes + and take a `color` keyword argument. If faceting on the `hue` + dimension, it must also take a `label` keyword argument. + args : strings + Column names in self.data that identify variables with data to + plot. The data for each variable is passed to `func` in the + order the variables are specified in the call. + kwargs : keyword arguments + All keyword arguments are passed to the plotting function. + + Returns + ------- + self : object + Returns self. + + """ + + # If color was a keyword argument, grab it here + kw_color = kwargs.pop("color", None) + + # Iterate over the data subsets + for (row_i, col_j, hue_k), data_ijk in self.facet_data(): + + # If this subset is null, move on + if not data_ijk.values.size: + continue + + # Get the current axis + modify_state = not str(func.__module__).startswith("seaborn") + ax = self.facet_axis(row_i, col_j, modify_state) + + # Decide what color to plot with + kwargs["color"] = self._facet_color(hue_k, kw_color) + + # Insert the other hue aesthetics if appropriate + for kw, val_list in self.hue_kws.items(): + kwargs[kw] = val_list[hue_k] + + # Insert a label in the keyword arguments for the legend + if self._hue_var is not None: + kwargs["label"] = self.hue_names[hue_k] + + # Stick the facet dataframe into the kwargs + if self._dropna: + data_ijk = data_ijk.dropna() + kwargs["data"] = data_ijk + + # Draw the plot + self._facet_plot(func, ax, args, kwargs) + + # For axis labels, prefer to use positional args for backcompat + # but also extract the x/y kwargs 
and use if no corresponding arg + axis_labels = [kwargs.get("x", None), kwargs.get("y", None)] + for i, val in enumerate(args[:2]): + axis_labels[i] = val + self._finalize_grid(axis_labels) + + return self + + def _facet_color(self, hue_index, kw_color): + + color = self._colors[hue_index] + if kw_color is not None: + return kw_color + elif color is not None: + return color + + def _facet_plot(self, func, ax, plot_args, plot_kwargs): + + # Draw the plot + if str(func.__module__).startswith("seaborn"): + plot_kwargs = plot_kwargs.copy() + semantics = ["x", "y", "hue", "size", "style"] + for key, val in zip(semantics, plot_args): + plot_kwargs[key] = val + plot_args = [] + plot_kwargs["ax"] = ax + func(*plot_args, **plot_kwargs) + + # Sort out the supporting information + self._update_legend_data(ax) + + def _finalize_grid(self, axlabels): + """Finalize the annotations and layout.""" + self.set_axis_labels(*axlabels) + self.set_titles() + self.tight_layout() + + def facet_axis(self, row_i, col_j, modify_state=True): + """Make the axis identified by these indices active and return it.""" + + # Calculate the actual indices of the axes to plot on + if self._col_wrap is not None: + ax = self.axes.flat[col_j] + else: + ax = self.axes[row_i, col_j] + + # Get a reference to the axes object we want, and make it active + if modify_state: + plt.sca(ax) + return ax + + def despine(self, **kwargs): + """Remove axis spines from the facets.""" + utils.despine(self._figure, **kwargs) + return self + + def set_axis_labels(self, x_var=None, y_var=None, clear_inner=True, **kwargs): + """Set axis labels on the left column and bottom row of the grid.""" + if x_var is not None: + self._x_var = x_var + self.set_xlabels(x_var, clear_inner=clear_inner, **kwargs) + if y_var is not None: + self._y_var = y_var + self.set_ylabels(y_var, clear_inner=clear_inner, **kwargs) + + return self + + def set_xlabels(self, label=None, clear_inner=True, **kwargs): + """Label the x axis on the bottom row of 
the grid.""" + if label is None: + label = self._x_var + for ax in self._bottom_axes: + ax.set_xlabel(label, **kwargs) + if clear_inner: + for ax in self._not_bottom_axes: + ax.set_xlabel("") + return self + + def set_ylabels(self, label=None, clear_inner=True, **kwargs): + """Label the y axis on the left column of the grid.""" + if label is None: + label = self._y_var + for ax in self._left_axes: + ax.set_ylabel(label, **kwargs) + if clear_inner: + for ax in self._not_left_axes: + ax.set_ylabel("") + return self + + def set_xticklabels(self, labels=None, step=None, **kwargs): + """Set x axis tick labels of the grid.""" + for ax in self.axes.flat: + curr_ticks = ax.get_xticks() + ax.set_xticks(curr_ticks) + if labels is None: + curr_labels = [l.get_text() for l in ax.get_xticklabels()] + if step is not None: + xticks = ax.get_xticks()[::step] + curr_labels = curr_labels[::step] + ax.set_xticks(xticks) + ax.set_xticklabels(curr_labels, **kwargs) + else: + ax.set_xticklabels(labels, **kwargs) + return self + + def set_yticklabels(self, labels=None, **kwargs): + """Set y axis tick labels on the left column of the grid.""" + for ax in self.axes.flat: + curr_ticks = ax.get_yticks() + ax.set_yticks(curr_ticks) + if labels is None: + curr_labels = [l.get_text() for l in ax.get_yticklabels()] + ax.set_yticklabels(curr_labels, **kwargs) + else: + ax.set_yticklabels(labels, **kwargs) + return self + + def set_titles(self, template=None, row_template=None, col_template=None, + **kwargs): + """Draw titles either above each facet or on the grid margins. + + Parameters + ---------- + template : string + Template for all titles with the formatting keys {col_var} and + {col_name} (if using a `col` faceting variable) and/or {row_var} + and {row_name} (if using a `row` faceting variable). + row_template: + Template for the row variable when titles are drawn on the grid + margins. Must have {row_var} and {row_name} formatting keys. 
+ col_template: + Template for the row variable when titles are drawn on the grid + margins. Must have {col_var} and {col_name} formatting keys. + + Returns + ------- + self: object + Returns self. + + """ + args = dict(row_var=self._row_var, col_var=self._col_var) + kwargs["size"] = kwargs.pop("size", mpl.rcParams["axes.labelsize"]) + + # Establish default templates + if row_template is None: + row_template = "{row_var} = {row_name}" + if col_template is None: + col_template = "{col_var} = {col_name}" + if template is None: + if self._row_var is None: + template = col_template + elif self._col_var is None: + template = row_template + else: + template = " | ".join([row_template, col_template]) + + row_template = utils.to_utf8(row_template) + col_template = utils.to_utf8(col_template) + template = utils.to_utf8(template) + + if self._margin_titles: + + # Remove any existing title texts + for text in self._margin_titles_texts: + text.remove() + self._margin_titles_texts = [] + + if self.row_names is not None: + # Draw the row titles on the right edge of the grid + for i, row_name in enumerate(self.row_names): + ax = self.axes[i, -1] + args.update(dict(row_name=row_name)) + title = row_template.format(**args) + text = ax.annotate( + title, xy=(1.02, .5), xycoords="axes fraction", + rotation=270, ha="left", va="center", + **kwargs + ) + self._margin_titles_texts.append(text) + + if self.col_names is not None: + # Draw the column titles as normal titles + for j, col_name in enumerate(self.col_names): + args.update(dict(col_name=col_name)) + title = col_template.format(**args) + self.axes[0, j].set_title(title, **kwargs) + + return self + + # Otherwise title each facet with all the necessary information + if (self._row_var is not None) and (self._col_var is not None): + for i, row_name in enumerate(self.row_names): + for j, col_name in enumerate(self.col_names): + args.update(dict(row_name=row_name, col_name=col_name)) + title = template.format(**args) + self.axes[i, 
j].set_title(title, **kwargs) + elif self.row_names is not None and len(self.row_names): + for i, row_name in enumerate(self.row_names): + args.update(dict(row_name=row_name)) + title = template.format(**args) + self.axes[i, 0].set_title(title, **kwargs) + elif self.col_names is not None and len(self.col_names): + for i, col_name in enumerate(self.col_names): + args.update(dict(col_name=col_name)) + title = template.format(**args) + # Index the flat array so col_wrap works + self.axes.flat[i].set_title(title, **kwargs) + return self + + def refline(self, *, x=None, y=None, color='.5', linestyle='--', **line_kws): + """Add a reference line(s) to each facet. + + Parameters + ---------- + x, y : numeric + Value(s) to draw the line(s) at. + color : :mod:`matplotlib color ` + Specifies the color of the reference line(s). Pass ``color=None`` to + use ``hue`` mapping. + linestyle : str + Specifies the style of the reference line(s). + line_kws : key, value mappings + Other keyword arguments are passed to :meth:`matplotlib.axes.Axes.axvline` + when ``x`` is not None and :meth:`matplotlib.axes.Axes.axhline` when ``y`` + is not None. + + Returns + ------- + :class:`FacetGrid` instance + Returns ``self`` for easy method chaining. + + """ + line_kws['color'] = color + line_kws['linestyle'] = linestyle + + if x is not None: + self.map(plt.axvline, x=x, **line_kws) + + if y is not None: + self.map(plt.axhline, y=y, **line_kws) + + # ------ Properties that are part of the public API and documented by Sphinx + + @property + def axes(self): + """An array of the :class:`matplotlib.axes.Axes` objects in the grid.""" + return self._axes + + @property + def ax(self): + """The :class:`matplotlib.axes.Axes` when no faceting variables are assigned.""" + if self.axes.shape == (1, 1): + return self.axes[0, 0] + else: + err = ( + "Use the `.axes` attribute when facet variables are assigned." 
+ ) + raise AttributeError(err) + + @property + def axes_dict(self): + """A mapping of facet names to corresponding :class:`matplotlib.axes.Axes`. + + If only one of ``row`` or ``col`` is assigned, each key is a string + representing a level of that variable. If both facet dimensions are + assigned, each key is a ``({row_level}, {col_level})`` tuple. + + """ + return self._axes_dict + + # ------ Private properties, that require some computation to get + + @property + def _inner_axes(self): + """Return a flat array of the inner axes.""" + if self._col_wrap is None: + return self.axes[:-1, 1:].flat + else: + axes = [] + n_empty = self._nrow * self._ncol - self._n_facets + for i, ax in enumerate(self.axes): + append = ( + i % self._ncol + and i < (self._ncol * (self._nrow - 1)) + and i < (self._ncol * (self._nrow - 1) - n_empty) + ) + if append: + axes.append(ax) + return np.array(axes, object).flat + + @property + def _left_axes(self): + """Return a flat array of the left column of axes.""" + if self._col_wrap is None: + return self.axes[:, 0].flat + else: + axes = [] + for i, ax in enumerate(self.axes): + if not i % self._ncol: + axes.append(ax) + return np.array(axes, object).flat + + @property + def _not_left_axes(self): + """Return a flat array of axes that aren't on the left column.""" + if self._col_wrap is None: + return self.axes[:, 1:].flat + else: + axes = [] + for i, ax in enumerate(self.axes): + if i % self._ncol: + axes.append(ax) + return np.array(axes, object).flat + + @property + def _bottom_axes(self): + """Return a flat array of the bottom row of axes.""" + if self._col_wrap is None: + return self.axes[-1, :].flat + else: + axes = [] + n_empty = self._nrow * self._ncol - self._n_facets + for i, ax in enumerate(self.axes): + append = ( + i >= (self._ncol * (self._nrow - 1)) + or i >= (self._ncol * (self._nrow - 1) - n_empty) + ) + if append: + axes.append(ax) + return np.array(axes, object).flat + + @property + def _not_bottom_axes(self): + """Return 
a flat array of axes that aren't on the bottom row.""" + if self._col_wrap is None: + return self.axes[:-1, :].flat + else: + axes = [] + n_empty = self._nrow * self._ncol - self._n_facets + for i, ax in enumerate(self.axes): + append = ( + i < (self._ncol * (self._nrow - 1)) + and i < (self._ncol * (self._nrow - 1) - n_empty) + ) + if append: + axes.append(ax) + return np.array(axes, object).flat + + +class PairGrid(Grid): + """Subplot grid for plotting pairwise relationships in a dataset. + + This object maps each variable in a dataset onto a column and row in a + grid of multiple axes. Different axes-level plotting functions can be + used to draw bivariate plots in the upper and lower triangles, and the + the marginal distribution of each variable can be shown on the diagonal. + + Several different common plots can be generated in a single line using + :func:`pairplot`. Use :class:`PairGrid` when you need more flexibility. + + See the :ref:`tutorial ` for more information. + + """ + @_deprecate_positional_args + def __init__( + self, data, *, + hue=None, hue_order=None, palette=None, + hue_kws=None, vars=None, x_vars=None, y_vars=None, + corner=False, diag_sharey=True, height=2.5, aspect=1, + layout_pad=.5, despine=True, dropna=False, size=None + ): + """Initialize the plot figure and PairGrid object. + + Parameters + ---------- + data : DataFrame + Tidy (long-form) dataframe where each column is a variable and + each row is an observation. + hue : string (variable name) + Variable in ``data`` to map plot aspects to different colors. This + variable will be excluded from the default x and y variables. + hue_order : list of strings + Order for the levels of the hue variable in the palette + palette : dict or seaborn color palette + Set of colors for mapping the ``hue`` variable. If a dict, keys + should be values in the ``hue`` variable. 
+ hue_kws : dictionary of param -> list of values mapping + Other keyword arguments to insert into the plotting call to let + other plot attributes vary across levels of the hue variable (e.g. + the markers in a scatterplot). + vars : list of variable names + Variables within ``data`` to use, otherwise use every column with + a numeric datatype. + {x, y}_vars : lists of variable names + Variables within ``data`` to use separately for the rows and + columns of the figure; i.e. to make a non-square plot. + corner : bool + If True, don't add axes to the upper (off-diagonal) triangle of the + grid, making this a "corner" plot. + height : scalar + Height (in inches) of each facet. + aspect : scalar + Aspect * height gives the width (in inches) of each facet. + layout_pad : scalar + Padding between axes; passed to ``fig.tight_layout``. + despine : boolean + Remove the top and right spines from the plots. + dropna : boolean + Drop missing values from the data before plotting. + + See Also + -------- + pairplot : Easily drawing common uses of :class:`PairGrid`. + FacetGrid : Subplot grid for plotting conditional relationships. + + Examples + -------- + + .. 
include:: ../docstrings/PairGrid.rst + + """ + + super(PairGrid, self).__init__() + + # Handle deprecations + if size is not None: + height = size + msg = ("The `size` parameter has been renamed to `height`; " + "please update your code.") + warnings.warn(UserWarning(msg)) + + # Sort out the variables that define the grid + numeric_cols = self._find_numeric_cols(data) + if hue in numeric_cols: + numeric_cols.remove(hue) + if vars is not None: + x_vars = list(vars) + y_vars = list(vars) + if x_vars is None: + x_vars = numeric_cols + if y_vars is None: + y_vars = numeric_cols + + if np.isscalar(x_vars): + x_vars = [x_vars] + if np.isscalar(y_vars): + y_vars = [y_vars] + + self.x_vars = x_vars = list(x_vars) + self.y_vars = y_vars = list(y_vars) + self.square_grid = self.x_vars == self.y_vars + + if not x_vars: + raise ValueError("No variables found for grid columns.") + if not y_vars: + raise ValueError("No variables found for grid rows.") + + # Create the figure and the array of subplots + figsize = len(x_vars) * height * aspect, len(y_vars) * height + + # Disable autolayout so legend_out works + with mpl.rc_context({"figure.autolayout": False}): + fig = plt.figure(figsize=figsize) + + axes = fig.subplots(len(y_vars), len(x_vars), + sharex="col", sharey="row", + squeeze=False) + + # Possibly remove upper axes to make a corner grid + # Note: setting up the axes is usually the most time-intensive part + # of using the PairGrid. We are foregoing the speed improvement that + # we would get by just not setting up the hidden axes so that we can + # avoid implementing fig.subplots ourselves. But worth thinking about. 
+ self._corner = corner + if corner: + hide_indices = np.triu_indices_from(axes, 1) + for i, j in zip(*hide_indices): + axes[i, j].remove() + axes[i, j] = None + + self._figure = fig + self.axes = axes + self.data = data + + # Save what we are going to do with the diagonal + self.diag_sharey = diag_sharey + self.diag_vars = None + self.diag_axes = None + + self._dropna = dropna + + # Label the axes + self._add_axis_labels() + + # Sort out the hue variable + self._hue_var = hue + if hue is None: + self.hue_names = hue_order = ["_nolegend_"] + self.hue_vals = pd.Series(["_nolegend_"] * len(data), + index=data.index) + else: + # We need hue_order and hue_names because the former is used to control + # the order of drawing and the latter is used to control the order of + # the legend. hue_names can become string-typed while hue_order must + # retain the type of the input data. This is messy but results from + # the fact that PairGrid can implement the hue-mapping logic itself + # (and was originally written exclusively that way) but now can delegate + # to the axes-level functions, while always handling legend creation. 
+ # See GH2307 + hue_names = hue_order = categorical_order(data[hue], hue_order) + if dropna: + # Filter NA from the list of unique hue names + hue_names = list(filter(pd.notnull, hue_names)) + self.hue_names = hue_names + self.hue_vals = data[hue] + + # Additional dict of kwarg -> list of values for mapping the hue var + self.hue_kws = hue_kws if hue_kws is not None else {} + + self._orig_palette = palette + self._hue_order = hue_order + self.palette = self._get_palette(data, hue, hue_order, palette) + self._legend_data = {} + + # Make the plot look nice + for ax in axes[:-1, :].flat: + if ax is None: + continue + for label in ax.get_xticklabels(): + label.set_visible(False) + ax.xaxis.offsetText.set_visible(False) + ax.xaxis.label.set_visible(False) + + for ax in axes[:, 1:].flat: + if ax is None: + continue + for label in ax.get_yticklabels(): + label.set_visible(False) + ax.yaxis.offsetText.set_visible(False) + ax.yaxis.label.set_visible(False) + + self._tight_layout_rect = [.01, .01, .99, .99] + self._tight_layout_pad = layout_pad + self._despine = despine + if despine: + utils.despine(fig=fig) + self.tight_layout(pad=layout_pad) + + def map(self, func, **kwargs): + """Plot with the same function in every subplot. + + Parameters + ---------- + func : callable plotting function + Must take x, y arrays as positional arguments and draw onto the + "currently active" matplotlib Axes. Also needs to accept kwargs + called ``color`` and ``label``. + + """ + row_indices, col_indices = np.indices(self.axes.shape) + indices = zip(row_indices.flat, col_indices.flat) + self._map_bivariate(func, indices, **kwargs) + + return self + + def map_lower(self, func, **kwargs): + """Plot with a bivariate function on the lower diagonal subplots. + + Parameters + ---------- + func : callable plotting function + Must take x, y arrays as positional arguments and draw onto the + "currently active" matplotlib Axes. Also needs to accept kwargs + called ``color`` and ``label``. 
+ + """ + indices = zip(*np.tril_indices_from(self.axes, -1)) + self._map_bivariate(func, indices, **kwargs) + return self + + def map_upper(self, func, **kwargs): + """Plot with a bivariate function on the upper diagonal subplots. + + Parameters + ---------- + func : callable plotting function + Must take x, y arrays as positional arguments and draw onto the + "currently active" matplotlib Axes. Also needs to accept kwargs + called ``color`` and ``label``. + + """ + indices = zip(*np.triu_indices_from(self.axes, 1)) + self._map_bivariate(func, indices, **kwargs) + return self + + def map_offdiag(self, func, **kwargs): + """Plot with a bivariate function on the off-diagonal subplots. + + Parameters + ---------- + func : callable plotting function + Must take x, y arrays as positional arguments and draw onto the + "currently active" matplotlib Axes. Also needs to accept kwargs + called ``color`` and ``label``. + + """ + if self.square_grid: + self.map_lower(func, **kwargs) + if not self._corner: + self.map_upper(func, **kwargs) + else: + indices = [] + for i, (y_var) in enumerate(self.y_vars): + for j, (x_var) in enumerate(self.x_vars): + if x_var != y_var: + indices.append((i, j)) + self._map_bivariate(func, indices, **kwargs) + return self + + def map_diag(self, func, **kwargs): + """Plot with a univariate function on each diagonal subplot. + + Parameters + ---------- + func : callable plotting function + Must take an x array as a positional argument and draw onto the + "currently active" matplotlib Axes. Also needs to accept kwargs + called ``color`` and ``label``. 
+ + """ + # Add special diagonal axes for the univariate plot + if self.diag_axes is None: + diag_vars = [] + diag_axes = [] + for i, y_var in enumerate(self.y_vars): + for j, x_var in enumerate(self.x_vars): + if x_var == y_var: + + # Make the density axes + diag_vars.append(x_var) + ax = self.axes[i, j] + diag_ax = ax.twinx() + diag_ax.set_axis_off() + diag_axes.append(diag_ax) + + # Work around matplotlib bug + # https://github.com/matplotlib/matplotlib/issues/15188 + if not plt.rcParams.get("ytick.left", True): + for tick in ax.yaxis.majorTicks: + tick.tick1line.set_visible(False) + + # Remove main y axis from density axes in a corner plot + if self._corner: + ax.yaxis.set_visible(False) + if self._despine: + utils.despine(ax=ax, left=True) + # TODO add optional density ticks (on the right) + # when drawing a corner plot? + + if self.diag_sharey and diag_axes: + # This may change in future matplotlibs + # See https://github.com/matplotlib/matplotlib/pull/9923 + group = diag_axes[0].get_shared_y_axes() + for ax in diag_axes[1:]: + group.join(ax, diag_axes[0]) + + self.diag_vars = np.array(diag_vars, np.object_) + self.diag_axes = np.array(diag_axes, np.object_) + + if "hue" not in signature(func).parameters: + return self._map_diag_iter_hue(func, **kwargs) + + # Loop over diagonal variables and axes, making one plot in each + for var, ax in zip(self.diag_vars, self.diag_axes): + + plot_kwargs = kwargs.copy() + if str(func.__module__).startswith("seaborn"): + plot_kwargs["ax"] = ax + else: + plt.sca(ax) + + vector = self.data[var] + if self._hue_var is not None: + hue = self.data[self._hue_var] + else: + hue = None + + if self._dropna: + not_na = vector.notna() + if hue is not None: + not_na &= hue.notna() + vector = vector[not_na] + if hue is not None: + hue = hue[not_na] + + plot_kwargs.setdefault("hue", hue) + plot_kwargs.setdefault("hue_order", self._hue_order) + plot_kwargs.setdefault("palette", self._orig_palette) + func(x=vector, **plot_kwargs) + 
ax.legend_ = None + + self._add_axis_labels() + return self + + def _map_diag_iter_hue(self, func, **kwargs): + """Put marginal plot on each diagonal axes, iterating over hue.""" + # Plot on each of the diagonal axes + fixed_color = kwargs.pop("color", None) + + for var, ax in zip(self.diag_vars, self.diag_axes): + hue_grouped = self.data[var].groupby(self.hue_vals) + + plot_kwargs = kwargs.copy() + if str(func.__module__).startswith("seaborn"): + plot_kwargs["ax"] = ax + else: + plt.sca(ax) + + for k, label_k in enumerate(self._hue_order): + + # Attempt to get data for this level, allowing for empty + try: + data_k = hue_grouped.get_group(label_k) + except KeyError: + data_k = pd.Series([], dtype=float) + + if fixed_color is None: + color = self.palette[k] + else: + color = fixed_color + + if self._dropna: + data_k = utils.remove_na(data_k) + + if str(func.__module__).startswith("seaborn"): + func(x=data_k, label=label_k, color=color, **plot_kwargs) + else: + func(data_k, label=label_k, color=color, **plot_kwargs) + + self._add_axis_labels() + + return self + + def _map_bivariate(self, func, indices, **kwargs): + """Draw a bivariate plot on the indicated axes.""" + # This is a hack to handle the fact that new distribution plots don't add + # their artists onto the axes. This is probably superior in general, but + # we'll need a better way to handle it in the axisgrid functions. + from .distributions import histplot, kdeplot + if func is histplot or func is kdeplot: + self._extract_legend_handles = True + + kws = kwargs.copy() # Use copy as we insert other kwargs + for i, j in indices: + x_var = self.x_vars[j] + y_var = self.y_vars[i] + ax = self.axes[i, j] + if ax is None: # i.e. 
we are in corner mode + continue + self._plot_bivariate(x_var, y_var, ax, func, **kws) + self._add_axis_labels() + + if "hue" in signature(func).parameters: + self.hue_names = list(self._legend_data) + + def _plot_bivariate(self, x_var, y_var, ax, func, **kwargs): + """Draw a bivariate plot on the specified axes.""" + if "hue" not in signature(func).parameters: + self._plot_bivariate_iter_hue(x_var, y_var, ax, func, **kwargs) + return + + kwargs = kwargs.copy() + if str(func.__module__).startswith("seaborn"): + kwargs["ax"] = ax + else: + plt.sca(ax) + + if x_var == y_var: + axes_vars = [x_var] + else: + axes_vars = [x_var, y_var] + + if self._hue_var is not None and self._hue_var not in axes_vars: + axes_vars.append(self._hue_var) + + data = self.data[axes_vars] + if self._dropna: + data = data.dropna() + + x = data[x_var] + y = data[y_var] + if self._hue_var is None: + hue = None + else: + hue = data.get(self._hue_var) + + kwargs.setdefault("hue", hue) + kwargs.setdefault("hue_order", self._hue_order) + kwargs.setdefault("palette", self._orig_palette) + func(x=x, y=y, **kwargs) + + self._update_legend_data(ax) + + def _plot_bivariate_iter_hue(self, x_var, y_var, ax, func, **kwargs): + """Draw a bivariate plot while iterating over hue subsets.""" + kwargs = kwargs.copy() + if str(func.__module__).startswith("seaborn"): + kwargs["ax"] = ax + else: + plt.sca(ax) + + if x_var == y_var: + axes_vars = [x_var] + else: + axes_vars = [x_var, y_var] + + hue_grouped = self.data.groupby(self.hue_vals) + for k, label_k in enumerate(self._hue_order): + + kws = kwargs.copy() + + # Attempt to get data for this level, allowing for empty + try: + data_k = hue_grouped.get_group(label_k) + except KeyError: + data_k = pd.DataFrame(columns=axes_vars, + dtype=float) + + if self._dropna: + data_k = data_k[axes_vars].dropna() + + x = data_k[x_var] + y = data_k[y_var] + + for kw, val_list in self.hue_kws.items(): + kws[kw] = val_list[k] + kws.setdefault("color", self.palette[k]) + if 
self._hue_var is not None: + kws["label"] = label_k + + if str(func.__module__).startswith("seaborn"): + func(x=x, y=y, **kws) + else: + func(x, y, **kws) + + self._update_legend_data(ax) + + def _add_axis_labels(self): + """Add labels to the left and bottom Axes.""" + for ax, label in zip(self.axes[-1, :], self.x_vars): + ax.set_xlabel(label) + for ax, label in zip(self.axes[:, 0], self.y_vars): + ax.set_ylabel(label) + if self._corner: + self.axes[0, 0].set_ylabel("") + + def _find_numeric_cols(self, data): + """Find which variables in a DataFrame are numeric.""" + numeric_cols = [] + for col in data: + if variable_type(data[col]) == "numeric": + numeric_cols.append(col) + return numeric_cols + + +class JointGrid(_BaseGrid): + """Grid for drawing a bivariate plot with marginal univariate plots. + + Many plots can be drawn by using the figure-level interface :func:`jointplot`. + Use this class directly when you need more flexibility. + + """ + + @_deprecate_positional_args + def __init__( + self, *, + x=None, y=None, + data=None, + height=6, ratio=5, space=.2, + dropna=False, xlim=None, ylim=None, size=None, marginal_ticks=False, + hue=None, palette=None, hue_order=None, hue_norm=None, + ): + # Handle deprecations + if size is not None: + height = size + msg = ("The `size` parameter has been renamed to `height`; " + "please update your code.") + warnings.warn(msg, UserWarning) + + # Set up the subplot grid + f = plt.figure(figsize=(height, height)) + gs = plt.GridSpec(ratio + 1, ratio + 1) + + ax_joint = f.add_subplot(gs[1:, :-1]) + ax_marg_x = f.add_subplot(gs[0, :-1], sharex=ax_joint) + ax_marg_y = f.add_subplot(gs[1:, -1], sharey=ax_joint) + + self._figure = f + self.ax_joint = ax_joint + self.ax_marg_x = ax_marg_x + self.ax_marg_y = ax_marg_y + + # Turn off tick visibility for the measure axis on the marginal plots + plt.setp(ax_marg_x.get_xticklabels(), visible=False) + plt.setp(ax_marg_y.get_yticklabels(), visible=False) + 
plt.setp(ax_marg_x.get_xticklabels(minor=True), visible=False) + plt.setp(ax_marg_y.get_yticklabels(minor=True), visible=False) + + # Turn off the ticks on the density axis for the marginal plots + if not marginal_ticks: + plt.setp(ax_marg_x.yaxis.get_majorticklines(), visible=False) + plt.setp(ax_marg_x.yaxis.get_minorticklines(), visible=False) + plt.setp(ax_marg_y.xaxis.get_majorticklines(), visible=False) + plt.setp(ax_marg_y.xaxis.get_minorticklines(), visible=False) + plt.setp(ax_marg_x.get_yticklabels(), visible=False) + plt.setp(ax_marg_y.get_xticklabels(), visible=False) + plt.setp(ax_marg_x.get_yticklabels(minor=True), visible=False) + plt.setp(ax_marg_y.get_xticklabels(minor=True), visible=False) + ax_marg_x.yaxis.grid(False) + ax_marg_y.xaxis.grid(False) + + # Process the input variables + p = VectorPlotter(data=data, variables=dict(x=x, y=y, hue=hue)) + plot_data = p.plot_data.loc[:, p.plot_data.notna().any()] + + # Possibly drop NA + if dropna: + plot_data = plot_data.dropna() + + def get_var(var): + vector = plot_data.get(var, None) + if vector is not None: + vector = vector.rename(p.variables.get(var, None)) + return vector + + self.x = get_var("x") + self.y = get_var("y") + self.hue = get_var("hue") + + for axis in "xy": + name = p.variables.get(axis, None) + if name is not None: + getattr(ax_joint, f"set_{axis}label")(name) + + if xlim is not None: + ax_joint.set_xlim(xlim) + if ylim is not None: + ax_joint.set_ylim(ylim) + + # Store the semantic mapping parameters for axes-level functions + self._hue_params = dict(palette=palette, hue_order=hue_order, hue_norm=hue_norm) + + # Make the grid look nice + utils.despine(f) + if not marginal_ticks: + utils.despine(ax=ax_marg_x, left=True) + utils.despine(ax=ax_marg_y, bottom=True) + for axes in [ax_marg_x, ax_marg_y]: + for axis in [axes.xaxis, axes.yaxis]: + axis.label.set_visible(False) + f.tight_layout() + f.subplots_adjust(hspace=space, wspace=space) + + def _inject_kwargs(self, func, kws, params): 
+ """Add params to kws if they are accepted by func.""" + func_params = signature(func).parameters + for key, val in params.items(): + if key in func_params: + kws.setdefault(key, val) + + def plot(self, joint_func, marginal_func, **kwargs): + """Draw the plot by passing functions for joint and marginal axes. + + This method passes the ``kwargs`` dictionary to both functions. If you + need more control, call :meth:`JointGrid.plot_joint` and + :meth:`JointGrid.plot_marginals` directly with specific parameters. + + Parameters + ---------- + joint_func, marginal_func : callables + Functions to draw the bivariate and univariate plots. See methods + referenced above for information about the required characteristics + of these functions. + kwargs + Additional keyword arguments are passed to both functions. + + Returns + ------- + :class:`JointGrid` instance + Returns ``self`` for easy method chaining. + + """ + self.plot_marginals(marginal_func, **kwargs) + self.plot_joint(joint_func, **kwargs) + return self + + def plot_joint(self, func, **kwargs): + """Draw a bivariate plot on the joint axes of the grid. + + Parameters + ---------- + func : plotting callable + If a seaborn function, it should accept ``x`` and ``y``. Otherwise, + it must accept ``x`` and ``y`` vectors of data as the first two + positional arguments, and it must plot on the "current" axes. + If ``hue`` was defined in the class constructor, the function must + accept ``hue`` as a parameter. + kwargs + Keyword argument are passed to the plotting function. + + Returns + ------- + :class:`JointGrid` instance + Returns ``self`` for easy method chaining. 
+ + """ + kwargs = kwargs.copy() + if str(func.__module__).startswith("seaborn"): + kwargs["ax"] = self.ax_joint + else: + plt.sca(self.ax_joint) + if self.hue is not None: + kwargs["hue"] = self.hue + self._inject_kwargs(func, kwargs, self._hue_params) + + if str(func.__module__).startswith("seaborn"): + func(x=self.x, y=self.y, **kwargs) + else: + func(self.x, self.y, **kwargs) + + return self + + def plot_marginals(self, func, **kwargs): + """Draw univariate plots on each marginal axes. + + Parameters + ---------- + func : plotting callable + If a seaborn function, it should accept ``x`` and ``y`` and plot + when only one of them is defined. Otherwise, it must accept a vector + of data as the first positional argument and determine its orientation + using the ``vertical`` parameter, and it must plot on the "current" axes. + If ``hue`` was defined in the class constructor, it must accept ``hue`` + as a parameter. + kwargs + Keyword argument are passed to the plotting function. + + Returns + ------- + :class:`JointGrid` instance + Returns ``self`` for easy method chaining. + + """ + seaborn_func = ( + str(func.__module__).startswith("seaborn") + # deprecated distplot has a legacy API, special case it + and not func.__name__ == "distplot" + ) + func_params = signature(func).parameters + kwargs = kwargs.copy() + if self.hue is not None: + kwargs["hue"] = self.hue + self._inject_kwargs(func, kwargs, self._hue_params) + + if "legend" in func_params: + kwargs.setdefault("legend", False) + + if "orientation" in func_params: + # e.g. plt.hist + orient_kw_x = {"orientation": "vertical"} + orient_kw_y = {"orientation": "horizontal"} + elif "vertical" in func_params: + # e.g. sns.distplot (also how did this get backwards?) 
+ orient_kw_x = {"vertical": False} + orient_kw_y = {"vertical": True} + + if seaborn_func: + func(x=self.x, ax=self.ax_marg_x, **kwargs) + else: + plt.sca(self.ax_marg_x) + func(self.x, **orient_kw_x, **kwargs) + + if seaborn_func: + func(y=self.y, ax=self.ax_marg_y, **kwargs) + else: + plt.sca(self.ax_marg_y) + func(self.y, **orient_kw_y, **kwargs) + + self.ax_marg_x.yaxis.get_label().set_visible(False) + self.ax_marg_y.xaxis.get_label().set_visible(False) + + return self + + def refline( + self, *, x=None, y=None, joint=True, marginal=True, + color='.5', linestyle='--', **line_kws + ): + """Add a reference line(s) to joint and/or marginal axes. + + Parameters + ---------- + x, y : numeric + Value(s) to draw the line(s) at. + joint, marginal : bools + Whether to add the reference line(s) to the joint/marginal axes. + color : :mod:`matplotlib color ` + Specifies the color of the reference line(s). + linestyle : str + Specifies the style of the reference line(s). + line_kws : key, value mappings + Other keyword arguments are passed to :meth:`matplotlib.axes.Axes.axvline` + when ``x`` is not None and :meth:`matplotlib.axes.Axes.axhline` when ``y`` + is not None. + + Returns + ------- + :class:`JointGrid` instance + Returns ``self`` for easy method chaining. + + """ + line_kws['color'] = color + line_kws['linestyle'] = linestyle + + if x is not None: + if joint: + self.ax_joint.axvline(x, **line_kws) + if marginal: + self.ax_marg_x.axvline(x, **line_kws) + + if y is not None: + if joint: + self.ax_joint.axhline(y, **line_kws) + if marginal: + self.ax_marg_y.axhline(y, **line_kws) + + return self + + def set_axis_labels(self, xlabel="", ylabel="", **kwargs): + """Set axis labels on the bivariate axes. + + Parameters + ---------- + xlabel, ylabel : strings + Label names for the x and y variables. 
+ kwargs : key, value mappings + Other keyword arguments are passed to the following functions: + + - :meth:`matplotlib.axes.Axes.set_xlabel` + - :meth:`matplotlib.axes.Axes.set_ylabel` + + Returns + ------- + :class:`JointGrid` instance + Returns ``self`` for easy method chaining. + + """ + self.ax_joint.set_xlabel(xlabel, **kwargs) + self.ax_joint.set_ylabel(ylabel, **kwargs) + return self + + +JointGrid.__init__.__doc__ = """\ +Set up the grid of subplots and store data internally for easy plotting. + +Parameters +---------- +{params.core.xy} +{params.core.data} +height : number + Size of each side of the figure in inches (it will be square). +ratio : number + Ratio of joint axes height to marginal axes height. +space : number + Space between the joint and marginal axes +dropna : bool + If True, remove missing observations before plotting. +{{x, y}}lim : pairs of numbers + Set axis limits to these values before plotting. +marginal_ticks : bool + If False, suppress ticks on the count/density axis of the marginal plots. +{params.core.hue} + Note: unlike in :class:`FacetGrid` or :class:`PairGrid`, the axes-level + functions must support ``hue`` to use it in :class:`JointGrid`. +{params.core.palette} +{params.core.hue_order} +{params.core.hue_norm} + +See Also +-------- +{seealso.jointplot} +{seealso.pairgrid} +{seealso.pairplot} + +Examples +-------- + +.. include:: ../docstrings/JointGrid.rst + +""".format( + params=_param_docs, + returns=_core_docs["returns"], + seealso=_core_docs["seealso"], +) + + +@_deprecate_positional_args +def pairplot( + data, *, + hue=None, hue_order=None, palette=None, + vars=None, x_vars=None, y_vars=None, + kind="scatter", diag_kind="auto", markers=None, + height=2.5, aspect=1, corner=False, dropna=False, + plot_kws=None, diag_kws=None, grid_kws=None, size=None, +): + """Plot pairwise relationships in a dataset. 
+ + By default, this function will create a grid of Axes such that each numeric + variable in ``data`` will by shared across the y-axes across a single row and + the x-axes across a single column. The diagonal plots are treated + differently: a univariate distribution plot is drawn to show the marginal + distribution of the data in each column. + + It is also possible to show a subset of variables or plot different + variables on the rows and columns. + + This is a high-level interface for :class:`PairGrid` that is intended to + make it easy to draw a few common styles. You should use :class:`PairGrid` + directly if you need more flexibility. + + Parameters + ---------- + data : `pandas.DataFrame` + Tidy (long-form) dataframe where each column is a variable and + each row is an observation. + hue : name of variable in ``data`` + Variable in ``data`` to map plot aspects to different colors. + hue_order : list of strings + Order for the levels of the hue variable in the palette + palette : dict or seaborn color palette + Set of colors for mapping the ``hue`` variable. If a dict, keys + should be values in the ``hue`` variable. + vars : list of variable names + Variables within ``data`` to use, otherwise use every column with + a numeric datatype. + {x, y}_vars : lists of variable names + Variables within ``data`` to use separately for the rows and + columns of the figure; i.e. to make a non-square plot. + kind : {'scatter', 'kde', 'hist', 'reg'} + Kind of plot to make. + diag_kind : {'auto', 'hist', 'kde', None} + Kind of plot for the diagonal subplots. If 'auto', choose based on + whether or not ``hue`` is used. + markers : single matplotlib marker code or list + Either the marker to use for all scatterplot points or a list of markers + with a length the same as the number of levels in the hue variable so that + differently colored points will also have different scatterplot + markers. + height : scalar + Height (in inches) of each facet. 
+ aspect : scalar + Aspect * height gives the width (in inches) of each facet. + corner : bool + If True, don't add axes to the upper (off-diagonal) triangle of the + grid, making this a "corner" plot. + dropna : boolean + Drop missing values from the data before plotting. + {plot, diag, grid}_kws : dicts + Dictionaries of keyword arguments. ``plot_kws`` are passed to the + bivariate plotting function, ``diag_kws`` are passed to the univariate + plotting function, and ``grid_kws`` are passed to the :class:`PairGrid` + constructor. + + Returns + ------- + grid : :class:`PairGrid` + Returns the underlying :class:`PairGrid` instance for further tweaking. + + See Also + -------- + PairGrid : Subplot grid for more flexible plotting of pairwise relationships. + JointGrid : Grid for plotting joint and marginal distributions of two variables. + + Examples + -------- + + .. include:: ../docstrings/pairplot.rst + + """ + # Avoid circular import + from .distributions import histplot, kdeplot + + # Handle deprecations + if size is not None: + height = size + msg = ("The `size` parameter has been renamed to `height`; " + "please update your code.") + warnings.warn(msg, UserWarning) + + if not isinstance(data, pd.DataFrame): + raise TypeError( + "'data' must be pandas DataFrame object, not: {typefound}".format( + typefound=type(data))) + + plot_kws = {} if plot_kws is None else plot_kws.copy() + diag_kws = {} if diag_kws is None else diag_kws.copy() + grid_kws = {} if grid_kws is None else grid_kws.copy() + + # Resolve "auto" diag kind + if diag_kind == "auto": + if hue is None: + diag_kind = "kde" if kind == "kde" else "hist" + else: + diag_kind = "hist" if kind == "hist" else "kde" + + # Set up the PairGrid + grid_kws.setdefault("diag_sharey", diag_kind == "hist") + grid = PairGrid(data, vars=vars, x_vars=x_vars, y_vars=y_vars, hue=hue, + hue_order=hue_order, palette=palette, corner=corner, + height=height, aspect=aspect, dropna=dropna, **grid_kws) + + # Add the markers here 
as PairGrid has figured out how many levels of the + # hue variable are needed and we don't want to duplicate that process + if markers is not None: + if kind == "reg": + # Needed until regplot supports style + if grid.hue_names is None: + n_markers = 1 + else: + n_markers = len(grid.hue_names) + if not isinstance(markers, list): + markers = [markers] * n_markers + if len(markers) != n_markers: + raise ValueError(("markers must be a singleton or a list of " + "markers for each level of the hue variable")) + grid.hue_kws = {"marker": markers} + elif kind == "scatter": + if isinstance(markers, str): + plot_kws["marker"] = markers + elif hue is not None: + plot_kws["style"] = data[hue] + plot_kws["markers"] = markers + + # Draw the marginal plots on the diagonal + diag_kws = diag_kws.copy() + diag_kws.setdefault("legend", False) + if diag_kind == "hist": + grid.map_diag(histplot, **diag_kws) + elif diag_kind == "kde": + diag_kws.setdefault("fill", True) + diag_kws.setdefault("warn_singular", False) + grid.map_diag(kdeplot, **diag_kws) + + # Maybe plot on the off-diagonals + if diag_kind is not None: + plotter = grid.map_offdiag + else: + plotter = grid.map + + if kind == "scatter": + from .relational import scatterplot # Avoid circular import + plotter(scatterplot, **plot_kws) + elif kind == "reg": + from .regression import regplot # Avoid circular import + plotter(regplot, **plot_kws) + elif kind == "kde": + from .distributions import kdeplot # Avoid circular import + plot_kws.setdefault("warn_singular", False) + plotter(kdeplot, **plot_kws) + elif kind == "hist": + from .distributions import histplot # Avoid circular import + plotter(histplot, **plot_kws) + + # Add a legend + if hue is not None: + grid.add_legend() + + grid.tight_layout() + + return grid + + +@_deprecate_positional_args +def jointplot( + *, + x=None, y=None, + data=None, + kind="scatter", color=None, height=6, ratio=5, space=.2, + dropna=False, xlim=None, ylim=None, marginal_ticks=False, + 
joint_kws=None, marginal_kws=None, + hue=None, palette=None, hue_order=None, hue_norm=None, + **kwargs +): + # Avoid circular imports + from .relational import scatterplot + from .regression import regplot, residplot + from .distributions import histplot, kdeplot, _freedman_diaconis_bins + + # Handle deprecations + if "size" in kwargs: + height = kwargs.pop("size") + msg = ("The `size` parameter has been renamed to `height`; " + "please update your code.") + warnings.warn(msg, UserWarning) + + # Set up empty default kwarg dicts + joint_kws = {} if joint_kws is None else joint_kws.copy() + joint_kws.update(kwargs) + marginal_kws = {} if marginal_kws is None else marginal_kws.copy() + + # Handle deprecations of distplot-specific kwargs + distplot_keys = [ + "rug", "fit", "hist_kws", "norm_hist" "hist_kws", "rug_kws", + ] + unused_keys = [] + for key in distplot_keys: + if key in marginal_kws: + unused_keys.append(key) + marginal_kws.pop(key) + if unused_keys and kind != "kde": + msg = ( + "The marginal plotting function has changed to `histplot`," + " which does not accept the following argument(s): {}." + ).format(", ".join(unused_keys)) + warnings.warn(msg, UserWarning) + + # Validate the plot kind + plot_kinds = ["scatter", "hist", "hex", "kde", "reg", "resid"] + _check_argument("kind", plot_kinds, kind) + + # Raise early if using `hue` with a kind that does not support it + if hue is not None and kind in ["hex", "reg", "resid"]: + msg = ( + f"Use of `hue` with `kind='{kind}'` is not currently supported." 
+ ) + raise ValueError(msg) + + # Make a colormap based off the plot color + # (Currently used only for kind="hex") + if color is None: + color = "C0" + color_rgb = mpl.colors.colorConverter.to_rgb(color) + colors = [utils.set_hls_values(color_rgb, l=l) # noqa + for l in np.linspace(1, 0, 12)] + cmap = blend_palette(colors, as_cmap=True) + + # Matplotlib's hexbin plot is not na-robust + if kind == "hex": + dropna = True + + # Initialize the JointGrid object + grid = JointGrid( + data=data, x=x, y=y, hue=hue, + palette=palette, hue_order=hue_order, hue_norm=hue_norm, + dropna=dropna, height=height, ratio=ratio, space=space, + xlim=xlim, ylim=ylim, marginal_ticks=marginal_ticks, + ) + + if grid.hue is not None: + marginal_kws.setdefault("legend", False) + + # Plot the data using the grid + if kind.startswith("scatter"): + + joint_kws.setdefault("color", color) + grid.plot_joint(scatterplot, **joint_kws) + + if grid.hue is None: + marg_func = histplot + else: + marg_func = kdeplot + marginal_kws.setdefault("warn_singular", False) + marginal_kws.setdefault("fill", True) + + marginal_kws.setdefault("color", color) + grid.plot_marginals(marg_func, **marginal_kws) + + elif kind.startswith("hist"): + + # TODO process pair parameters for bins, etc. 
and pass + # to both jount and marginal plots + + joint_kws.setdefault("color", color) + grid.plot_joint(histplot, **joint_kws) + + marginal_kws.setdefault("kde", False) + marginal_kws.setdefault("color", color) + + marg_x_kws = marginal_kws.copy() + marg_y_kws = marginal_kws.copy() + + pair_keys = "bins", "binwidth", "binrange" + for key in pair_keys: + if isinstance(joint_kws.get(key), tuple): + x_val, y_val = joint_kws[key] + marg_x_kws.setdefault(key, x_val) + marg_y_kws.setdefault(key, y_val) + + histplot(data=data, x=x, hue=hue, **marg_x_kws, ax=grid.ax_marg_x) + histplot(data=data, y=y, hue=hue, **marg_y_kws, ax=grid.ax_marg_y) + + elif kind.startswith("kde"): + + joint_kws.setdefault("color", color) + joint_kws.setdefault("warn_singular", False) + grid.plot_joint(kdeplot, **joint_kws) + + marginal_kws.setdefault("color", color) + if "fill" in joint_kws: + marginal_kws.setdefault("fill", joint_kws["fill"]) + + grid.plot_marginals(kdeplot, **marginal_kws) + + elif kind.startswith("hex"): + + x_bins = min(_freedman_diaconis_bins(grid.x), 50) + y_bins = min(_freedman_diaconis_bins(grid.y), 50) + gridsize = int(np.mean([x_bins, y_bins])) + + joint_kws.setdefault("gridsize", gridsize) + joint_kws.setdefault("cmap", cmap) + grid.plot_joint(plt.hexbin, **joint_kws) + + marginal_kws.setdefault("kde", False) + marginal_kws.setdefault("color", color) + grid.plot_marginals(histplot, **marginal_kws) + + elif kind.startswith("reg"): + + marginal_kws.setdefault("color", color) + marginal_kws.setdefault("kde", True) + grid.plot_marginals(histplot, **marginal_kws) + + joint_kws.setdefault("color", color) + grid.plot_joint(regplot, **joint_kws) + + elif kind.startswith("resid"): + + joint_kws.setdefault("color", color) + grid.plot_joint(residplot, **joint_kws) + + x, y = grid.ax_joint.collections[0].get_offsets().T + marginal_kws.setdefault("color", color) + histplot(x=x, hue=hue, ax=grid.ax_marg_x, **marginal_kws) + histplot(y=y, hue=hue, ax=grid.ax_marg_y, **marginal_kws) 
+ + return grid + + +jointplot.__doc__ = """\ +Draw a plot of two variables with bivariate and univariate graphs. + +This function provides a convenient interface to the :class:`JointGrid` +class, with several canned plot kinds. This is intended to be a fairly +lightweight wrapper; if you need more flexibility, you should use +:class:`JointGrid` directly. + +Parameters +---------- +{params.core.xy} +{params.core.data} +kind : {{ "scatter" | "kde" | "hist" | "hex" | "reg" | "resid" }} + Kind of plot to draw. See the examples for references to the underlying functions. +{params.core.color} +height : numeric + Size of the figure (it will be square). +ratio : numeric + Ratio of joint axes height to marginal axes height. +space : numeric + Space between the joint and marginal axes +dropna : bool + If True, remove observations that are missing from ``x`` and ``y``. +{{x, y}}lim : pairs of numbers + Axis limits to set before plotting. +marginal_ticks : bool + If False, suppress ticks on the count/density axis of the marginal plots. +{{joint, marginal}}_kws : dicts + Additional keyword arguments for the plot components. +{params.core.hue} + Semantic variable that is mapped to determine the color of plot elements. +{params.core.palette} +{params.core.hue_order} +{params.core.hue_norm} +kwargs + Additional keyword arguments are passed to the function used to + draw the plot on the joint Axes, superseding items in the + ``joint_kws`` dictionary. + +Returns +------- +{returns.jointgrid} + +See Also +-------- +{seealso.jointgrid} +{seealso.pairgrid} +{seealso.pairplot} + +Examples +-------- + +.. 
include:: ../docstrings/jointplot.rst + +""".format( + params=_param_docs, + returns=_core_docs["returns"], + seealso=_core_docs["seealso"], +) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/categorical.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/categorical.py new file mode 100644 index 0000000000000000000000000000000000000000..14db0864f3fc9bfa18d14998048e1ec2be295f52 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/categorical.py @@ -0,0 +1,4023 @@ +from textwrap import dedent +from numbers import Number +import colorsys +import numpy as np +from scipy import stats +import pandas as pd +import matplotlib as mpl +from matplotlib.collections import PatchCollection +import matplotlib.patches as Patches +import matplotlib.pyplot as plt +import warnings +from distutils.version import LooseVersion + +from ._core import variable_type, infer_orient, categorical_order +from . 
import utils +from .utils import remove_na +from .algorithms import bootstrap +from .palettes import color_palette, husl_palette, light_palette, dark_palette +from .axisgrid import FacetGrid, _facet_docs +from ._decorators import _deprecate_positional_args + + +__all__ = [ + "catplot", "factorplot", + "stripplot", "swarmplot", + "boxplot", "violinplot", "boxenplot", + "pointplot", "barplot", "countplot", +] + + +class _CategoricalPlotter(object): + + width = .8 + default_palette = "light" + require_numeric = True + + def establish_variables(self, x=None, y=None, hue=None, data=None, + orient=None, order=None, hue_order=None, + units=None): + """Convert input specification into a common representation.""" + # Option 1: + # We are plotting a wide-form dataset + # ----------------------------------- + if x is None and y is None: + + # Do a sanity check on the inputs + if hue is not None: + error = "Cannot use `hue` without `x` and `y`" + raise ValueError(error) + + # No hue grouping with wide inputs + plot_hues = None + hue_title = None + hue_names = None + + # No statistical units with wide inputs + plot_units = None + + # We also won't get a axes labels here + value_label = None + group_label = None + + # Option 1a: + # The input data is a Pandas DataFrame + # ------------------------------------ + + if isinstance(data, pd.DataFrame): + + # Order the data correctly + if order is None: + order = [] + # Reduce to just numeric columns + for col in data: + if variable_type(data[col]) == "numeric": + order.append(col) + plot_data = data[order] + group_names = order + group_label = data.columns.name + + # Convert to a list of arrays, the common representation + iter_data = plot_data.iteritems() + plot_data = [np.asarray(s, float) for k, s in iter_data] + + # Option 1b: + # The input data is an array or list + # ---------------------------------- + + else: + + # We can't reorder the data + if order is not None: + error = "Input data must be a pandas object to reorder" + 
raise ValueError(error) + + # The input data is an array + if hasattr(data, "shape"): + if len(data.shape) == 1: + if np.isscalar(data[0]): + plot_data = [data] + else: + plot_data = list(data) + elif len(data.shape) == 2: + nr, nc = data.shape + if nr == 1 or nc == 1: + plot_data = [data.ravel()] + else: + plot_data = [data[:, i] for i in range(nc)] + else: + error = ("Input `data` can have no " + "more than 2 dimensions") + raise ValueError(error) + + # Check if `data` is None to let us bail out here (for testing) + elif data is None: + plot_data = [[]] + + # The input data is a flat list + elif np.isscalar(data[0]): + plot_data = [data] + + # The input data is a nested list + # This will catch some things that might fail later + # but exhaustive checks are hard + else: + plot_data = data + + # Convert to a list of arrays, the common representation + plot_data = [np.asarray(d, float) for d in plot_data] + + # The group names will just be numeric indices + group_names = list(range((len(plot_data)))) + + # Figure out the plotting orientation + orient = "h" if str(orient).startswith("h") else "v" + + # Option 2: + # We are plotting a long-form dataset + # ----------------------------------- + + else: + + # See if we need to get variables from `data` + if data is not None: + x = data.get(x, x) + y = data.get(y, y) + hue = data.get(hue, hue) + units = data.get(units, units) + + # Validate the inputs + for var in [x, y, hue, units]: + if isinstance(var, str): + err = "Could not interpret input '{}'".format(var) + raise ValueError(err) + + # Figure out the plotting orientation + orient = infer_orient( + x, y, orient, require_numeric=self.require_numeric + ) + + # Option 2a: + # We are plotting a single set of data + # ------------------------------------ + if x is None or y is None: + + # Determine where the data are + vals = y if x is None else x + + # Put them into the common representation + plot_data = [np.asarray(vals)] + + # Get a label for the value axis + if 
hasattr(vals, "name"): + value_label = vals.name + else: + value_label = None + + # This plot will not have group labels or hue nesting + groups = None + group_label = None + group_names = [] + plot_hues = None + hue_names = None + hue_title = None + plot_units = None + + # Option 2b: + # We are grouping the data values by another variable + # --------------------------------------------------- + else: + + # Determine which role each variable will play + if orient == "v": + vals, groups = y, x + else: + vals, groups = x, y + + # Get the categorical axis label + group_label = None + if hasattr(groups, "name"): + group_label = groups.name + + # Get the order on the categorical axis + group_names = categorical_order(groups, order) + + # Group the numeric data + plot_data, value_label = self._group_longform(vals, groups, + group_names) + + # Now handle the hue levels for nested ordering + if hue is None: + plot_hues = None + hue_title = None + hue_names = None + else: + + # Get the order of the hue levels + hue_names = categorical_order(hue, hue_order) + + # Group the hue data + plot_hues, hue_title = self._group_longform(hue, groups, + group_names) + + # Now handle the units for nested observations + if units is None: + plot_units = None + else: + plot_units, _ = self._group_longform(units, groups, + group_names) + + # Assign object attributes + # ------------------------ + self.orient = orient + self.plot_data = plot_data + self.group_label = group_label + self.value_label = value_label + self.group_names = group_names + self.plot_hues = plot_hues + self.hue_title = hue_title + self.hue_names = hue_names + self.plot_units = plot_units + + def _group_longform(self, vals, grouper, order): + """Group a long-form variable by another with correct order.""" + # Ensure that the groupby will work + if not isinstance(vals, pd.Series): + if isinstance(grouper, pd.Series): + index = grouper.index + else: + index = None + vals = pd.Series(vals, index=index) + + # Group the val 
data + grouped_vals = vals.groupby(grouper) + out_data = [] + for g in order: + try: + g_vals = grouped_vals.get_group(g) + except KeyError: + g_vals = np.array([]) + out_data.append(g_vals) + + # Get the vals axis label + label = vals.name + + return out_data, label + + def establish_colors(self, color, palette, saturation): + """Get a list of colors for the main component of the plots.""" + if self.hue_names is None: + n_colors = len(self.plot_data) + else: + n_colors = len(self.hue_names) + + # Determine the main colors + if color is None and palette is None: + # Determine whether the current palette will have enough values + # If not, we'll default to the husl palette so each is distinct + current_palette = utils.get_color_cycle() + if n_colors <= len(current_palette): + colors = color_palette(n_colors=n_colors) + else: + colors = husl_palette(n_colors, l=.7) # noqa + + elif palette is None: + # When passing a specific color, the interpretation depends + # on whether there is a hue variable or not. + # If so, we will make a blend palette so that the different + # levels have some amount of variation. 
+ if self.hue_names is None: + colors = [color] * n_colors + else: + if self.default_palette == "light": + colors = light_palette(color, n_colors) + elif self.default_palette == "dark": + colors = dark_palette(color, n_colors) + else: + raise RuntimeError("No default palette specified") + else: + + # Let `palette` be a dict mapping level to color + if isinstance(palette, dict): + if self.hue_names is None: + levels = self.group_names + else: + levels = self.hue_names + palette = [palette[l] for l in levels] + + colors = color_palette(palette, n_colors) + + # Desaturate a bit because these are patches + if saturation < 1: + colors = color_palette(colors, desat=saturation) + + # Convert the colors to a common representations + rgb_colors = color_palette(colors) + + # Determine the gray color to use for the lines framing the plot + light_vals = [colorsys.rgb_to_hls(*c)[1] for c in rgb_colors] + lum = min(light_vals) * .6 + gray = mpl.colors.rgb2hex((lum, lum, lum)) + + # Assign object attributes + self.colors = rgb_colors + self.gray = gray + + @property + def hue_offsets(self): + """A list of center positions for plots when hue nesting is used.""" + n_levels = len(self.hue_names) + if self.dodge: + each_width = self.width / n_levels + offsets = np.linspace(0, self.width - each_width, n_levels) + offsets -= offsets.mean() + else: + offsets = np.zeros(n_levels) + + return offsets + + @property + def nested_width(self): + """A float with the width of plot elements when hue nesting is used.""" + if self.dodge: + width = self.width / len(self.hue_names) * .98 + else: + width = self.width + return width + + def annotate_axes(self, ax): + """Add descriptive labels to an Axes object.""" + if self.orient == "v": + xlabel, ylabel = self.group_label, self.value_label + else: + xlabel, ylabel = self.value_label, self.group_label + + if xlabel is not None: + ax.set_xlabel(xlabel) + if ylabel is not None: + ax.set_ylabel(ylabel) + + group_names = self.group_names + if not 
group_names: + group_names = ["" for _ in range(len(self.plot_data))] + + if self.orient == "v": + ax.set_xticks(np.arange(len(self.plot_data))) + ax.set_xticklabels(group_names) + else: + ax.set_yticks(np.arange(len(self.plot_data))) + ax.set_yticklabels(group_names) + + if self.orient == "v": + ax.xaxis.grid(False) + ax.set_xlim(-.5, len(self.plot_data) - .5, auto=None) + else: + ax.yaxis.grid(False) + ax.set_ylim(-.5, len(self.plot_data) - .5, auto=None) + + if self.hue_names is not None: + leg = ax.legend(loc="best", title=self.hue_title) + if self.hue_title is not None: + if LooseVersion(mpl.__version__) < "3.0": + # Old Matplotlib has no legend title size rcparam + try: + title_size = mpl.rcParams["axes.labelsize"] * .85 + except TypeError: # labelsize is something like "large" + title_size = mpl.rcParams["axes.labelsize"] + prop = mpl.font_manager.FontProperties(size=title_size) + leg.set_title(self.hue_title, prop=prop) + + def add_legend_data(self, ax, color, label): + """Add a dummy patch object so we can get legend data.""" + rect = plt.Rectangle([0, 0], 0, 0, + linewidth=self.linewidth / 2, + edgecolor=self.gray, + facecolor=color, + label=label) + ax.add_patch(rect) + + +class _BoxPlotter(_CategoricalPlotter): + + def __init__(self, x, y, hue, data, order, hue_order, + orient, color, palette, saturation, + width, dodge, fliersize, linewidth): + + self.establish_variables(x, y, hue, data, orient, order, hue_order) + self.establish_colors(color, palette, saturation) + + self.dodge = dodge + self.width = width + self.fliersize = fliersize + + if linewidth is None: + linewidth = mpl.rcParams["lines.linewidth"] + self.linewidth = linewidth + + def draw_boxplot(self, ax, kws): + """Use matplotlib to draw a boxplot on an Axes.""" + vert = self.orient == "v" + + props = {} + for obj in ["box", "whisker", "cap", "median", "flier"]: + props[obj] = kws.pop(obj + "props", {}) + + for i, group_data in enumerate(self.plot_data): + + if self.plot_hues is None: + + # 
Handle case where there is data at this level + if group_data.size == 0: + continue + + # Draw a single box or a set of boxes + # with a single level of grouping + box_data = np.asarray(remove_na(group_data)) + + # Handle case where there is no non-null data + if box_data.size == 0: + continue + + artist_dict = ax.boxplot(box_data, + vert=vert, + patch_artist=True, + positions=[i], + widths=self.width, + **kws) + color = self.colors[i] + self.restyle_boxplot(artist_dict, color, props) + else: + # Draw nested groups of boxes + offsets = self.hue_offsets + for j, hue_level in enumerate(self.hue_names): + + # Add a legend for this hue level + if not i: + self.add_legend_data(ax, self.colors[j], hue_level) + + # Handle case where there is data at this level + if group_data.size == 0: + continue + + hue_mask = self.plot_hues[i] == hue_level + box_data = np.asarray(remove_na(group_data[hue_mask])) + + # Handle case where there is no non-null data + if box_data.size == 0: + continue + + center = i + offsets[j] + artist_dict = ax.boxplot(box_data, + vert=vert, + patch_artist=True, + positions=[center], + widths=self.nested_width, + **kws) + self.restyle_boxplot(artist_dict, self.colors[j], props) + # Add legend data, but just for one set of boxes + + def restyle_boxplot(self, artist_dict, color, props): + """Take a drawn matplotlib boxplot and make it look nice.""" + for box in artist_dict["boxes"]: + box.update(dict(facecolor=color, + zorder=.9, + edgecolor=self.gray, + linewidth=self.linewidth)) + box.update(props["box"]) + for whisk in artist_dict["whiskers"]: + whisk.update(dict(color=self.gray, + linewidth=self.linewidth, + linestyle="-")) + whisk.update(props["whisker"]) + for cap in artist_dict["caps"]: + cap.update(dict(color=self.gray, + linewidth=self.linewidth)) + cap.update(props["cap"]) + for med in artist_dict["medians"]: + med.update(dict(color=self.gray, + linewidth=self.linewidth)) + med.update(props["median"]) + for fly in artist_dict["fliers"]: + 
fly.update(dict(markerfacecolor=self.gray, + marker="d", + markeredgecolor=self.gray, + markersize=self.fliersize)) + fly.update(props["flier"]) + + def plot(self, ax, boxplot_kws): + """Make the plot.""" + self.draw_boxplot(ax, boxplot_kws) + self.annotate_axes(ax) + if self.orient == "h": + ax.invert_yaxis() + + +class _ViolinPlotter(_CategoricalPlotter): + + def __init__(self, x, y, hue, data, order, hue_order, + bw, cut, scale, scale_hue, gridsize, + width, inner, split, dodge, orient, linewidth, + color, palette, saturation): + + self.establish_variables(x, y, hue, data, orient, order, hue_order) + self.establish_colors(color, palette, saturation) + self.estimate_densities(bw, cut, scale, scale_hue, gridsize) + + self.gridsize = gridsize + self.width = width + self.dodge = dodge + + if inner is not None: + if not any([inner.startswith("quart"), + inner.startswith("box"), + inner.startswith("stick"), + inner.startswith("point")]): + err = "Inner style '{}' not recognized".format(inner) + raise ValueError(err) + self.inner = inner + + if split and self.hue_names is not None and len(self.hue_names) != 2: + msg = "There must be exactly two hue levels to use `split`.'" + raise ValueError(msg) + self.split = split + + if linewidth is None: + linewidth = mpl.rcParams["lines.linewidth"] + self.linewidth = linewidth + + def estimate_densities(self, bw, cut, scale, scale_hue, gridsize): + """Find the support and density for all of the data.""" + # Initialize data structures to keep track of plotting data + if self.hue_names is None: + support = [] + density = [] + counts = np.zeros(len(self.plot_data)) + max_density = np.zeros(len(self.plot_data)) + else: + support = [[] for _ in self.plot_data] + density = [[] for _ in self.plot_data] + size = len(self.group_names), len(self.hue_names) + counts = np.zeros(size) + max_density = np.zeros(size) + + for i, group_data in enumerate(self.plot_data): + + # Option 1: we have a single level of grouping + # 
-------------------------------------------- + + if self.plot_hues is None: + + # Strip missing datapoints + kde_data = remove_na(group_data) + + # Handle special case of no data at this level + if kde_data.size == 0: + support.append(np.array([])) + density.append(np.array([1.])) + counts[i] = 0 + max_density[i] = 0 + continue + + # Handle special case of a single unique datapoint + elif np.unique(kde_data).size == 1: + support.append(np.unique(kde_data)) + density.append(np.array([1.])) + counts[i] = 1 + max_density[i] = 0 + continue + + # Fit the KDE and get the used bandwidth size + kde, bw_used = self.fit_kde(kde_data, bw) + + # Determine the support grid and get the density over it + support_i = self.kde_support(kde_data, bw_used, cut, gridsize) + density_i = kde.evaluate(support_i) + + # Update the data structures with these results + support.append(support_i) + density.append(density_i) + counts[i] = kde_data.size + max_density[i] = density_i.max() + + # Option 2: we have nested grouping by a hue variable + # --------------------------------------------------- + + else: + for j, hue_level in enumerate(self.hue_names): + + # Handle special case of no data at this category level + if not group_data.size: + support[i].append(np.array([])) + density[i].append(np.array([1.])) + counts[i, j] = 0 + max_density[i, j] = 0 + continue + + # Select out the observations for this hue level + hue_mask = self.plot_hues[i] == hue_level + + # Strip missing datapoints + kde_data = remove_na(group_data[hue_mask]) + + # Handle special case of no data at this level + if kde_data.size == 0: + support[i].append(np.array([])) + density[i].append(np.array([1.])) + counts[i, j] = 0 + max_density[i, j] = 0 + continue + + # Handle special case of a single unique datapoint + elif np.unique(kde_data).size == 1: + support[i].append(np.unique(kde_data)) + density[i].append(np.array([1.])) + counts[i, j] = 1 + max_density[i, j] = 0 + continue + + # Fit the KDE and get the used bandwidth 
def fit_kde(self, x, bw):
    """Fit a Gaussian KDE to ``x`` and report the kernel bandwidth used.

    Parameters
    ----------
    x : array-like exposing ``.std(ddof=...)``
        The observations.
    bw : object
        Forwarded unchanged as the second positional argument of
        ``stats.gaussian_kde``.

    Returns
    -------
    (kde, bandwidth)
        The fitted KDE object and ``kde.factor * x.std(ddof=1)``.
    """
    estimator = stats.gaussian_kde(x, bw)

    # ``factor`` is only a scale factor; multiplying it by the unbiased
    # standard deviation of the data gives the kernel bandwidth in data
    # units, which is what the support computation needs.
    kernel_bandwidth = estimator.factor * x.std(ddof=1)

    return estimator, kernel_bandwidth
def scale_count(self, density, counts, scale_hue):
    """Scale each density curve by the number of observations, in place.

    Every curve is first normalised to a maximum of 1 and then multiplied
    by ``count / reference`` where ``reference`` is the largest count.

    Parameters
    ----------
    density : list
        Without hue nesting (``self.hue_names is None``) a list of arrays;
        with hue nesting a list (per group) of lists (per hue level) of
        arrays.  The arrays are modified in place.
    counts : numpy.ndarray
        Observation counts, 1-d without hue nesting, 2-d
        ``[group, hue level]`` with it.
    scale_hue : bool
        Only used with hue nesting: when true the reference count is the
        largest count within the same group, otherwise the largest count
        overall.

    Notes
    -----
    Groups whose largest count is zero are left untouched.  The previous
    implementation "handled" them with ``d = 0``, which only rebound a
    local name and had no effect; that dead code is removed.  An input
    without any group no longer raises from ``counts.max()``.
    """
    if self.hue_names is None:
        # Nothing to scale: no groups at all, or no observations anywhere.
        if counts.size == 0:
            return
        largest = counts.max()
        if largest == 0:
            return
        for count, d in zip(counts, density):
            d /= d.max()
            d *= count / largest
        return

    # Loop-invariant reference used when scaling across all groups.
    overall_largest = counts.max() if counts.size else 0

    for i, group in enumerate(density):
        if len(group) == 0:
            continue
        group_largest = counts[i].max()
        if group_largest == 0:
            # No observations in this group: leave its curves as they are.
            continue
        reference = group_largest if scale_hue else overall_largest
        for j, d in enumerate(group):
            scaler = counts[i, j] / reference
            d /= d.max()
            d *= scaler
support.size == 1: + val = support.item() + d = density.item() + self.draw_single_observation(ax, i, val, d) + continue + + # Draw the violin for this group + grid = np.ones(self.gridsize) * i + fill_func(support, + grid - density * self.dwidth, + grid + density * self.dwidth, + facecolor=self.colors[i], + **kws) + + # Draw the interior representation of the data + if self.inner is None: + continue + + # Get a nan-free vector of datapoints + violin_data = remove_na(group_data) + + # Draw box and whisker information + if self.inner.startswith("box"): + self.draw_box_lines(ax, violin_data, support, density, i) + + # Draw quartile lines + elif self.inner.startswith("quart"): + self.draw_quartiles(ax, violin_data, support, density, i) + + # Draw stick observations + elif self.inner.startswith("stick"): + self.draw_stick_lines(ax, violin_data, support, density, i) + + # Draw point observations + elif self.inner.startswith("point"): + self.draw_points(ax, violin_data, i) + + # Option 2: we have nested grouping by a hue variable + # --------------------------------------------------- + + else: + offsets = self.hue_offsets + for j, hue_level in enumerate(self.hue_names): + + support, density = self.support[i][j], self.density[i][j] + kws["facecolor"] = self.colors[j] + + # Add legend data, but just for one set of violins + if not i: + self.add_legend_data(ax, self.colors[j], hue_level) + + # Handle the special case where we have no observations + if support.size == 0: + continue + + # Handle the special case where we have one observation + elif support.size == 1: + val = support.item() + d = density.item() + if self.split: + d = d / 2 + at_group = i + offsets[j] + self.draw_single_observation(ax, at_group, val, d) + continue + + # Option 2a: we are drawing a single split violin + # ----------------------------------------------- + + if self.split: + + grid = np.ones(self.gridsize) * i + if j: + fill_func(support, + grid, + grid + density * self.dwidth, + **kws) + else: + 
fill_func(support, + grid - density * self.dwidth, + grid, + **kws) + + # Draw the interior representation of the data + if self.inner is None: + continue + + # Get a nan-free vector of datapoints + hue_mask = self.plot_hues[i] == hue_level + violin_data = remove_na(group_data[hue_mask]) + + # Draw quartile lines + if self.inner.startswith("quart"): + self.draw_quartiles(ax, violin_data, + support, density, i, + ["left", "right"][j]) + + # Draw stick observations + elif self.inner.startswith("stick"): + self.draw_stick_lines(ax, violin_data, + support, density, i, + ["left", "right"][j]) + + # The box and point interior plots are drawn for + # all data at the group level, so we just do that once + if not j: + continue + + # Get the whole vector for this group level + violin_data = remove_na(group_data) + + # Draw box and whisker information + if self.inner.startswith("box"): + self.draw_box_lines(ax, violin_data, + support, density, i) + + # Draw point observations + elif self.inner.startswith("point"): + self.draw_points(ax, violin_data, i) + + # Option 2b: we are drawing full nested violins + # ----------------------------------------------- + + else: + grid = np.ones(self.gridsize) * (i + offsets[j]) + fill_func(support, + grid - density * self.dwidth, + grid + density * self.dwidth, + **kws) + + # Draw the interior representation + if self.inner is None: + continue + + # Get a nan-free vector of datapoints + hue_mask = self.plot_hues[i] == hue_level + violin_data = remove_na(group_data[hue_mask]) + + # Draw box and whisker information + if self.inner.startswith("box"): + self.draw_box_lines(ax, violin_data, + support, density, + i + offsets[j]) + + # Draw quartile lines + elif self.inner.startswith("quart"): + self.draw_quartiles(ax, violin_data, + support, density, + i + offsets[j]) + + # Draw stick observations + elif self.inner.startswith("stick"): + self.draw_stick_lines(ax, violin_data, + support, density, + i + offsets[j]) + + # Draw point observations + 
def draw_box_lines(self, ax, data, support, density, center):
    """Draw a miniature box plot (whiskers, box, median dot) at ``center``.

    ``support`` and ``density`` are accepted for signature parity with the
    other interior drawers but are not used here.

    The whiskers reach to the most extreme observations that lie within
    1.5 IQR of the quartiles; the box spans the quartiles and is drawn
    three times as thick; the median is a white dot.
    """
    lower_q, median, upper_q = np.percentile(data, [25, 50, 75])
    reach = 1.5 * stats.iqr(data)
    whisker_lo = np.min(data[data >= (lower_q - reach)])
    whisker_hi = np.max(data[data <= (upper_q + reach)])

    vertical = self.orient == "v"

    def oriented(categorical, value):
        # Categorical coordinate goes on x for vertical plots, on y otherwise.
        return (categorical, value) if vertical else (value, categorical)

    # Whisker line
    ax.plot(*oriented([center, center], [whisker_lo, whisker_hi]),
            linewidth=self.linewidth,
            color=self.gray)
    # Inter-quartile box, drawn as a thick line
    ax.plot(*oriented([center, center], [lower_q, upper_q]),
            linewidth=self.linewidth * 3,
            color=self.gray)
    # Median marker
    ax.scatter(*oriented(center, median),
               zorder=3,
               color="white",
               edgecolor=self.gray,
               s=np.square(self.linewidth * 2))
def draw_to_density(self, ax, center, val, support, density, split, **kws):
    """Draw a line at ``val`` that reaches out to the density outline.

    The line is orthogonal to the value axis.  Its half-length is 99% of
    ``self.dwidth`` times the density at the support point closest to
    ``val``.  With ``split == "left"`` or ``"right"`` only that half is
    drawn, starting at ``center``; any other value draws both halves.
    Extra keyword arguments go to ``ax.plot``; ``color`` is always
    replaced by ``self.gray``.
    """
    nearest = np.argmin(np.abs(support - val))
    reach = self.dwidth * density[nearest] * .99

    kws["color"] = self.gray

    # End points along the categorical axis.
    start = center if split == "right" else center - reach
    stop = center if split == "left" else center + reach
    across, level = [start, stop], [val, val]

    if self.orient == "v":
        ax.plot(across, level, **kws)
    else:
        ax.plot(level, across, **kws)
def draw_stripplot(self, ax, kws):
    """Scatter every observation onto ``ax`` with jitter on the categorical axis.

    One ``ax.scatter`` call is issued per group, or per group and hue
    level when hues are dodged.  ``kws`` is forwarded to ``ax.scatter``
    and is updated in place with the per-point colors under the key
    ``"c"``.
    """
    color_table = np.asarray(self.colors)

    def scatter_strip(level, values, mask, position):
        # Draw the selected observations of one strip around ``position``.
        chosen = values[mask]
        color_index = np.asarray(self.point_colors[level][mask])

        where = np.ones(chosen.size) * position
        where += self.jitterer(len(chosen))

        kws.update(c=color_table[color_index])
        if self.orient == "v":
            ax.scatter(where, chosen, **kws)
        else:
            ax.scatter(chosen, where, **kws)

    for i, group_data in enumerate(self.plot_data):

        if self.plot_hues is not None and self.dodge:
            # One strip per hue level, shifted off the group centre.
            shifts = self.hue_offsets
            for j, hue_level in enumerate(self.hue_names):
                scatter_strip(i, group_data,
                              self.plot_hues[i] == hue_level,
                              i + shifts[j])
            continue

        # A single strip centred on the group position.
        if self.hue_names is None:
            keep = np.ones(group_data.size, bool)
        else:
            # Drop observations whose hue is not among the plotted levels.
            keep = np.array([h in self.hue_names
                             for h in self.plot_hues[i]], bool)
        scatter_strip(i, group_data, keep, i)
def position_candidates(self, xy_i, neighbors, d):
    """Return an array of (x, y) positions that might be valid for ``xy_i``.

    The first candidate is ``xy_i`` itself.  Each neighbor then adds two
    more, one on either side of it at the height of ``xy_i``, pushed out
    by the horizontal distance at which two markers of diameter ``d``
    touch, plus a 5% margin.  Which side is listed first alternates from
    one neighbor to the next, starting with the left.
    """
    _, y_i = xy_i
    spots = [xy_i]

    for rank, (x_j, y_j) in enumerate(neighbors):
        rise = y_i - y_j
        # Clamp at zero so a neighbor at least ``d`` away vertically
        # yields no horizontal shift instead of a NaN.
        shift = np.sqrt(max(d ** 2 - rise ** 2, 0)) * 1.05
        pair = [(x_j - shift, y_i), (x_j + shift, y_i)]
        if rank % 2:
            # Odd-ranked neighbors propose the right-hand spot first.
            pair.reverse()
        spots.extend(pair)

    return np.array(spots)
def add_gutters(self, points, center, width):
    """Clamp ``points`` in place to ``center`` ± ``width / 2``.

    Values below the lower bound are set to it, then values above the
    upper bound are set to that.  A ``UserWarning`` is issued when more
    than 5% of the points had to be moved.  Returns the same ``points``
    object.
    """
    reach = width / 2

    floor = center - reach
    too_low = points < floor
    points[too_low] = floor

    # Evaluated after the lower clamp, like the bound checks it replaces.
    ceiling = center + reach
    too_high = points > ceiling
    points[too_high] = ceiling

    moved_share = (too_high | too_low).sum() / len(points)
    if moved_share > .05:
        template = (
            "{:.1%} of the points cannot be placed; you may want "
            "to decrease the size of the markers or use stripplot."
        )
        warnings.warn(template.format(moved_share), UserWarning)

    return points
def plot(self, ax, kws):
    """Draw the complete swarm plot on ``ax``.

    Runs, in order: ``draw_swarmplot(ax, kws)``, ``add_legend_data(ax)``
    and ``annotate_axes(ax)``.  For horizontal orientation the y axis is
    inverted afterwards.
    """
    self.draw_swarmplot(ax, kws)
    self.add_legend_data(ax)
    self.annotate_axes(ax)

    horizontal = self.orient == "h"
    if not horizontal:
        return
    ax.invert_yaxis()
@property
def nested_width(self):
    """Width of one plot element when hue nesting is used.

    With dodging, ``self.width`` is shared equally between the hue
    levels; without it every element keeps the full ``self.width``.
    """
    return self.width / len(self.hue_names) if self.dodge else self.width
def draw_confints(self, ax, at_group, confint, colors,
                  errwidth=None, capsize=None, **kws):
    """Draw one error bar per position, optionally with caps.

    Parameters
    ----------
    at_group : iterable
        Positions on the categorical axis.
    confint : iterable of (low, high)
        Interval end points, one pair per position.
    colors : iterable
        One color per position.
    errwidth : float, optional
        Default line width; without it 1.8 times the matplotlib
        ``lines.linewidth`` rc value is used.  An explicit ``lw`` in
        ``kws`` takes precedence over either.
    capsize : float, optional
        Total cap length; no caps are drawn when it is None.
    **kws
        Forwarded to every ``ax.plot`` call.
    """
    if errwidth is None:
        fallback_lw = mpl.rcParams["lines.linewidth"] * 1.8
    else:
        fallback_lw = errwidth
    kws.setdefault("lw", fallback_lw)

    for at, (ci_low, ci_high), color in zip(at_group, confint, colors):

        # Each segment is (categorical coordinates, value coordinates).
        segments = [([at, at], [ci_low, ci_high])]
        if capsize is not None:
            for end in (ci_low, ci_high):
                segments.append(([at - capsize / 2, at + capsize / 2],
                                 [end, end]))

        for along, across in segments:
            if self.orient == "v":
                ax.plot(along, across, color=color, **kws)
            else:
                ax.plot(across, along, color=color, **kws)
class _PointPlotter(_CategoricalStatPlotter):
    """Show point estimates and confidence intervals with (joined) points."""
    # The docstring must be the first statement of the class body.  It used
    # to follow ``default_palette``, which made it a discarded expression and
    # left ``_PointPlotter.__doc__`` as None.

    default_palette = "dark"

    def __init__(self, x, y, hue, data, order, hue_order,
                 estimator, ci, n_boot, units, seed,
                 markers, linestyles, dodge, join, scale,
                 orient, color, palette, errwidth=None, capsize=None):
        """Initialize the plotter.

        Sets up the variables, the colors and the statistic estimates
        through the inherited ``establish_variables``,
        ``establish_colors`` and ``estimate_statistic`` methods, then
        normalises the point-plot options:

        - ``markers`` / ``linestyles`` given as a single string are
          repeated once per color.
        - ``dodge=True`` becomes ``.025 * len(self.hue_names)`` when hue
          levels exist.
        - ``join`` is forced off for a single-layer plot that was given
          a palette.
        """
        self.establish_variables(x, y, hue, data, orient,
                                 order, hue_order, units)
        self.establish_colors(color, palette, 1)
        self.estimate_statistic(estimator, ci, n_boot, seed)

        # Override the default palette for single-color plots
        if hue is None and color is None and palette is None:
            self.colors = [color_palette()[0]] * len(self.colors)

        # Don't join single-layer plots with different colors
        if hue is None and palette is not None:
            join = False

        # Use a good default for `dodge=True`
        if dodge is True and self.hue_names is not None:
            dodge = .025 * len(self.hue_names)

        # Make sure we have a marker for each hue level
        if isinstance(markers, str):
            markers = [markers] * len(self.colors)
        self.markers = markers

        # Make sure we have a line style for each hue level
        if isinstance(linestyles, str):
            linestyles = [linestyles] * len(self.colors)
        self.linestyles = linestyles

        # Set the other plot components
        self.dodge = dodge
        self.join = join
        self.scale = scale
        self.errwidth = errwidth
        self.capsize = capsize

    @property
    def hue_offsets(self):
        """Offsets relative to the center position for each hue level.

        With a truthy ``self.dodge`` the offsets are evenly spaced over a
        span of ``self.dodge`` and centred on zero; otherwise they are
        all zero.
        """
        if self.dodge:
            offset = np.linspace(0, self.dodge, len(self.hue_names))
            offset -= offset.mean()
        else:
            offset = np.zeros(len(self.hue_names))
        return offset

    def draw_points(self, ax):
        """Draw the main data components of the plot.

        For each layer (a single one without hues, one per hue level
        otherwise) this draws, in order: the joining line when
        ``self.join`` is set, the confidence intervals, and the estimate
        markers.
        """
        # Get the center positions on the categorical axis
        pointpos = np.arange(len(self.statistic))

        # Get the size of the plot elements
        lw = mpl.rcParams["lines.linewidth"] * 1.8 * self.scale
        mew = lw * .75
        markersize = np.pi * np.square(lw) * 2

        if self.plot_hues is None:

            # Draw lines joining each estimate point
            if self.join:
                color = self.colors[0]
                ls = self.linestyles[0]
                if self.orient == "h":
                    ax.plot(self.statistic, pointpos,
                            color=color, ls=ls, lw=lw)
                else:
                    ax.plot(pointpos, self.statistic,
                            color=color, ls=ls, lw=lw)

            # Draw the confidence intervals
            self.draw_confints(ax, pointpos, self.confint, self.colors,
                               self.errwidth, self.capsize)

            # Draw the estimate points
            marker = self.markers[0]
            colors = [mpl.colors.colorConverter.to_rgb(c) for c in self.colors]
            if self.orient == "h":
                x, y = self.statistic, pointpos
            else:
                x, y = pointpos, self.statistic
            ax.scatter(x, y,
                       linewidth=mew, marker=marker, s=markersize,
                       facecolor=colors, edgecolor=colors)

        else:

            offsets = self.hue_offsets
            for j, hue_level in enumerate(self.hue_names):

                # Determine the values to plot for this level
                statistic = self.statistic[:, j]

                # Determine the position on the categorical and z axes
                offpos = pointpos + offsets[j]
                z = j + 1

                # Draw lines joining each estimate point
                if self.join:
                    color = self.colors[j]
                    ls = self.linestyles[j]
                    if self.orient == "h":
                        ax.plot(statistic, offpos, color=color,
                                zorder=z, ls=ls, lw=lw)
                    else:
                        ax.plot(offpos, statistic, color=color,
                                zorder=z, ls=ls, lw=lw)

                # Draw the confidence intervals
                if self.confint.size:
                    confint = self.confint[:, j]
                    errcolors = [self.colors[j]] * len(offpos)
                    self.draw_confints(ax, offpos, confint, errcolors,
                                       self.errwidth, self.capsize,
                                       zorder=z)

                # Draw the estimate points
                n_points = len(remove_na(offpos))
                marker = self.markers[j]
                color = mpl.colors.colorConverter.to_rgb(self.colors[j])

                if self.orient == "h":
                    x, y = statistic, offpos
                else:
                    x, y = offpos, statistic

                # With no valid estimate at this level, still scatter NaNs
                # so the labelled artist for ``hue_level`` is created.
                if not len(remove_na(statistic)):
                    x = y = [np.nan] * n_points

                ax.scatter(x, y, label=hue_level,
                           facecolor=color, edgecolor=color,
                           linewidth=mew, marker=marker, s=markersize,
                           zorder=z)

    def plot(self, ax):
        """Make the plot: draw the points, annotate the axes, and invert
        the y axis for horizontal orientation."""
        self.draw_points(ax)
        self.annotate_axes(ax)
        if self.orient == "h":
            ax.invert_yaxis()
self.width = width + self.dodge = dodge + self.saturation = saturation + + k_depth_methods = ['proportion', 'tukey', 'trustworthy', 'full'] + if not (k_depth in k_depth_methods or isinstance(k_depth, Number)): + msg = (f'k_depth must be one of {k_depth_methods} or a number, ' + f'but {k_depth} was passed.') + raise ValueError(msg) + self.k_depth = k_depth + + if linewidth is None: + linewidth = mpl.rcParams["lines.linewidth"] + self.linewidth = linewidth + + scales = ['linear', 'exponential', 'area'] + if scale not in scales: + msg = f'scale must be one of {scales}, but {scale} was passed.' + raise ValueError(msg) + self.scale = scale + + if ((outlier_prop > 1) or (outlier_prop <= 0)): + msg = f'outlier_prop {outlier_prop} not in range (0, 1]' + raise ValueError(msg) + self.outlier_prop = outlier_prop + + if not 0 < trust_alpha < 1: + msg = f'trust_alpha {trust_alpha} not in range (0, 1)' + raise ValueError(msg) + self.trust_alpha = trust_alpha + + self.showfliers = showfliers + + self.establish_variables(x, y, hue, data, orient, order, hue_order) + self.establish_colors(color, palette, saturation) + + def _lv_box_ends(self, vals): + """Get the number of data points and calculate `depth` of + letter-value plot.""" + vals = np.asarray(vals) + # Remove infinite values while handling a 'object' dtype + # that can come from pd.Float64Dtype() input + with pd.option_context('mode.use_inf_as_null', True): + vals = vals[~pd.isnull(vals)] + n = len(vals) + p = self.outlier_prop + + # Select the depth, i.e. 
number of boxes to draw, based on the method + if self.k_depth == 'full': + # extend boxes to 100% of the data + k = int(np.log2(n)) + 1 + elif self.k_depth == 'tukey': + # This results with 5-8 points in each tail + k = int(np.log2(n)) - 3 + elif self.k_depth == 'proportion': + k = int(np.log2(n)) - int(np.log2(n * p)) + 1 + elif self.k_depth == 'trustworthy': + point_conf = 2 * stats.norm.ppf((1 - self.trust_alpha / 2)) ** 2 + k = int(np.log2(n / point_conf)) + 1 + else: + k = int(self.k_depth) # allow having k as input + # If the number happens to be less than 1, set k to 1 + if k < 1: + k = 1 + + # Calculate the upper end for each of the k boxes + upper = [100 * (1 - 0.5 ** (i + 1)) for i in range(k, 0, -1)] + # Calculate the lower end for each of the k boxes + lower = [100 * (0.5 ** (i + 1)) for i in range(k, 0, -1)] + # Stitch the box ends together + percentile_ends = [(i, j) for i, j in zip(lower, upper)] + box_ends = [np.percentile(vals, q) for q in percentile_ends] + return box_ends, k + + def _lv_outliers(self, vals, k): + """Find the outliers based on the letter value depth.""" + box_edge = 0.5 ** (k + 1) + perc_ends = (100 * box_edge, 100 * (1 - box_edge)) + edges = np.percentile(vals, perc_ends) + lower_out = vals[np.where(vals < edges[0])[0]] + upper_out = vals[np.where(vals > edges[1])[0]] + return np.concatenate((lower_out, upper_out)) + + def _width_functions(self, width_func): + # Dictionary of functions for computing the width of the boxes + width_functions = {'linear': lambda h, i, k: (i + 1.) / k, + 'exponential': lambda h, i, k: 2**(-k + i - 1), + 'area': lambda h, i, k: (1 - 2**(-k + i - 2)) / h} + return width_functions[width_func] + + def _lvplot(self, box_data, positions, + color=[255. / 256., 185. 
/ 256., 0.], + widths=1, ax=None, **kws): + + vert = self.orient == "v" + x = positions[0] + box_data = np.asarray(box_data) + + # If we only have one data point, plot a line + if len(box_data) == 1: + kws.update({ + 'color': self.gray, 'linestyle': '-', 'linewidth': self.linewidth + }) + ys = [box_data[0], box_data[0]] + xs = [x - widths / 2, x + widths / 2] + if vert: + xx, yy = xs, ys + else: + xx, yy = ys, xs + ax.plot(xx, yy, **kws) + else: + # Get the number of data points and calculate "depth" of + # letter-value plot + box_ends, k = self._lv_box_ends(box_data) + + # Anonymous functions for calculating the width and height + # of the letter value boxes + width = self._width_functions(self.scale) + + # Function to find height of boxes + def height(b): + return b[1] - b[0] + + # Functions to construct the letter value boxes + def vert_perc_box(x, b, i, k, w): + rect = Patches.Rectangle((x - widths * w / 2, b[0]), + widths * w, + height(b), fill=True) + return rect + + def horz_perc_box(x, b, i, k, w): + rect = Patches.Rectangle((b[0], x - widths * w / 2), + height(b), widths * w, + fill=True) + return rect + + # Scale the width of the boxes so the biggest starts at 1 + w_area = np.array([width(height(b), i, k) + for i, b in enumerate(box_ends)]) + w_area = w_area / np.max(w_area) + + # Calculate the medians + y = np.median(box_data) + + # Calculate the outliers and plot (only if showfliers == True) + outliers = [] + if self.showfliers: + outliers = self._lv_outliers(box_data, k) + hex_color = mpl.colors.rgb2hex(color) + + if vert: + box_func = vert_perc_box + xs_median = [x - widths / 2, x + widths / 2] + ys_median = [y, y] + xs_outliers = np.full(len(outliers), x) + ys_outliers = outliers + + else: + box_func = horz_perc_box + xs_median = [y, y] + ys_median = [x - widths / 2, x + widths / 2] + xs_outliers = outliers + ys_outliers = np.full(len(outliers), x) + + boxes = [box_func(x, b[0], i, k, b[1]) + for i, b in enumerate(zip(box_ends, w_area))] + + # Plot 
the medians + ax.plot( + xs_median, + ys_median, + c=".15", + alpha=0.45, + solid_capstyle="butt", + linewidth=self.linewidth, + **kws + ) + + # Plot outliers (if any) + if len(outliers) > 0: + ax.scatter(xs_outliers, ys_outliers, marker='d', + c=self.gray, **kws) + + # Construct a color map from the input color + rgb = [hex_color, (1, 1, 1)] + cmap = mpl.colors.LinearSegmentedColormap.from_list('new_map', rgb) + # Make sure that the last boxes contain hue and are not pure white + rgb = [hex_color, cmap(.85)] + cmap = mpl.colors.LinearSegmentedColormap.from_list('new_map', rgb) + collection = PatchCollection( + boxes, cmap=cmap, edgecolor=self.gray, linewidth=self.linewidth + ) + + # Set the color gradation, first box will have color=hex_color + collection.set_array(np.array(np.linspace(1, 0, len(boxes)))) + + # Plot the boxes + ax.add_collection(collection) + + def draw_letter_value_plot(self, ax, kws): + """Use matplotlib to draw a letter value plot on an Axes.""" + for i, group_data in enumerate(self.plot_data): + + if self.plot_hues is None: + + # Handle case where there is data at this level + if group_data.size == 0: + continue + + # Draw a single box or a set of boxes + # with a single level of grouping + box_data = remove_na(group_data) + + # Handle case where there is no non-null data + if box_data.size == 0: + continue + + color = self.colors[i] + + self._lvplot(box_data, + positions=[i], + color=color, + widths=self.width, + ax=ax, + **kws) + + else: + # Draw nested groups of boxes + offsets = self.hue_offsets + for j, hue_level in enumerate(self.hue_names): + + # Add a legend for this hue level + if not i: + self.add_legend_data(ax, self.colors[j], hue_level) + + # Handle case where there is data at this level + if group_data.size == 0: + continue + + hue_mask = self.plot_hues[i] == hue_level + box_data = remove_na(group_data[hue_mask]) + + # Handle case where there is no non-null data + if box_data.size == 0: + continue + + color = self.colors[j] + 
center = i + offsets[j] + self._lvplot(box_data, + positions=[center], + color=color, + widths=self.nested_width, + ax=ax, + **kws) + + # Autoscale the values axis to make sure all patches are visible + ax.autoscale_view(scalex=self.orient == "h", scaley=self.orient == "v") + + def plot(self, ax, boxplot_kws): + """Make the plot.""" + self.draw_letter_value_plot(ax, boxplot_kws) + self.annotate_axes(ax) + if self.orient == "h": + ax.invert_yaxis() + + +_categorical_docs = dict( + + # Shared narrative docs + categorical_narrative=dedent("""\ + This function always treats one of the variables as categorical and + draws data at ordinal positions (0, 1, ... n) on the relevant axis, even + when the data has a numeric or date type. + + See the :ref:`tutorial ` for more information.\ + """), + main_api_narrative=dedent("""\ + + Input data can be passed in a variety of formats, including: + + - Vectors of data represented as lists, numpy arrays, or pandas Series + objects passed directly to the ``x``, ``y``, and/or ``hue`` parameters. + - A "long-form" DataFrame, in which case the ``x``, ``y``, and ``hue`` + variables will determine how the data are plotted. + - A "wide-form" DataFrame, such that each numeric column will be plotted. + - An array or list of vectors. + + In most cases, it is possible to use numpy or Python objects, but pandas + objects are preferable because the associated names will be used to + annotate the axes. Additionally, you can use Categorical types for the + grouping variables to control the order of plot elements.\ + """), + + # Shared function parameters + input_params=dedent("""\ + x, y, hue : names of variables in ``data`` or vector data, optional + Inputs for plotting long-form data. See examples for interpretation.\ + """), + string_input_params=dedent("""\ + x, y, hue : names of variables in ``data`` + Inputs for plotting long-form data. 
See examples for interpretation.\ + """), + categorical_data=dedent("""\ + data : DataFrame, array, or list of arrays, optional + Dataset for plotting. If ``x`` and ``y`` are absent, this is + interpreted as wide-form. Otherwise it is expected to be long-form.\ + """), + long_form_data=dedent("""\ + data : DataFrame + Long-form (tidy) dataset for plotting. Each column should correspond + to a variable, and each row should correspond to an observation.\ + """), + order_vars=dedent("""\ + order, hue_order : lists of strings, optional + Order to plot the categorical levels in, otherwise the levels are + inferred from the data objects.\ + """), + stat_api_params=dedent("""\ + estimator : callable that maps vector -> scalar, optional + Statistical function to estimate within each categorical bin. + ci : float or "sd" or None, optional + Size of confidence intervals to draw around estimated values. If + "sd", skip bootstrapping and draw the standard deviation of the + observations. If ``None``, no bootstrapping will be performed, and + error bars will not be drawn. + n_boot : int, optional + Number of bootstrap iterations to use when computing confidence + intervals. + units : name of variable in ``data`` or vector data, optional + Identifier of sampling units, which will be used to perform a + multilevel bootstrap and account for repeated measures design. + seed : int, numpy.random.Generator, or numpy.random.RandomState, optional + Seed or random number generator for reproducible bootstrapping.\ + """), + orient=dedent("""\ + orient : "v" | "h", optional + Orientation of the plot (vertical or horizontal). 
This is usually + inferred based on the type of the input variables, but it can be used + to resolve ambiguity when both `x` and `y` are numeric or when + plotting wide-form data.\ + """), + color=dedent("""\ + color : matplotlib color, optional + Color for all of the elements, or seed for a gradient palette.\ + """), + palette=dedent("""\ + palette : palette name, list, or dict, optional + Color palette that maps either the grouping variable or the hue + variable. If the palette is a dictionary, keys should be names of + levels and values should be matplotlib colors.\ + """), + saturation=dedent("""\ + saturation : float, optional + Proportion of the original saturation to draw colors at. Large patches + often look better with slightly desaturated colors, but set this to + ``1`` if you want the plot colors to perfectly match the input color + spec.\ + """), + capsize=dedent("""\ + capsize : float, optional + Width of the "caps" on error bars. + """), + errwidth=dedent("""\ + errwidth : float, optional + Thickness of error bar lines (and caps).\ + """), + width=dedent("""\ + width : float, optional + Width of a full element when not using hue nesting, or width of all the + elements for one level of the major grouping variable.\ + """), + dodge=dedent("""\ + dodge : bool, optional + When hue nesting is used, whether elements should be shifted along the + categorical axis.\ + """), + linewidth=dedent("""\ + linewidth : float, optional + Width of the gray lines that frame the plot elements.\ + """), + ax_in=dedent("""\ + ax : matplotlib Axes, optional + Axes object to draw the plot onto, otherwise uses the current Axes.\ + """), + ax_out=dedent("""\ + ax : matplotlib Axes + Returns the Axes object with the plot drawn onto it.\ + """), + + # Shared see also + boxplot=dedent("""\ + boxplot : A traditional box-and-whisker plot with a similar API.\ + """), + violinplot=dedent("""\ + violinplot : A combination of boxplot and kernel density estimation.\ + """), + 
@_deprecate_positional_args
def boxplot(
    *,
    x=None, y=None,
    hue=None, data=None,
    order=None, hue_order=None,
    orient=None, color=None, palette=None, saturation=.75,
    width=.8, dodge=True, fliersize=5, linewidth=None,
    whis=1.5, ax=None,
    **kwargs
):
    # The user-facing docstring is attached to ``boxplot.__doc__`` right after
    # this definition, so only implementation notes live here.

    # The plotter is constructed before the Axes is looked up.
    box_plotter = _BoxPlotter(
        x, y, hue, data, order, hue_order,
        orient, color, palette, saturation,
        width, dodge, fliersize, linewidth,
    )

    # Use the current Axes when the caller did not supply one.
    target_ax = plt.gca() if ax is None else ax

    # ``whis`` travels with the remaining keyword arguments handed to the
    # plotter; the explicit parameter overrides any same-named entry.
    kwargs["whis"] = whis

    box_plotter.plot(target_ax, kwargs)
    return target_ax
+ + {main_api_narrative} + + {categorical_narrative} + + Parameters + ---------- + {input_params} + {categorical_data} + {order_vars} + {orient} + {color} + {palette} + {saturation} + {width} + {dodge} + fliersize : float, optional + Size of the markers used to indicate outlier observations. + {linewidth} + whis : float, optional + Proportion of the IQR past the low and high quartiles to extend the + plot whiskers. Points outside this range will be identified as + outliers. + {ax_in} + kwargs : key, value mappings + Other keyword arguments are passed through to + :meth:`matplotlib.axes.Axes.boxplot`. + + Returns + ------- + {ax_out} + + See Also + -------- + {violinplot} + {stripplot} + {swarmplot} + {catplot} + + Examples + -------- + + Draw a single horizontal boxplot: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns + >>> sns.set_theme(style="whitegrid") + >>> tips = sns.load_dataset("tips") + >>> ax = sns.boxplot(x=tips["total_bill"]) + + Draw a vertical boxplot grouped by a categorical variable: + + .. plot:: + :context: close-figs + + >>> ax = sns.boxplot(x="day", y="total_bill", data=tips) + + Draw a boxplot with nested grouping by two categorical variables: + + .. plot:: + :context: close-figs + + >>> ax = sns.boxplot(x="day", y="total_bill", hue="smoker", + ... data=tips, palette="Set3") + + Draw a boxplot with nested grouping when some bins are empty: + + .. plot:: + :context: close-figs + + >>> ax = sns.boxplot(x="day", y="total_bill", hue="time", + ... data=tips, linewidth=2.5) + + Control box order by passing an explicit order: + + .. plot:: + :context: close-figs + + >>> ax = sns.boxplot(x="time", y="tip", data=tips, + ... order=["Dinner", "Lunch"]) + + Draw a boxplot for each numeric variable in a DataFrame: + + .. plot:: + :context: close-figs + + >>> iris = sns.load_dataset("iris") + >>> ax = sns.boxplot(data=iris, orient="h", palette="Set2") + + Use ``hue`` without changing box position or width: + + .. 
@_deprecate_positional_args
def violinplot(
    *,
    x=None, y=None,
    hue=None, data=None,
    order=None, hue_order=None,
    bw="scott", cut=2, scale="area", scale_hue=True, gridsize=100,
    width=.8, inner="box", split=False, dodge=True, orient=None,
    linewidth=None, color=None, palette=None, saturation=.75,
    ax=None, **kwargs,
):
    # The user-facing docstring is attached to ``violinplot.__doc__`` right
    # after this definition.
    #
    # NOTE(review): ``**kwargs`` is accepted but never forwarded anywhere in
    # this function, so extra keyword arguments are silently ignored.

    # The plotter is constructed before the Axes is looked up.
    violin_plotter = _ViolinPlotter(
        x, y, hue, data, order, hue_order,
        bw, cut, scale, scale_hue, gridsize,
        width, inner, split, dodge, orient, linewidth,
        color, palette, saturation,
    )

    # Use the current Axes when the caller did not supply one.
    target_ax = plt.gca() if ax is None else ax

    violin_plotter.plot(target_ax)
    return target_ax
Unlike + a box plot, in which all of the plot components correspond to actual + datapoints, the violin plot features a kernel density estimation of the + underlying distribution. + + This can be an effective and attractive way to show multiple distributions + of data at once, but keep in mind that the estimation procedure is + influenced by the sample size, and violins for relatively small samples + might look misleadingly smooth. + + {main_api_narrative} + + {categorical_narrative} + + Parameters + ---------- + {input_params} + {categorical_data} + {order_vars} + bw : {{'scott', 'silverman', float}}, optional + Either the name of a reference rule or the scale factor to use when + computing the kernel bandwidth. The actual kernel size will be + determined by multiplying the scale factor by the standard deviation of + the data within each bin. + cut : float, optional + Distance, in units of bandwidth size, to extend the density past the + extreme datapoints. Set to 0 to limit the violin range within the range + of the observed data (i.e., to have the same effect as ``trim=True`` in + ``ggplot``. + scale : {{"area", "count", "width"}}, optional + The method used to scale the width of each violin. If ``area``, each + violin will have the same area. If ``count``, the width of the violins + will be scaled by the number of observations in that bin. If ``width``, + each violin will have the same width. + scale_hue : bool, optional + When nesting violins using a ``hue`` variable, this parameter + determines whether the scaling is computed within each level of the + major grouping variable (``scale_hue=True``) or across all the violins + on the plot (``scale_hue=False``). + gridsize : int, optional + Number of points in the discrete grid used to compute the kernel + density estimate. + {width} + inner : {{"box", "quartile", "point", "stick", None}}, optional + Representation of the datapoints in the violin interior. If ``box``, + draw a miniature boxplot. 
If ``quartiles``, draw the quartiles of the + distribution. If ``point`` or ``stick``, show each underlying + datapoint. Using ``None`` will draw unadorned violins. + split : bool, optional + When using hue nesting with a variable that takes two levels, setting + ``split`` to True will draw half of a violin for each level. This can + make it easier to directly compare the distributions. + {dodge} + {orient} + {linewidth} + {color} + {palette} + {saturation} + {ax_in} + + Returns + ------- + {ax_out} + + See Also + -------- + {boxplot} + {stripplot} + {swarmplot} + {catplot} + + Examples + -------- + + Draw a single horizontal violinplot: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns + >>> sns.set_theme(style="whitegrid") + >>> tips = sns.load_dataset("tips") + >>> ax = sns.violinplot(x=tips["total_bill"]) + + Draw a vertical violinplot grouped by a categorical variable: + + .. plot:: + :context: close-figs + + >>> ax = sns.violinplot(x="day", y="total_bill", data=tips) + + Draw a violinplot with nested grouping by two categorical variables: + + .. plot:: + :context: close-figs + + >>> ax = sns.violinplot(x="day", y="total_bill", hue="smoker", + ... data=tips, palette="muted") + + Draw split violins to compare the across the hue variable: + + .. plot:: + :context: close-figs + + >>> ax = sns.violinplot(x="day", y="total_bill", hue="smoker", + ... data=tips, palette="muted", split=True) + + Control violin order by passing an explicit order: + + .. plot:: + :context: close-figs + + >>> ax = sns.violinplot(x="time", y="tip", data=tips, + ... order=["Dinner", "Lunch"]) + + Scale the violin width by the number of observations in each bin: + + .. plot:: + :context: close-figs + + >>> ax = sns.violinplot(x="day", y="total_bill", hue="sex", + ... data=tips, palette="Set2", split=True, + ... scale="count") + + Draw the quartiles as horizontal lines instead of a mini-box: + + .. 
plot:: + :context: close-figs + + >>> ax = sns.violinplot(x="day", y="total_bill", hue="sex", + ... data=tips, palette="Set2", split=True, + ... scale="count", inner="quartile") + + Show each observation with a stick inside the violin: + + .. plot:: + :context: close-figs + + >>> ax = sns.violinplot(x="day", y="total_bill", hue="sex", + ... data=tips, palette="Set2", split=True, + ... scale="count", inner="stick") + + Scale the density relative to the counts across all bins: + + .. plot:: + :context: close-figs + + >>> ax = sns.violinplot(x="day", y="total_bill", hue="sex", + ... data=tips, palette="Set2", split=True, + ... scale="count", inner="stick", scale_hue=False) + + Use a narrow bandwidth to reduce the amount of smoothing: + + .. plot:: + :context: close-figs + + >>> ax = sns.violinplot(x="day", y="total_bill", hue="sex", + ... data=tips, palette="Set2", split=True, + ... scale="count", inner="stick", + ... scale_hue=False, bw=.2) + + Draw horizontal violins: + + .. plot:: + :context: close-figs + + >>> planets = sns.load_dataset("planets") + >>> ax = sns.violinplot(x="orbital_period", y="method", + ... data=planets[planets.orbital_period < 1000], + ... scale="width", palette="Set3") + + Don't let density extend past extreme values in the data: + + .. plot:: + :context: close-figs + + >>> ax = sns.violinplot(x="orbital_period", y="method", + ... data=planets[planets.orbital_period < 1000], + ... cut=0, scale="width", palette="Set3") + + Use ``hue`` without changing violin position or width: + + .. plot:: + :context: close-figs + + >>> tips["weekend"] = tips["day"].isin(["Sat", "Sun"]) + >>> ax = sns.violinplot(x="day", y="total_bill", hue="weekend", + ... data=tips, dodge=False) + + Use :func:`catplot` to combine a :func:`violinplot` and a + :class:`FacetGrid`. This allows grouping within additional categorical + variables. 
@_deprecate_positional_args
def boxenplot(
    *,
    x=None, y=None,
    hue=None, data=None,
    order=None, hue_order=None,
    orient=None, color=None, palette=None, saturation=.75,
    width=.8, dodge=True, k_depth='tukey', linewidth=None,
    scale='exponential', outlier_prop=0.007, trust_alpha=0.05, showfliers=True,
    ax=None, **kwargs
):
    # The user-facing docstring is attached to ``boxenplot.__doc__`` right
    # after this definition.

    # The plotter is constructed before the Axes is looked up; its
    # constructor is where the letter-value options are received.
    lv_plotter = _LVPlotter(
        x, y, hue, data, order, hue_order,
        orient, color, palette, saturation,
        width, dodge, k_depth, linewidth, scale,
        outlier_prop, trust_alpha, showfliers,
    )

    # Use the current Axes when the caller did not supply one.
    target_ax = plt.gca() if ax is None else ax

    # Remaining keyword arguments are handed to the plotter untouched.
    lv_plotter.plot(target_ax, kwargs)
    return target_ax
+ All methods are detailed in Wickham's paper. Each makes different + assumptions about the number of outliers and leverages different + statistical properties. If "proportion", draw no more than + `outlier_prop` extreme observations. If "full", draw `log(n)+1` boxes. + {linewidth} + scale : {{"exponential", "linear", "area"}}, optional + Method to use for the width of the letter value boxes. All give similar + results visually. "linear" reduces the width by a constant linear + factor, "exponential" uses the proportion of data not covered, "area" + is proportional to the percentage of data covered. + outlier_prop : float, optional + Proportion of data believed to be outliers. Must be in the range + (0, 1]. Used to determine the number of boxes to plot when + `k_depth="proportion"`. + trust_alpha : float, optional + Confidence level for a box to be plotted. Used to determine the + number of boxes to plot when `k_depth="trustworthy"`. Must be in the + range (0, 1). + showfliers : bool, optional + If False, suppress the plotting of outliers. + {ax_in} + kwargs : key, value mappings + Other keyword arguments are passed through to + :meth:`matplotlib.axes.Axes.plot` and + :meth:`matplotlib.axes.Axes.scatter`. + + Returns + ------- + {ax_out} + + See Also + -------- + {violinplot} + {boxplot} + {catplot} + + Examples + -------- + + Draw a single horizontal boxen plot: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns + >>> sns.set_theme(style="whitegrid") + >>> tips = sns.load_dataset("tips") + >>> ax = sns.boxenplot(x=tips["total_bill"]) + + Draw a vertical boxen plot grouped by a categorical variable: + + .. plot:: + :context: close-figs + + >>> ax = sns.boxenplot(x="day", y="total_bill", data=tips) + + Draw a letter value plot with nested grouping by two categorical variables: + + .. plot:: + :context: close-figs + + >>> ax = sns.boxenplot(x="day", y="total_bill", hue="smoker", + ... 
@_deprecate_positional_args
def stripplot(
    *,
    x=None, y=None,
    hue=None, data=None,
    order=None, hue_order=None,
    jitter=True, dodge=False, orient=None, color=None, palette=None,
    size=5, edgecolor="gray", linewidth=0, ax=None,
    **kwargs
):
    # The user-facing docstring is attached to ``stripplot.__doc__`` right
    # after this definition.

    # Legacy spelling: ``split`` overrides ``dodge`` and triggers a warning.
    try:
        legacy_dodge = kwargs.pop("split")
    except KeyError:
        pass
    else:
        dodge = legacy_dodge
        warnings.warn(
            "The `split` parameter has been renamed to `dodge`.", UserWarning
        )

    strip_plotter = _StripPlotter(
        x, y, hue, data, order, hue_order,
        jitter, dodge, orient, color, palette,
    )

    # Use the current Axes when the caller did not supply one.
    target_ax = plt.gca() if ax is None else ax

    kwargs.setdefault("zorder", 3)

    # An ``s`` keyword takes precedence over ``size``.
    size = kwargs.get("s", size)

    # An explicit ``linewidth=None`` means one tenth of the marker size.
    if linewidth is None:
        linewidth = size / 10

    # "gray" is a placeholder that resolves to the plotter's own gray.
    if edgecolor == "gray":
        edgecolor = strip_plotter.gray

    # The value passed on as ``s`` is the square of the size.
    kwargs.update(
        s=size ** 2,
        edgecolor=edgecolor,
        linewidth=linewidth,
    )

    strip_plotter.plot(target_ax, kwargs)
    return target_ax
+ + Returns + ------- + {ax_out} + + See Also + -------- + {swarmplot} + {boxplot} + {violinplot} + {catplot} + + Examples + -------- + + Draw a single horizontal strip plot: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns + >>> sns.set_theme(style="whitegrid") + >>> tips = sns.load_dataset("tips") + >>> ax = sns.stripplot(x=tips["total_bill"]) + + Group the strips by a categorical variable: + + .. plot:: + :context: close-figs + + >>> ax = sns.stripplot(x="day", y="total_bill", data=tips) + + Use a smaller amount of jitter: + + .. plot:: + :context: close-figs + + >>> ax = sns.stripplot(x="day", y="total_bill", data=tips, jitter=0.05) + + Draw horizontal strips: + + .. plot:: + :context: close-figs + + >>> ax = sns.stripplot(x="total_bill", y="day", data=tips) + + Draw outlines around the points: + + .. plot:: + :context: close-figs + + >>> ax = sns.stripplot(x="total_bill", y="day", data=tips, + ... linewidth=1) + + Nest the strips within a second categorical variable: + + .. plot:: + :context: close-figs + + >>> ax = sns.stripplot(x="sex", y="total_bill", hue="day", data=tips) + + Draw each level of the ``hue`` variable at different locations on the + major categorical axis: + + .. plot:: + :context: close-figs + + >>> ax = sns.stripplot(x="day", y="total_bill", hue="smoker", + ... data=tips, palette="Set2", dodge=True) + + Control strip order by passing an explicit order: + + .. plot:: + :context: close-figs + + >>> ax = sns.stripplot(x="time", y="tip", data=tips, + ... order=["Dinner", "Lunch"]) + + Draw strips with large points and different aesthetics: + + .. plot:: + :context: close-figs + + >>> ax = sns.stripplot(x="day", y="total_bill", hue="smoker", + ... data=tips, palette="Set2", size=20, marker="D", + ... edgecolor="gray", alpha=.25) + + Draw strips of observations on top of a box plot: + + .. 
plot:: + :context: close-figs + + >>> import numpy as np + >>> ax = sns.boxplot(x="tip", y="day", data=tips, whis=np.inf) + >>> ax = sns.stripplot(x="tip", y="day", data=tips, color=".3") + + Draw strips of observations on top of a violin plot: + + .. plot:: + :context: close-figs + + >>> ax = sns.violinplot(x="day", y="total_bill", data=tips, + ... inner=None, color=".8") + >>> ax = sns.stripplot(x="day", y="total_bill", data=tips) + + Use :func:`catplot` to combine a :func:`stripplot` and a + :class:`FacetGrid`. This allows grouping within additional categorical + variables. Using :func:`catplot` is safer than using :class:`FacetGrid` + directly, as it ensures synchronization of variable order across facets: + + .. plot:: + :context: close-figs + + >>> g = sns.catplot(x="sex", y="total_bill", + ... hue="smoker", col="time", + ... data=tips, kind="strip", + ... height=4, aspect=.7); + + """).format(**_categorical_docs) + + +@_deprecate_positional_args +def swarmplot( + *, + x=None, y=None, + hue=None, data=None, + order=None, hue_order=None, + dodge=False, orient=None, color=None, palette=None, + size=5, edgecolor="gray", linewidth=0, ax=None, + **kwargs +): + + if "split" in kwargs: + dodge = kwargs.pop("split") + msg = "The `split` parameter has been renamed to `dodge`." + warnings.warn(msg, UserWarning) + + plotter = _SwarmPlotter(x, y, hue, data, order, hue_order, + dodge, orient, color, palette) + if ax is None: + ax = plt.gca() + + kwargs.setdefault("zorder", 3) + size = kwargs.get("s", size) + if linewidth is None: + linewidth = size / 10 + if edgecolor == "gray": + edgecolor = plotter.gray + kwargs.update(dict(s=size ** 2, + edgecolor=edgecolor, + linewidth=linewidth)) + + plotter.plot(ax, kwargs) + return ax + + +swarmplot.__doc__ = dedent("""\ + Draw a categorical scatterplot with non-overlapping points. + + This function is similar to :func:`stripplot`, but the points are adjusted + (only along the categorical axis) so that they don't overlap. 
This gives a + better representation of the distribution of values, but it does not scale + well to large numbers of observations. This style of plot is sometimes + called a "beeswarm". + + A swarm plot can be drawn on its own, but it is also a good complement + to a box or violin plot in cases where you want to show all observations + along with some representation of the underlying distribution. + + Arranging the points properly requires an accurate transformation between + data and point coordinates. This means that non-default axis limits must + be set *before* drawing the plot. + + {main_api_narrative} + + {categorical_narrative} + + Parameters + ---------- + {input_params} + {categorical_data} + {order_vars} + dodge : bool, optional + When using ``hue`` nesting, setting this to ``True`` will separate + the strips for different hue levels along the categorical axis. + Otherwise, the points for each level will be plotted in one swarm. + {orient} + {color} + {palette} + size : float, optional + Radius of the markers, in points. + edgecolor : matplotlib color, "gray" is special-cased, optional + Color of the lines around each point. If you pass ``"gray"``, the + brightness is determined by the color palette used for the body + of the points. + {linewidth} + {ax_in} + kwargs : key, value mappings + Other keyword arguments are passed through to + :meth:`matplotlib.axes.Axes.scatter`. + + Returns + ------- + {ax_out} + + See Also + -------- + {boxplot} + {violinplot} + {stripplot} + {catplot} + + Examples + -------- + + Draw a single horizontal swarm plot: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns + >>> sns.set_theme(style="whitegrid") + >>> tips = sns.load_dataset("tips") + >>> ax = sns.swarmplot(x=tips["total_bill"]) + + Group the swarms by a categorical variable: + + .. plot:: + :context: close-figs + + >>> ax = sns.swarmplot(x="day", y="total_bill", data=tips) + + Draw horizontal swarms: + + .. 
plot:: + :context: close-figs + + >>> ax = sns.swarmplot(x="total_bill", y="day", data=tips) + + Color the points using a second categorical variable: + + .. plot:: + :context: close-figs + + >>> ax = sns.swarmplot(x="day", y="total_bill", hue="sex", data=tips) + + Split each level of the ``hue`` variable along the categorical axis: + + .. plot:: + :context: close-figs + + >>> ax = sns.swarmplot(x="day", y="total_bill", hue="smoker", + ... data=tips, palette="Set2", dodge=True) + + Control swarm order by passing an explicit order: + + .. plot:: + :context: close-figs + + >>> ax = sns.swarmplot(x="time", y="total_bill", data=tips, + ... order=["Dinner", "Lunch"]) + + Plot using larger points: + + .. plot:: + :context: close-figs + + >>> ax = sns.swarmplot(x="time", y="total_bill", data=tips, size=6) + + Draw swarms of observations on top of a box plot: + + .. plot:: + :context: close-figs + + >>> ax = sns.boxplot(x="total_bill", y="day", data=tips, whis=np.inf) + >>> ax = sns.swarmplot(x="total_bill", y="day", data=tips, color=".2") + + Draw swarms of observations on top of a violin plot: + + .. plot:: + :context: close-figs + + >>> ax = sns.violinplot(x="day", y="total_bill", data=tips, inner=None) + >>> ax = sns.swarmplot(x="day", y="total_bill", data=tips, + ... color="white", edgecolor="gray") + + Use :func:`catplot` to combine a :func:`swarmplot` and a + :class:`FacetGrid`. This allows grouping within additional categorical + variables. Using :func:`catplot` is safer than using :class:`FacetGrid` + directly, as it ensures synchronization of variable order across facets: + + .. plot:: + :context: close-figs + + >>> g = sns.catplot(x="sex", y="total_bill", + ... hue="smoker", col="time", + ... data=tips, kind="swarm", + ... 
height=4, aspect=.7); + + """).format(**_categorical_docs) + + +@_deprecate_positional_args +def barplot( + *, + x=None, y=None, + hue=None, data=None, + order=None, hue_order=None, + estimator=np.mean, ci=95, n_boot=1000, units=None, seed=None, + orient=None, color=None, palette=None, saturation=.75, + errcolor=".26", errwidth=None, capsize=None, dodge=True, + ax=None, + **kwargs, +): + + plotter = _BarPlotter(x, y, hue, data, order, hue_order, + estimator, ci, n_boot, units, seed, + orient, color, palette, saturation, + errcolor, errwidth, capsize, dodge) + + if ax is None: + ax = plt.gca() + + plotter.plot(ax, kwargs) + return ax + + +barplot.__doc__ = dedent("""\ + Show point estimates and confidence intervals as rectangular bars. + + A bar plot represents an estimate of central tendency for a numeric + variable with the height of each rectangle and provides some indication of + the uncertainty around that estimate using error bars. Bar plots include 0 + in the quantitative axis range, and they are a good choice when 0 is a + meaningful value for the quantitative variable, and you want to make + comparisons against it. + + For datasets where 0 is not a meaningful value, a point plot will allow you + to focus on differences between levels of one or more categorical + variables. + + It is also important to keep in mind that a bar plot shows only the mean + (or other estimator) value, but in many cases it may be more informative to + show the distribution of values at each level of the categorical variables. + In that case, other approaches such as a box or violin plot may be more + appropriate. + + {main_api_narrative} + + {categorical_narrative} + + Parameters + ---------- + {input_params} + {categorical_data} + {order_vars} + {stat_api_params} + {orient} + {color} + {palette} + {saturation} + errcolor : matplotlib color + Color for the lines that represent the confidence interval. 
+ {errwidth} + {capsize} + {dodge} + {ax_in} + kwargs : key, value mappings + Other keyword arguments are passed through to + :meth:`matplotlib.axes.Axes.bar`. + + Returns + ------- + {ax_out} + + See Also + -------- + {countplot} + {pointplot} + {catplot} + + Examples + -------- + + Draw a set of vertical bar plots grouped by a categorical variable: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns + >>> sns.set_theme(style="whitegrid") + >>> tips = sns.load_dataset("tips") + >>> ax = sns.barplot(x="day", y="total_bill", data=tips) + + Draw a set of vertical bars with nested grouping by a two variables: + + .. plot:: + :context: close-figs + + >>> ax = sns.barplot(x="day", y="total_bill", hue="sex", data=tips) + + Draw a set of horizontal bars: + + .. plot:: + :context: close-figs + + >>> ax = sns.barplot(x="tip", y="day", data=tips) + + Control bar order by passing an explicit order: + + .. plot:: + :context: close-figs + + >>> ax = sns.barplot(x="time", y="tip", data=tips, + ... order=["Dinner", "Lunch"]) + + Use median as the estimate of central tendency: + + .. plot:: + :context: close-figs + + >>> from numpy import median + >>> ax = sns.barplot(x="day", y="tip", data=tips, estimator=median) + + Show the standard error of the mean with the error bars: + + .. plot:: + :context: close-figs + + >>> ax = sns.barplot(x="day", y="tip", data=tips, ci=68) + + Show standard deviation of observations instead of a confidence interval: + + .. plot:: + :context: close-figs + + >>> ax = sns.barplot(x="day", y="tip", data=tips, ci="sd") + + Add "caps" to the error bars: + + .. plot:: + :context: close-figs + + >>> ax = sns.barplot(x="day", y="tip", data=tips, capsize=.2) + + Use a different color palette for the bars: + + .. plot:: + :context: close-figs + + >>> ax = sns.barplot(x="size", y="total_bill", data=tips, + ... palette="Blues_d") + + Use ``hue`` without changing bar position or width: + + .. 
plot:: + :context: close-figs + + >>> tips["weekend"] = tips["day"].isin(["Sat", "Sun"]) + >>> ax = sns.barplot(x="day", y="total_bill", hue="weekend", + ... data=tips, dodge=False) + + Plot all bars in a single color: + + .. plot:: + :context: close-figs + + >>> ax = sns.barplot(x="size", y="total_bill", data=tips, + ... color="salmon", saturation=.5) + + Use :meth:`matplotlib.axes.Axes.bar` parameters to control the style. + + .. plot:: + :context: close-figs + + >>> ax = sns.barplot(x="day", y="total_bill", data=tips, + ... linewidth=2.5, facecolor=(1, 1, 1, 0), + ... errcolor=".2", edgecolor=".2") + + Use :func:`catplot` to combine a :func:`barplot` and a :class:`FacetGrid`. + This allows grouping within additional categorical variables. Using + :func:`catplot` is safer than using :class:`FacetGrid` directly, as it + ensures synchronization of variable order across facets: + + .. plot:: + :context: close-figs + + >>> g = sns.catplot(x="sex", y="total_bill", + ... hue="smoker", col="time", + ... data=tips, kind="bar", + ... height=4, aspect=.7); + + """).format(**_categorical_docs) + + +@_deprecate_positional_args +def pointplot( + *, + x=None, y=None, + hue=None, data=None, + order=None, hue_order=None, + estimator=np.mean, ci=95, n_boot=1000, units=None, seed=None, + markers="o", linestyles="-", dodge=False, join=True, scale=1, + orient=None, color=None, palette=None, errwidth=None, + capsize=None, ax=None, + **kwargs +): + + plotter = _PointPlotter(x, y, hue, data, order, hue_order, + estimator, ci, n_boot, units, seed, + markers, linestyles, dodge, join, scale, + orient, color, palette, errwidth, capsize) + + if ax is None: + ax = plt.gca() + + plotter.plot(ax) + return ax + + +pointplot.__doc__ = dedent("""\ + Show point estimates and confidence intervals using scatter plot glyphs. 
+ + A point plot represents an estimate of central tendency for a numeric + variable by the position of scatter plot points and provides some + indication of the uncertainty around that estimate using error bars. + + Point plots can be more useful than bar plots for focusing comparisons + between different levels of one or more categorical variables. They are + particularly adept at showing interactions: how the relationship between + levels of one categorical variable changes across levels of a second + categorical variable. The lines that join each point from the same ``hue`` + level allow interactions to be judged by differences in slope, which is + easier for the eyes than comparing the heights of several groups of points + or bars. + + It is important to keep in mind that a point plot shows only the mean (or + other estimator) value, but in many cases it may be more informative to + show the distribution of values at each level of the categorical variables. + In that case, other approaches such as a box or violin plot may be more + appropriate. + + {main_api_narrative} + + {categorical_narrative} + + Parameters + ---------- + {input_params} + {categorical_data} + {order_vars} + {stat_api_params} + markers : string or list of strings, optional + Markers to use for each of the ``hue`` levels. + linestyles : string or list of strings, optional + Line styles to use for each of the ``hue`` levels. + dodge : bool or float, optional + Amount to separate the points for each level of the ``hue`` variable + along the categorical axis. + join : bool, optional + If ``True``, lines will be drawn between point estimates at the same + ``hue`` level. + scale : float, optional + Scale factor for the plot elements. + {orient} + {color} + {palette} + {errwidth} + {capsize} + {ax_in} + + Returns + ------- + {ax_out} + + See Also + -------- + {barplot} + {catplot} + + Examples + -------- + + Draw a set of vertical point plots grouped by a categorical variable: + + .. 
plot:: + :context: close-figs + + >>> import seaborn as sns + >>> sns.set_theme(style="darkgrid") + >>> tips = sns.load_dataset("tips") + >>> ax = sns.pointplot(x="time", y="total_bill", data=tips) + + Draw a set of vertical points with nested grouping by a two variables: + + .. plot:: + :context: close-figs + + >>> ax = sns.pointplot(x="time", y="total_bill", hue="smoker", + ... data=tips) + + Separate the points for different hue levels along the categorical axis: + + .. plot:: + :context: close-figs + + >>> ax = sns.pointplot(x="time", y="total_bill", hue="smoker", + ... data=tips, dodge=True) + + Use a different marker and line style for the hue levels: + + .. plot:: + :context: close-figs + + >>> ax = sns.pointplot(x="time", y="total_bill", hue="smoker", + ... data=tips, + ... markers=["o", "x"], + ... linestyles=["-", "--"]) + + Draw a set of horizontal points: + + .. plot:: + :context: close-figs + + >>> ax = sns.pointplot(x="tip", y="day", data=tips) + + Don't draw a line connecting each point: + + .. plot:: + :context: close-figs + + >>> ax = sns.pointplot(x="tip", y="day", data=tips, join=False) + + Use a different color for a single-layer plot: + + .. plot:: + :context: close-figs + + >>> ax = sns.pointplot(x="time", y="total_bill", data=tips, + ... color="#bb3f3f") + + Use a different color palette for the points: + + .. plot:: + :context: close-figs + + >>> ax = sns.pointplot(x="time", y="total_bill", hue="smoker", + ... data=tips, palette="Set2") + + Control point order by passing an explicit order: + + .. plot:: + :context: close-figs + + >>> ax = sns.pointplot(x="time", y="tip", data=tips, + ... order=["Dinner", "Lunch"]) + + Use median as the estimate of central tendency: + + .. plot:: + :context: close-figs + + >>> from numpy import median + >>> ax = sns.pointplot(x="day", y="tip", data=tips, estimator=median) + + Show the standard error of the mean with the error bars: + + .. 
plot:: + :context: close-figs + + >>> ax = sns.pointplot(x="day", y="tip", data=tips, ci=68) + + Show standard deviation of observations instead of a confidence interval: + + .. plot:: + :context: close-figs + + >>> ax = sns.pointplot(x="day", y="tip", data=tips, ci="sd") + + Add "caps" to the error bars: + + .. plot:: + :context: close-figs + + >>> ax = sns.pointplot(x="day", y="tip", data=tips, capsize=.2) + + Use :func:`catplot` to combine a :func:`pointplot` and a + :class:`FacetGrid`. This allows grouping within additional categorical + variables. Using :func:`catplot` is safer than using :class:`FacetGrid` + directly, as it ensures synchronization of variable order across facets: + + .. plot:: + :context: close-figs + + >>> g = sns.catplot(x="sex", y="total_bill", + ... hue="smoker", col="time", + ... data=tips, kind="point", + ... dodge=True, + ... height=4, aspect=.7); + + """).format(**_categorical_docs) + + +@_deprecate_positional_args +def countplot( + *, + x=None, y=None, + hue=None, data=None, + order=None, hue_order=None, + orient=None, color=None, palette=None, saturation=.75, + dodge=True, ax=None, **kwargs +): + + estimator = len + ci = None + n_boot = 0 + units = None + seed = None + errcolor = None + errwidth = None + capsize = None + + if x is None and y is not None: + orient = "h" + x = y + elif y is None and x is not None: + orient = "v" + y = x + elif x is not None and y is not None: + raise ValueError("Cannot pass values for both `x` and `y`") + + plotter = _CountPlotter( + x, y, hue, data, order, hue_order, + estimator, ci, n_boot, units, seed, + orient, color, palette, saturation, + errcolor, errwidth, capsize, dodge + ) + + plotter.value_label = "count" + + if ax is None: + ax = plt.gca() + + plotter.plot(ax, kwargs) + return ax + + +countplot.__doc__ = dedent("""\ + Show the counts of observations in each categorical bin using bars. 
+ + A count plot can be thought of as a histogram across a categorical, instead + of quantitative, variable. The basic API and options are identical to those + for :func:`barplot`, so you can compare counts across nested variables. + + {main_api_narrative} + + {categorical_narrative} + + Parameters + ---------- + {input_params} + {categorical_data} + {order_vars} + {orient} + {color} + {palette} + {saturation} + {dodge} + {ax_in} + kwargs : key, value mappings + Other keyword arguments are passed through to + :meth:`matplotlib.axes.Axes.bar`. + + Returns + ------- + {ax_out} + + See Also + -------- + {barplot} + {catplot} + + Examples + -------- + + Show value counts for a single categorical variable: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns + >>> sns.set_theme(style="darkgrid") + >>> titanic = sns.load_dataset("titanic") + >>> ax = sns.countplot(x="class", data=titanic) + + Show value counts for two categorical variables: + + .. plot:: + :context: close-figs + + >>> ax = sns.countplot(x="class", hue="who", data=titanic) + + Plot the bars horizontally: + + .. plot:: + :context: close-figs + + >>> ax = sns.countplot(y="class", hue="who", data=titanic) + + Use a different color palette: + + .. plot:: + :context: close-figs + + >>> ax = sns.countplot(x="who", data=titanic, palette="Set3") + + Use :meth:`matplotlib.axes.Axes.bar` parameters to control the style. + + .. plot:: + :context: close-figs + + >>> ax = sns.countplot(x="who", data=titanic, + ... facecolor=(0, 0, 0, 0), + ... linewidth=5, + ... edgecolor=sns.color_palette("dark", 3)) + + Use :func:`catplot` to combine a :func:`countplot` and a + :class:`FacetGrid`. This allows grouping within additional categorical + variables. Using :func:`catplot` is safer than using :class:`FacetGrid` + directly, as it ensures synchronization of variable order across facets: + + .. plot:: + :context: close-figs + + >>> g = sns.catplot(x="class", hue="who", col="survived", + ... 
data=titanic, kind="count", + ... height=4, aspect=.7); + + """).format(**_categorical_docs) + + +def factorplot(*args, **kwargs): + """Deprecated; please use `catplot` instead.""" + + msg = ( + "The `factorplot` function has been renamed to `catplot`. The " + "original name will be removed in a future release. Please update " + "your code. Note that the default `kind` in `factorplot` (`'point'`) " + "has changed `'strip'` in `catplot`." + ) + warnings.warn(msg) + + if "size" in kwargs: + kwargs["height"] = kwargs.pop("size") + msg = ("The `size` parameter has been renamed to `height`; " + "please update your code.") + warnings.warn(msg, UserWarning) + + kwargs.setdefault("kind", "point") + + return catplot(*args, **kwargs) + + +@_deprecate_positional_args +def catplot( + *, + x=None, y=None, + hue=None, data=None, + row=None, col=None, # TODO move in front of data when * is enforced + col_wrap=None, estimator=np.mean, ci=95, n_boot=1000, + units=None, seed=None, order=None, hue_order=None, row_order=None, + col_order=None, kind="strip", height=5, aspect=1, + orient=None, color=None, palette=None, + legend=True, legend_out=True, sharex=True, sharey=True, + margin_titles=False, facet_kws=None, + **kwargs +): + + # Handle deprecations + if "size" in kwargs: + height = kwargs.pop("size") + msg = ("The `size` parameter has been renamed to `height`; " + "please update your code.") + warnings.warn(msg, UserWarning) + + # Determine the plotting function + try: + plot_func = globals()[kind + "plot"] + except KeyError: + err = "Plot kind '{}' is not recognized".format(kind) + raise ValueError(err) + + # Alias the input variables to determine categorical order and palette + # correctly in the case of a count plot + if kind == "count": + if x is None and y is not None: + x_, y_, orient = y, y, "h" + elif y is None and x is not None: + x_, y_, orient = x, x, "v" + else: + raise ValueError("Either `x` or `y` must be None for kind='count'") + else: + x_, y_ = x, y + + # Check 
for attempt to plot onto specific axes and warn + if "ax" in kwargs: + msg = ("catplot is a figure-level function and does not accept " + "target axes. You may wish to try {}".format(kind + "plot")) + warnings.warn(msg, UserWarning) + kwargs.pop("ax") + + # Determine the order for the whole dataset, which will be used in all + # facets to ensure representation of all data in the final plot + plotter_class = { + "box": _BoxPlotter, + "violin": _ViolinPlotter, + "boxen": _LVPlotter, + "bar": _BarPlotter, + "point": _PointPlotter, + "strip": _StripPlotter, + "swarm": _SwarmPlotter, + "count": _CountPlotter, + }[kind] + p = _CategoricalPlotter() + p.require_numeric = plotter_class.require_numeric + p.establish_variables(x_, y_, hue, data, orient, order, hue_order) + if ( + order is not None + or (sharex and p.orient == "v") + or (sharey and p.orient == "h") + ): + # Sync categorical axis between facets to have the same categories + order = p.group_names + elif color is None and hue is None: + msg = ( + "Setting `{}=False` with `color=None` may cause different levels of the " + "`{}` variable to share colors. This will change in a future version." 
+ ) + if not sharex and p.orient == "v": + warnings.warn(msg.format("sharex", "x"), UserWarning) + if not sharey and p.orient == "h": + warnings.warn(msg.format("sharey", "y"), UserWarning) + + hue_order = p.hue_names + + # Determine the palette to use + # (FacetGrid will pass a value for ``color`` to the plotting function + # so we need to define ``palette`` to get default behavior for the + # categorical functions + p.establish_colors(color, palette, 1) + if kind != "point" or hue is not None: + palette = p.colors + + # Determine keyword arguments for the facets + facet_kws = {} if facet_kws is None else facet_kws + facet_kws.update( + data=data, row=row, col=col, + row_order=row_order, col_order=col_order, + col_wrap=col_wrap, height=height, aspect=aspect, + sharex=sharex, sharey=sharey, + legend_out=legend_out, margin_titles=margin_titles, + dropna=False, + ) + + # Determine keyword arguments for the plotting function + plot_kws = dict( + order=order, hue_order=hue_order, + orient=orient, color=color, palette=palette, + ) + plot_kws.update(kwargs) + + if kind in ["bar", "point"]: + plot_kws.update( + estimator=estimator, ci=ci, n_boot=n_boot, units=units, seed=seed, + ) + + # Initialize the facets + g = FacetGrid(**facet_kws) + + # Draw the plot onto the facets + g.map_dataframe(plot_func, x=x, y=y, hue=hue, **plot_kws) + + if p.orient == "h": + g.set_axis_labels(p.value_label, p.group_label) + else: + g.set_axis_labels(p.group_label, p.value_label) + + # Special case axis labels for a count type plot + if kind == "count": + if x is None: + g.set_axis_labels(x_var="count") + if y is None: + g.set_axis_labels(y_var="count") + + if legend and (hue is not None) and (hue not in [x, row, col]): + hue_order = list(map(utils.to_utf8, hue_order)) + g.add_legend(title=hue, label_order=hue_order) + + return g + + +catplot.__doc__ = dedent("""\ + Figure-level interface for drawing categorical plots onto a FacetGrid. 
+ + This function provides access to several axes-level functions that + show the relationship between a numerical and one or more categorical + variables using one of several visual representations. The ``kind`` + parameter selects the underlying axes-level function to use: + + Categorical scatterplots: + + - :func:`stripplot` (with ``kind="strip"``; the default) + - :func:`swarmplot` (with ``kind="swarm"``) + + Categorical distribution plots: + + - :func:`boxplot` (with ``kind="box"``) + - :func:`violinplot` (with ``kind="violin"``) + - :func:`boxenplot` (with ``kind="boxen"``) + + Categorical estimate plots: + + - :func:`pointplot` (with ``kind="point"``) + - :func:`barplot` (with ``kind="bar"``) + - :func:`countplot` (with ``kind="count"``) + + Extra keyword arguments are passed to the underlying function, so you + should refer to the documentation for each to see kind-specific options. + + Note that unlike when using the axes-level functions directly, data must be + passed in a long-form DataFrame with variables specified by passing strings + to ``x``, ``y``, ``hue``, etc. + + As in the case with the underlying plot functions, if variables have a + ``categorical`` data type, the levels of the categorical variables, and + their order will be inferred from the objects. Otherwise you may have to + use alter the dataframe sorting or use the function parameters (``orient``, + ``order``, ``hue_order``, etc.) to set up the plot correctly. + + {categorical_narrative} + + After plotting, the :class:`FacetGrid` with the plot is returned and can + be used directly to tweak supporting plot details or add other layers. + + Parameters + ---------- + {string_input_params} + {long_form_data} + row, col : names of variables in ``data``, optional + Categorical variables that will determine the faceting of the grid. 
+ {col_wrap} + {stat_api_params} + {order_vars} + row_order, col_order : lists of strings, optional + Order to organize the rows and/or columns of the grid in, otherwise the + orders are inferred from the data objects. + kind : str, optional + The kind of plot to draw, corresponds to the name of a categorical + axes-level plotting function. Options are: "strip", "swarm", "box", "violin", + "boxen", "point", "bar", or "count". + {height} + {aspect} + {orient} + {color} + {palette} + legend : bool, optional + If ``True`` and there is a ``hue`` variable, draw a legend on the plot. + {legend_out} + {share_xy} + {margin_titles} + facet_kws : dict, optional + Dictionary of other keyword arguments to pass to :class:`FacetGrid`. + kwargs : key, value pairings + Other keyword arguments are passed through to the underlying plotting + function. + + Returns + ------- + g : :class:`FacetGrid` + Returns the :class:`FacetGrid` object with the plot on it for further + tweaking. + + Examples + -------- + + Draw a single facet to use the :class:`FacetGrid` legend placement: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns + >>> sns.set_theme(style="ticks") + >>> exercise = sns.load_dataset("exercise") + >>> g = sns.catplot(x="time", y="pulse", hue="kind", data=exercise) + + Use a different plot kind to visualize the same data: + + .. plot:: + :context: close-figs + + >>> g = sns.catplot(x="time", y="pulse", hue="kind", + ... data=exercise, kind="violin") + + Facet along the columns to show a third categorical variable: + + .. plot:: + :context: close-figs + + >>> g = sns.catplot(x="time", y="pulse", hue="kind", + ... col="diet", data=exercise) + + Use a different height and aspect ratio for the facets: + + .. plot:: + :context: close-figs + + >>> g = sns.catplot(x="time", y="pulse", hue="kind", + ... col="diet", data=exercise, + ... height=5, aspect=.8) + + Make many column facets and wrap them into the rows of the grid: + + .. 
plot:: + :context: close-figs + + >>> titanic = sns.load_dataset("titanic") + >>> g = sns.catplot(x="alive", col="deck", col_wrap=4, + ... data=titanic[titanic.deck.notnull()], + ... kind="count", height=2.5, aspect=.8) + + Plot horizontally and pass other keyword arguments to the plot function: + + .. plot:: + :context: close-figs + + >>> g = sns.catplot(x="age", y="embark_town", + ... hue="sex", row="class", + ... data=titanic[titanic.embark_town.notnull()], + ... orient="h", height=2, aspect=3, palette="Set3", + ... kind="violin", dodge=True, cut=0, bw=.2) + + Use methods on the returned :class:`FacetGrid` to tweak the presentation: + + .. plot:: + :context: close-figs + + >>> g = sns.catplot(x="who", y="survived", col="class", + ... data=titanic, saturation=.5, + ... kind="bar", ci=None, aspect=.6) + >>> (g.set_axis_labels("", "Survival Rate") + ... .set_xticklabels(["Men", "Women", "Children"]) + ... .set_titles("{{col_name}} {{col_var}}") + ... .set(ylim=(0, 1)) + ... .despine(left=True)) #doctest: +ELLIPSIS + + + """).format(**_categorical_docs) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/cm.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/cm.py new file mode 100644 index 0000000000000000000000000000000000000000..4e39fe7a67fc592e0c4998c88ccf232d74f14e6e --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/cm.py @@ -0,0 +1,1585 @@ +from matplotlib import colors, cm as mpl_cm + + +_rocket_lut = [ + [ 0.01060815, 0.01808215, 0.10018654], + [ 0.01428972, 0.02048237, 0.10374486], + [ 0.01831941, 0.0229766 , 0.10738511], + [ 0.02275049, 0.02554464, 0.11108639], + [ 0.02759119, 0.02818316, 0.11483751], + [ 0.03285175, 0.03088792, 0.11863035], + [ 0.03853466, 0.03365771, 0.12245873], + [ 0.04447016, 0.03648425, 0.12631831], + [ 0.05032105, 0.03936808, 0.13020508], + [ 0.05611171, 0.04224835, 0.13411624], + [ 0.0618531 , 0.04504866, 0.13804929], 
+ [ 0.06755457, 0.04778179, 0.14200206], + [ 0.0732236 , 0.05045047, 0.14597263], + [ 0.0788708 , 0.05305461, 0.14995981], + [ 0.08450105, 0.05559631, 0.15396203], + [ 0.09011319, 0.05808059, 0.15797687], + [ 0.09572396, 0.06050127, 0.16200507], + [ 0.10132312, 0.06286782, 0.16604287], + [ 0.10692823, 0.06517224, 0.17009175], + [ 0.1125315 , 0.06742194, 0.17414848], + [ 0.11813947, 0.06961499, 0.17821272], + [ 0.12375803, 0.07174938, 0.18228425], + [ 0.12938228, 0.07383015, 0.18636053], + [ 0.13501631, 0.07585609, 0.19044109], + [ 0.14066867, 0.0778224 , 0.19452676], + [ 0.14633406, 0.07973393, 0.1986151 ], + [ 0.15201338, 0.08159108, 0.20270523], + [ 0.15770877, 0.08339312, 0.20679668], + [ 0.16342174, 0.0851396 , 0.21088893], + [ 0.16915387, 0.08682996, 0.21498104], + [ 0.17489524, 0.08848235, 0.2190294 ], + [ 0.18065495, 0.09009031, 0.22303512], + [ 0.18643324, 0.09165431, 0.22699705], + [ 0.19223028, 0.09317479, 0.23091409], + [ 0.19804623, 0.09465217, 0.23478512], + [ 0.20388117, 0.09608689, 0.23860907], + [ 0.20973515, 0.09747934, 0.24238489], + [ 0.21560818, 0.09882993, 0.24611154], + [ 0.22150014, 0.10013944, 0.2497868 ], + [ 0.22741085, 0.10140876, 0.25340813], + [ 0.23334047, 0.10263737, 0.25697736], + [ 0.23928891, 0.10382562, 0.2604936 ], + [ 0.24525608, 0.10497384, 0.26395596], + [ 0.25124182, 0.10608236, 0.26736359], + [ 0.25724602, 0.10715148, 0.27071569], + [ 0.26326851, 0.1081815 , 0.27401148], + [ 0.26930915, 0.1091727 , 0.2772502 ], + [ 0.27536766, 0.11012568, 0.28043021], + [ 0.28144375, 0.11104133, 0.2835489 ], + [ 0.2875374 , 0.11191896, 0.28660853], + [ 0.29364846, 0.11275876, 0.2896085 ], + [ 0.29977678, 0.11356089, 0.29254823], + [ 0.30592213, 0.11432553, 0.29542718], + [ 0.31208435, 0.11505284, 0.29824485], + [ 0.31826327, 0.1157429 , 0.30100076], + [ 0.32445869, 0.11639585, 0.30369448], + [ 0.33067031, 0.11701189, 0.30632563], + [ 0.33689808, 0.11759095, 0.3088938 ], + [ 0.34314168, 0.11813362, 0.31139721], + [ 0.34940101, 0.11863987, 
0.3138355 ], + [ 0.355676 , 0.11910909, 0.31620996], + [ 0.36196644, 0.1195413 , 0.31852037], + [ 0.36827206, 0.11993653, 0.32076656], + [ 0.37459292, 0.12029443, 0.32294825], + [ 0.38092887, 0.12061482, 0.32506528], + [ 0.38727975, 0.12089756, 0.3271175 ], + [ 0.39364518, 0.12114272, 0.32910494], + [ 0.40002537, 0.12134964, 0.33102734], + [ 0.40642019, 0.12151801, 0.33288464], + [ 0.41282936, 0.12164769, 0.33467689], + [ 0.41925278, 0.12173833, 0.33640407], + [ 0.42569057, 0.12178916, 0.33806605], + [ 0.43214263, 0.12179973, 0.33966284], + [ 0.43860848, 0.12177004, 0.34119475], + [ 0.44508855, 0.12169883, 0.34266151], + [ 0.45158266, 0.12158557, 0.34406324], + [ 0.45809049, 0.12142996, 0.34540024], + [ 0.46461238, 0.12123063, 0.34667231], + [ 0.47114798, 0.12098721, 0.34787978], + [ 0.47769736, 0.12069864, 0.34902273], + [ 0.48426077, 0.12036349, 0.35010104], + [ 0.49083761, 0.11998161, 0.35111537], + [ 0.49742847, 0.11955087, 0.35206533], + [ 0.50403286, 0.11907081, 0.35295152], + [ 0.51065109, 0.11853959, 0.35377385], + [ 0.51728314, 0.1179558 , 0.35453252], + [ 0.52392883, 0.11731817, 0.35522789], + [ 0.53058853, 0.11662445, 0.35585982], + [ 0.53726173, 0.11587369, 0.35642903], + [ 0.54394898, 0.11506307, 0.35693521], + [ 0.5506426 , 0.11420757, 0.35737863], + [ 0.55734473, 0.11330456, 0.35775059], + [ 0.56405586, 0.11235265, 0.35804813], + [ 0.57077365, 0.11135597, 0.35827146], + [ 0.5774991 , 0.11031233, 0.35841679], + [ 0.58422945, 0.10922707, 0.35848469], + [ 0.59096382, 0.10810205, 0.35847347], + [ 0.59770215, 0.10693774, 0.35838029], + [ 0.60444226, 0.10573912, 0.35820487], + [ 0.61118304, 0.10450943, 0.35794557], + [ 0.61792306, 0.10325288, 0.35760108], + [ 0.62466162, 0.10197244, 0.35716891], + [ 0.63139686, 0.10067417, 0.35664819], + [ 0.63812122, 0.09938212, 0.35603757], + [ 0.64483795, 0.0980891 , 0.35533555], + [ 0.65154562, 0.09680192, 0.35454107], + [ 0.65824241, 0.09552918, 0.3536529 ], + [ 0.66492652, 0.09428017, 0.3526697 ], + [ 0.67159578, 
0.09306598, 0.35159077], + [ 0.67824099, 0.09192342, 0.3504148 ], + [ 0.684863 , 0.09085633, 0.34914061], + [ 0.69146268, 0.0898675 , 0.34776864], + [ 0.69803757, 0.08897226, 0.3462986 ], + [ 0.70457834, 0.0882129 , 0.34473046], + [ 0.71108138, 0.08761223, 0.3430635 ], + [ 0.7175507 , 0.08716212, 0.34129974], + [ 0.72398193, 0.08688725, 0.33943958], + [ 0.73035829, 0.0868623 , 0.33748452], + [ 0.73669146, 0.08704683, 0.33543669], + [ 0.74297501, 0.08747196, 0.33329799], + [ 0.74919318, 0.08820542, 0.33107204], + [ 0.75535825, 0.08919792, 0.32876184], + [ 0.76145589, 0.09050716, 0.32637117], + [ 0.76748424, 0.09213602, 0.32390525], + [ 0.77344838, 0.09405684, 0.32136808], + [ 0.77932641, 0.09634794, 0.31876642], + [ 0.78513609, 0.09892473, 0.31610488], + [ 0.79085854, 0.10184672, 0.313391 ], + [ 0.7965014 , 0.10506637, 0.31063031], + [ 0.80205987, 0.10858333, 0.30783 ], + [ 0.80752799, 0.11239964, 0.30499738], + [ 0.81291606, 0.11645784, 0.30213802], + [ 0.81820481, 0.12080606, 0.29926105], + [ 0.82341472, 0.12535343, 0.2963705 ], + [ 0.82852822, 0.13014118, 0.29347474], + [ 0.83355779, 0.13511035, 0.29057852], + [ 0.83850183, 0.14025098, 0.2876878 ], + [ 0.84335441, 0.14556683, 0.28480819], + [ 0.84813096, 0.15099892, 0.281943 ], + [ 0.85281737, 0.15657772, 0.27909826], + [ 0.85742602, 0.1622583 , 0.27627462], + [ 0.86196552, 0.16801239, 0.27346473], + [ 0.86641628, 0.17387796, 0.27070818], + [ 0.87079129, 0.17982114, 0.26797378], + [ 0.87507281, 0.18587368, 0.26529697], + [ 0.87925878, 0.19203259, 0.26268136], + [ 0.8833417 , 0.19830556, 0.26014181], + [ 0.88731387, 0.20469941, 0.25769539], + [ 0.89116859, 0.21121788, 0.2553592 ], + [ 0.89490337, 0.21785614, 0.25314362], + [ 0.8985026 , 0.22463251, 0.25108745], + [ 0.90197527, 0.23152063, 0.24918223], + [ 0.90530097, 0.23854541, 0.24748098], + [ 0.90848638, 0.24568473, 0.24598324], + [ 0.911533 , 0.25292623, 0.24470258], + [ 0.9144225 , 0.26028902, 0.24369359], + [ 0.91717106, 0.26773821, 0.24294137], + [ 
0.91978131, 0.27526191, 0.24245973], + [ 0.92223947, 0.28287251, 0.24229568], + [ 0.92456587, 0.29053388, 0.24242622], + [ 0.92676657, 0.29823282, 0.24285536], + [ 0.92882964, 0.30598085, 0.24362274], + [ 0.93078135, 0.31373977, 0.24468803], + [ 0.93262051, 0.3215093 , 0.24606461], + [ 0.93435067, 0.32928362, 0.24775328], + [ 0.93599076, 0.33703942, 0.24972157], + [ 0.93752831, 0.34479177, 0.25199928], + [ 0.93899289, 0.35250734, 0.25452808], + [ 0.94036561, 0.36020899, 0.25734661], + [ 0.94167588, 0.36786594, 0.2603949 ], + [ 0.94291042, 0.37549479, 0.26369821], + [ 0.94408513, 0.3830811 , 0.26722004], + [ 0.94520419, 0.39062329, 0.27094924], + [ 0.94625977, 0.39813168, 0.27489742], + [ 0.94727016, 0.4055909 , 0.27902322], + [ 0.94823505, 0.41300424, 0.28332283], + [ 0.94914549, 0.42038251, 0.28780969], + [ 0.95001704, 0.42771398, 0.29244728], + [ 0.95085121, 0.43500005, 0.29722817], + [ 0.95165009, 0.44224144, 0.30214494], + [ 0.9524044 , 0.44944853, 0.3072105 ], + [ 0.95312556, 0.45661389, 0.31239776], + [ 0.95381595, 0.46373781, 0.31769923], + [ 0.95447591, 0.47082238, 0.32310953], + [ 0.95510255, 0.47787236, 0.32862553], + [ 0.95569679, 0.48489115, 0.33421404], + [ 0.95626788, 0.49187351, 0.33985601], + [ 0.95681685, 0.49882008, 0.34555431], + [ 0.9573439 , 0.50573243, 0.35130912], + [ 0.95784842, 0.51261283, 0.35711942], + [ 0.95833051, 0.51946267, 0.36298589], + [ 0.95879054, 0.52628305, 0.36890904], + [ 0.95922872, 0.53307513, 0.3748895 ], + [ 0.95964538, 0.53983991, 0.38092784], + [ 0.96004345, 0.54657593, 0.3870292 ], + [ 0.96042097, 0.55328624, 0.39319057], + [ 0.96077819, 0.55997184, 0.39941173], + [ 0.9611152 , 0.5666337 , 0.40569343], + [ 0.96143273, 0.57327231, 0.41203603], + [ 0.96173392, 0.57988594, 0.41844491], + [ 0.96201757, 0.58647675, 0.42491751], + [ 0.96228344, 0.59304598, 0.43145271], + [ 0.96253168, 0.5995944 , 0.43805131], + [ 0.96276513, 0.60612062, 0.44471698], + [ 0.96298491, 0.6126247 , 0.45145074], + [ 0.96318967, 0.61910879, 
0.45824902], + [ 0.96337949, 0.6255736 , 0.46511271], + [ 0.96355923, 0.63201624, 0.47204746], + [ 0.96372785, 0.63843852, 0.47905028], + [ 0.96388426, 0.64484214, 0.4861196 ], + [ 0.96403203, 0.65122535, 0.4932578 ], + [ 0.96417332, 0.65758729, 0.50046894], + [ 0.9643063 , 0.66393045, 0.5077467 ], + [ 0.96443322, 0.67025402, 0.51509334], + [ 0.96455845, 0.67655564, 0.52251447], + [ 0.96467922, 0.68283846, 0.53000231], + [ 0.96479861, 0.68910113, 0.53756026], + [ 0.96492035, 0.69534192, 0.5451917 ], + [ 0.96504223, 0.7015636 , 0.5528892 ], + [ 0.96516917, 0.70776351, 0.5606593 ], + [ 0.96530224, 0.71394212, 0.56849894], + [ 0.96544032, 0.72010124, 0.57640375], + [ 0.96559206, 0.72623592, 0.58438387], + [ 0.96575293, 0.73235058, 0.59242739], + [ 0.96592829, 0.73844258, 0.60053991], + [ 0.96612013, 0.74451182, 0.60871954], + [ 0.96632832, 0.75055966, 0.61696136], + [ 0.96656022, 0.75658231, 0.62527295], + [ 0.96681185, 0.76258381, 0.63364277], + [ 0.96709183, 0.76855969, 0.64207921], + [ 0.96739773, 0.77451297, 0.65057302], + [ 0.96773482, 0.78044149, 0.65912731], + [ 0.96810471, 0.78634563, 0.66773889], + [ 0.96850919, 0.79222565, 0.6764046 ], + [ 0.96893132, 0.79809112, 0.68512266], + [ 0.96935926, 0.80395415, 0.69383201], + [ 0.9698028 , 0.80981139, 0.70252255], + [ 0.97025511, 0.81566605, 0.71120296], + [ 0.97071849, 0.82151775, 0.71987163], + [ 0.97120159, 0.82736371, 0.72851999], + [ 0.97169389, 0.83320847, 0.73716071], + [ 0.97220061, 0.83905052, 0.74578903], + [ 0.97272597, 0.84488881, 0.75440141], + [ 0.97327085, 0.85072354, 0.76299805], + [ 0.97383206, 0.85655639, 0.77158353], + [ 0.97441222, 0.86238689, 0.78015619], + [ 0.97501782, 0.86821321, 0.78871034], + [ 0.97564391, 0.87403763, 0.79725261], + [ 0.97628674, 0.87986189, 0.8057883 ], + [ 0.97696114, 0.88568129, 0.81430324], + [ 0.97765722, 0.89149971, 0.82280948], + [ 0.97837585, 0.89731727, 0.83130786], + [ 0.97912374, 0.90313207, 0.83979337], + [ 0.979891 , 0.90894778, 0.84827858], + [ 0.98067764, 
0.91476465, 0.85676611], + [ 0.98137749, 0.92061729, 0.86536915] +] + + +_mako_lut = [ + [ 0.04503935, 0.01482344, 0.02092227], + [ 0.04933018, 0.01709292, 0.02535719], + [ 0.05356262, 0.01950702, 0.03018802], + [ 0.05774337, 0.02205989, 0.03545515], + [ 0.06188095, 0.02474764, 0.04115287], + [ 0.06598247, 0.0275665 , 0.04691409], + [ 0.07005374, 0.03051278, 0.05264306], + [ 0.07409947, 0.03358324, 0.05834631], + [ 0.07812339, 0.03677446, 0.06403249], + [ 0.08212852, 0.0400833 , 0.06970862], + [ 0.08611731, 0.04339148, 0.07538208], + [ 0.09009161, 0.04664706, 0.08105568], + [ 0.09405308, 0.04985685, 0.08673591], + [ 0.09800301, 0.05302279, 0.09242646], + [ 0.10194255, 0.05614641, 0.09813162], + [ 0.10587261, 0.05922941, 0.103854 ], + [ 0.1097942 , 0.06227277, 0.10959847], + [ 0.11370826, 0.06527747, 0.11536893], + [ 0.11761516, 0.06824548, 0.12116393], + [ 0.12151575, 0.07117741, 0.12698763], + [ 0.12541095, 0.07407363, 0.1328442 ], + [ 0.12930083, 0.07693611, 0.13873064], + [ 0.13317849, 0.07976988, 0.14465095], + [ 0.13701138, 0.08259683, 0.15060265], + [ 0.14079223, 0.08542126, 0.15659379], + [ 0.14452486, 0.08824175, 0.16262484], + [ 0.14820351, 0.09106304, 0.16869476], + [ 0.15183185, 0.09388372, 0.17480366], + [ 0.15540398, 0.09670855, 0.18094993], + [ 0.15892417, 0.09953561, 0.18713384], + [ 0.16238588, 0.10236998, 0.19335329], + [ 0.16579435, 0.10520905, 0.19960847], + [ 0.16914226, 0.10805832, 0.20589698], + [ 0.17243586, 0.11091443, 0.21221911], + [ 0.17566717, 0.11378321, 0.21857219], + [ 0.17884322, 0.11666074, 0.2249565 ], + [ 0.18195582, 0.11955283, 0.23136943], + [ 0.18501213, 0.12245547, 0.23781116], + [ 0.18800459, 0.12537395, 0.24427914], + [ 0.19093944, 0.1283047 , 0.25077369], + [ 0.19381092, 0.13125179, 0.25729255], + [ 0.19662307, 0.13421303, 0.26383543], + [ 0.19937337, 0.13719028, 0.27040111], + [ 0.20206187, 0.14018372, 0.27698891], + [ 0.20469116, 0.14319196, 0.28359861], + [ 0.20725547, 0.14621882, 0.29022775], + [ 0.20976258, 0.14925954, 
0.29687795], + [ 0.21220409, 0.15231929, 0.30354703], + [ 0.21458611, 0.15539445, 0.31023563], + [ 0.21690827, 0.15848519, 0.31694355], + [ 0.21916481, 0.16159489, 0.32366939], + [ 0.2213631 , 0.16471913, 0.33041431], + [ 0.22349947, 0.1678599 , 0.33717781], + [ 0.2255714 , 0.1710185 , 0.34395925], + [ 0.22758415, 0.17419169, 0.35075983], + [ 0.22953569, 0.17738041, 0.35757941], + [ 0.23142077, 0.18058733, 0.3644173 ], + [ 0.2332454 , 0.18380872, 0.37127514], + [ 0.2350092 , 0.18704459, 0.3781528 ], + [ 0.23670785, 0.190297 , 0.38504973], + [ 0.23834119, 0.19356547, 0.39196711], + [ 0.23991189, 0.19684817, 0.39890581], + [ 0.24141903, 0.20014508, 0.4058667 ], + [ 0.24286214, 0.20345642, 0.4128484 ], + [ 0.24423453, 0.20678459, 0.41985299], + [ 0.24554109, 0.21012669, 0.42688124], + [ 0.2467815 , 0.21348266, 0.43393244], + [ 0.24795393, 0.21685249, 0.4410088 ], + [ 0.24905614, 0.22023618, 0.448113 ], + [ 0.25007383, 0.22365053, 0.45519562], + [ 0.25098926, 0.22710664, 0.46223892], + [ 0.25179696, 0.23060342, 0.46925447], + [ 0.25249346, 0.23414353, 0.47623196], + [ 0.25307401, 0.23772973, 0.48316271], + [ 0.25353152, 0.24136961, 0.49001976], + [ 0.25386167, 0.24506548, 0.49679407], + [ 0.25406082, 0.2488164 , 0.50348932], + [ 0.25412435, 0.25262843, 0.51007843], + [ 0.25404842, 0.25650743, 0.51653282], + [ 0.25383134, 0.26044852, 0.52286845], + [ 0.2534705 , 0.26446165, 0.52903422], + [ 0.25296722, 0.2685428 , 0.53503572], + [ 0.2523226 , 0.27269346, 0.54085315], + [ 0.25153974, 0.27691629, 0.54645752], + [ 0.25062402, 0.28120467, 0.55185939], + [ 0.24958205, 0.28556371, 0.55701246], + [ 0.24842386, 0.28998148, 0.56194601], + [ 0.24715928, 0.29446327, 0.56660884], + [ 0.24580099, 0.29899398, 0.57104399], + [ 0.24436202, 0.30357852, 0.57519929], + [ 0.24285591, 0.30819938, 0.57913247], + [ 0.24129828, 0.31286235, 0.58278615], + [ 0.23970131, 0.3175495 , 0.5862272 ], + [ 0.23807973, 0.32226344, 0.58941872], + [ 0.23644557, 0.32699241, 0.59240198], + [ 0.2348113 , 
0.33173196, 0.59518282], + [ 0.23318874, 0.33648036, 0.59775543], + [ 0.2315855 , 0.34122763, 0.60016456], + [ 0.23001121, 0.34597357, 0.60240251], + [ 0.2284748 , 0.35071512, 0.6044784 ], + [ 0.22698081, 0.35544612, 0.60642528], + [ 0.22553305, 0.36016515, 0.60825252], + [ 0.22413977, 0.36487341, 0.60994938], + [ 0.22280246, 0.36956728, 0.61154118], + [ 0.22152555, 0.37424409, 0.61304472], + [ 0.22030752, 0.37890437, 0.61446646], + [ 0.2191538 , 0.38354668, 0.61581561], + [ 0.21806257, 0.38817169, 0.61709794], + [ 0.21703799, 0.39277882, 0.61831922], + [ 0.21607792, 0.39736958, 0.61948028], + [ 0.21518463, 0.40194196, 0.62059763], + [ 0.21435467, 0.40649717, 0.62167507], + [ 0.21358663, 0.41103579, 0.62271724], + [ 0.21288172, 0.41555771, 0.62373011], + [ 0.21223835, 0.42006355, 0.62471794], + [ 0.21165312, 0.42455441, 0.62568371], + [ 0.21112526, 0.42903064, 0.6266318 ], + [ 0.21065161, 0.43349321, 0.62756504], + [ 0.21023306, 0.43794288, 0.62848279], + [ 0.20985996, 0.44238227, 0.62938329], + [ 0.20951045, 0.44680966, 0.63030696], + [ 0.20916709, 0.45122981, 0.63124483], + [ 0.20882976, 0.45564335, 0.63219599], + [ 0.20849798, 0.46005094, 0.63315928], + [ 0.20817199, 0.46445309, 0.63413391], + [ 0.20785149, 0.46885041, 0.63511876], + [ 0.20753716, 0.47324327, 0.63611321], + [ 0.20722876, 0.47763224, 0.63711608], + [ 0.20692679, 0.48201774, 0.63812656], + [ 0.20663156, 0.48640018, 0.63914367], + [ 0.20634336, 0.49078002, 0.64016638], + [ 0.20606303, 0.49515755, 0.6411939 ], + [ 0.20578999, 0.49953341, 0.64222457], + [ 0.20552612, 0.50390766, 0.64325811], + [ 0.20527189, 0.50828072, 0.64429331], + [ 0.20502868, 0.51265277, 0.64532947], + [ 0.20479718, 0.51702417, 0.64636539], + [ 0.20457804, 0.52139527, 0.64739979], + [ 0.20437304, 0.52576622, 0.64843198], + [ 0.20418396, 0.53013715, 0.64946117], + [ 0.20401238, 0.53450825, 0.65048638], + [ 0.20385896, 0.53887991, 0.65150606], + [ 0.20372653, 0.54325208, 0.65251978], + [ 0.20361709, 0.5476249 , 0.6535266 ], + [ 
0.20353258, 0.55199854, 0.65452542], + [ 0.20347472, 0.55637318, 0.655515 ], + [ 0.20344718, 0.56074869, 0.65649508], + [ 0.20345161, 0.56512531, 0.65746419], + [ 0.20349089, 0.56950304, 0.65842151], + [ 0.20356842, 0.57388184, 0.65936642], + [ 0.20368663, 0.57826181, 0.66029768], + [ 0.20384884, 0.58264293, 0.6612145 ], + [ 0.20405904, 0.58702506, 0.66211645], + [ 0.20431921, 0.59140842, 0.66300179], + [ 0.20463464, 0.59579264, 0.66387079], + [ 0.20500731, 0.60017798, 0.66472159], + [ 0.20544449, 0.60456387, 0.66555409], + [ 0.20596097, 0.60894927, 0.66636568], + [ 0.20654832, 0.61333521, 0.66715744], + [ 0.20721003, 0.61772167, 0.66792838], + [ 0.20795035, 0.62210845, 0.66867802], + [ 0.20877302, 0.62649546, 0.66940555], + [ 0.20968223, 0.63088252, 0.6701105 ], + [ 0.21068163, 0.63526951, 0.67079211], + [ 0.21177544, 0.63965621, 0.67145005], + [ 0.21298582, 0.64404072, 0.67208182], + [ 0.21430361, 0.64842404, 0.67268861], + [ 0.21572716, 0.65280655, 0.67326978], + [ 0.21726052, 0.65718791, 0.6738255 ], + [ 0.21890636, 0.66156803, 0.67435491], + [ 0.220668 , 0.66594665, 0.67485792], + [ 0.22255447, 0.67032297, 0.67533374], + [ 0.22458372, 0.67469531, 0.67578061], + [ 0.22673713, 0.67906542, 0.67620044], + [ 0.22901625, 0.6834332 , 0.67659251], + [ 0.23142316, 0.68779836, 0.67695703], + [ 0.23395924, 0.69216072, 0.67729378], + [ 0.23663857, 0.69651881, 0.67760151], + [ 0.23946645, 0.70087194, 0.67788018], + [ 0.24242624, 0.70522162, 0.67813088], + [ 0.24549008, 0.70957083, 0.67835215], + [ 0.24863372, 0.71392166, 0.67854868], + [ 0.25187832, 0.71827158, 0.67872193], + [ 0.25524083, 0.72261873, 0.67887024], + [ 0.25870947, 0.72696469, 0.67898912], + [ 0.26229238, 0.73130855, 0.67907645], + [ 0.26604085, 0.73564353, 0.67914062], + [ 0.26993099, 0.73997282, 0.67917264], + [ 0.27397488, 0.74429484, 0.67917096], + [ 0.27822463, 0.74860229, 0.67914468], + [ 0.28264201, 0.75290034, 0.67907959], + [ 0.2873016 , 0.75717817, 0.67899164], + [ 0.29215894, 0.76144162, 
0.67886578], + [ 0.29729823, 0.76567816, 0.67871894], + [ 0.30268199, 0.76989232, 0.67853896], + [ 0.30835665, 0.77407636, 0.67833512], + [ 0.31435139, 0.77822478, 0.67811118], + [ 0.3206671 , 0.78233575, 0.67786729], + [ 0.32733158, 0.78640315, 0.67761027], + [ 0.33437168, 0.79042043, 0.67734882], + [ 0.34182112, 0.79437948, 0.67709394], + [ 0.34968889, 0.79827511, 0.67685638], + [ 0.35799244, 0.80210037, 0.67664969], + [ 0.36675371, 0.80584651, 0.67649539], + [ 0.3759816 , 0.80950627, 0.67641393], + [ 0.38566792, 0.81307432, 0.67642947], + [ 0.39579804, 0.81654592, 0.67656899], + [ 0.40634556, 0.81991799, 0.67686215], + [ 0.41730243, 0.82318339, 0.67735255], + [ 0.4285828 , 0.82635051, 0.6780564 ], + [ 0.44012728, 0.82942353, 0.67900049], + [ 0.45189421, 0.83240398, 0.68021733], + [ 0.46378379, 0.83530763, 0.6817062 ], + [ 0.47573199, 0.83814472, 0.68347352], + [ 0.48769865, 0.84092197, 0.68552698], + [ 0.49962354, 0.84365379, 0.68783929], + [ 0.5114027 , 0.8463718 , 0.69029789], + [ 0.52301693, 0.84908401, 0.69288545], + [ 0.53447549, 0.85179048, 0.69561066], + [ 0.54578602, 0.8544913 , 0.69848331], + [ 0.55695565, 0.85718723, 0.70150427], + [ 0.56798832, 0.85987893, 0.70468261], + [ 0.57888639, 0.86256715, 0.70802931], + [ 0.5896541 , 0.8652532 , 0.71154204], + [ 0.60028928, 0.86793835, 0.71523675], + [ 0.61079441, 0.87062438, 0.71910895], + [ 0.62116633, 0.87331311, 0.72317003], + [ 0.63140509, 0.87600675, 0.72741689], + [ 0.64150735, 0.87870746, 0.73185717], + [ 0.65147219, 0.8814179 , 0.73648495], + [ 0.66129632, 0.8841403 , 0.74130658], + [ 0.67097934, 0.88687758, 0.74631123], + [ 0.68051833, 0.88963189, 0.75150483], + [ 0.68991419, 0.89240612, 0.75687187], + [ 0.69916533, 0.89520211, 0.76241714], + [ 0.70827373, 0.89802257, 0.76812286], + [ 0.71723995, 0.90086891, 0.77399039], + [ 0.72606665, 0.90374337, 0.7800041 ], + [ 0.73475675, 0.90664718, 0.78615802], + [ 0.74331358, 0.90958151, 0.79244474], + [ 0.75174143, 0.91254787, 0.79884925], + [ 0.76004473, 
0.91554656, 0.80536823], + [ 0.76827704, 0.91856549, 0.81196513], + [ 0.77647029, 0.921603 , 0.81855729], + [ 0.78462009, 0.92466151, 0.82514119], + [ 0.79273542, 0.92773848, 0.83172131], + [ 0.8008109 , 0.93083672, 0.83829355], + [ 0.80885107, 0.93395528, 0.84485982], + [ 0.81685878, 0.9370938 , 0.85142101], + [ 0.82483206, 0.94025378, 0.8579751 ], + [ 0.83277661, 0.94343371, 0.86452477], + [ 0.84069127, 0.94663473, 0.87106853], + [ 0.84857662, 0.9498573 , 0.8776059 ], + [ 0.8564431 , 0.95309792, 0.88414253], + [ 0.86429066, 0.95635719, 0.89067759], + [ 0.87218969, 0.95960708, 0.89725384] +] + + +_vlag_lut = [ + [ 0.13850039, 0.41331206, 0.74052025], + [ 0.15077609, 0.41762684, 0.73970427], + [ 0.16235219, 0.4219191 , 0.7389667 ], + [ 0.1733322 , 0.42619024, 0.73832537], + [ 0.18382538, 0.43044226, 0.73776764], + [ 0.19394034, 0.4346772 , 0.73725867], + [ 0.20367115, 0.43889576, 0.73685314], + [ 0.21313625, 0.44310003, 0.73648045], + [ 0.22231173, 0.44729079, 0.73619681], + [ 0.23125148, 0.45146945, 0.73597803], + [ 0.23998101, 0.45563715, 0.7358223 ], + [ 0.24853358, 0.45979489, 0.73571524], + [ 0.25691416, 0.4639437 , 0.73566943], + [ 0.26513894, 0.46808455, 0.73568319], + [ 0.27322194, 0.47221835, 0.73575497], + [ 0.28117543, 0.47634598, 0.73588332], + [ 0.28901021, 0.48046826, 0.73606686], + [ 0.2967358 , 0.48458597, 0.73630433], + [ 0.30436071, 0.48869986, 0.73659451], + [ 0.3118955 , 0.49281055, 0.73693255], + [ 0.31935389, 0.49691847, 0.73730851], + [ 0.32672701, 0.5010247 , 0.73774013], + [ 0.33402607, 0.50512971, 0.73821941], + [ 0.34125337, 0.50923419, 0.73874905], + [ 0.34840921, 0.51333892, 0.73933402], + [ 0.35551826, 0.51744353, 0.73994642], + [ 0.3625676 , 0.52154929, 0.74060763], + [ 0.36956356, 0.52565656, 0.74131327], + [ 0.37649902, 0.52976642, 0.74207698], + [ 0.38340273, 0.53387791, 0.74286286], + [ 0.39025859, 0.53799253, 0.7436962 ], + [ 0.39706821, 0.54211081, 0.744578 ], + [ 0.40384046, 0.54623277, 0.74549872], + [ 0.41058241, 0.55035849, 
0.74645094], + [ 0.41728385, 0.55448919, 0.74745174], + [ 0.42395178, 0.55862494, 0.74849357], + [ 0.4305964 , 0.56276546, 0.74956387], + [ 0.4372044 , 0.56691228, 0.75068412], + [ 0.4437909 , 0.57106468, 0.75183427], + [ 0.45035117, 0.5752235 , 0.75302312], + [ 0.45687824, 0.57938983, 0.75426297], + [ 0.46339713, 0.58356191, 0.75551816], + [ 0.46988778, 0.58774195, 0.75682037], + [ 0.47635605, 0.59192986, 0.75816245], + [ 0.48281101, 0.5961252 , 0.75953212], + [ 0.4892374 , 0.60032986, 0.76095418], + [ 0.49566225, 0.60454154, 0.76238852], + [ 0.50206137, 0.60876307, 0.76387371], + [ 0.50845128, 0.61299312, 0.76538551], + [ 0.5148258 , 0.61723272, 0.76693475], + [ 0.52118385, 0.62148236, 0.76852436], + [ 0.52753571, 0.62574126, 0.77013939], + [ 0.53386831, 0.63001125, 0.77180152], + [ 0.54020159, 0.63429038, 0.7734803 ], + [ 0.54651272, 0.63858165, 0.77521306], + [ 0.55282975, 0.64288207, 0.77695608], + [ 0.55912585, 0.64719519, 0.77875327], + [ 0.56542599, 0.65151828, 0.78056551], + [ 0.57170924, 0.65585426, 0.78242747], + [ 0.57799572, 0.6602009 , 0.78430751], + [ 0.58426817, 0.66456073, 0.78623458], + [ 0.590544 , 0.66893178, 0.78818117], + [ 0.59680758, 0.67331643, 0.79017369], + [ 0.60307553, 0.67771273, 0.79218572], + [ 0.60934065, 0.68212194, 0.79422987], + [ 0.61559495, 0.68654548, 0.7963202 ], + [ 0.62185554, 0.69098125, 0.79842918], + [ 0.62810662, 0.69543176, 0.80058381], + [ 0.63436425, 0.69989499, 0.80275812], + [ 0.64061445, 0.70437326, 0.80497621], + [ 0.6468706 , 0.70886488, 0.80721641], + [ 0.65312213, 0.7133717 , 0.80949719], + [ 0.65937818, 0.71789261, 0.81180392], + [ 0.66563334, 0.72242871, 0.81414642], + [ 0.67189155, 0.72697967, 0.81651872], + [ 0.67815314, 0.73154569, 0.81892097], + [ 0.68441395, 0.73612771, 0.82136094], + [ 0.69068321, 0.74072452, 0.82382353], + [ 0.69694776, 0.7453385 , 0.82633199], + [ 0.70322431, 0.74996721, 0.8288583 ], + [ 0.70949595, 0.75461368, 0.83143221], + [ 0.7157774 , 0.75927574, 0.83402904], + [ 0.72206299, 
0.76395461, 0.83665922], + [ 0.72835227, 0.76865061, 0.8393242 ], + [ 0.73465238, 0.7733628 , 0.84201224], + [ 0.74094862, 0.77809393, 0.84474951], + [ 0.74725683, 0.78284158, 0.84750915], + [ 0.75357103, 0.78760701, 0.85030217], + [ 0.75988961, 0.79239077, 0.85313207], + [ 0.76621987, 0.79719185, 0.85598668], + [ 0.77255045, 0.8020125 , 0.85888658], + [ 0.77889241, 0.80685102, 0.86181298], + [ 0.78524572, 0.81170768, 0.86476656], + [ 0.79159841, 0.81658489, 0.86776906], + [ 0.79796459, 0.82148036, 0.8707962 ], + [ 0.80434168, 0.82639479, 0.87385315], + [ 0.8107221 , 0.83132983, 0.87695392], + [ 0.81711301, 0.8362844 , 0.88008641], + [ 0.82351479, 0.84125863, 0.88325045], + [ 0.82992772, 0.84625263, 0.88644594], + [ 0.83634359, 0.85126806, 0.8896878 ], + [ 0.84277295, 0.85630293, 0.89295721], + [ 0.84921192, 0.86135782, 0.89626076], + [ 0.85566206, 0.866432 , 0.89959467], + [ 0.86211514, 0.87152627, 0.90297183], + [ 0.86857483, 0.87663856, 0.90638248], + [ 0.87504231, 0.88176648, 0.90981938], + [ 0.88151194, 0.88690782, 0.91328493], + [ 0.88797938, 0.89205857, 0.91677544], + [ 0.89443865, 0.89721298, 0.9202854 ], + [ 0.90088204, 0.90236294, 0.92380601], + [ 0.90729768, 0.90749778, 0.92732797], + [ 0.91367037, 0.91260329, 0.93083814], + [ 0.91998105, 0.91766106, 0.93431861], + [ 0.92620596, 0.92264789, 0.93774647], + [ 0.93231683, 0.9275351 , 0.94109192], + [ 0.93827772, 0.9322888 , 0.94432312], + [ 0.94404755, 0.93686925, 0.94740137], + [ 0.94958284, 0.94123072, 0.95027696], + [ 0.95482682, 0.9453245 , 0.95291103], + [ 0.9597248 , 0.94909728, 0.95525103], + [ 0.96422552, 0.95249273, 0.95723271], + [ 0.96826161, 0.95545812, 0.95882188], + [ 0.97178458, 0.95793984, 0.95995705], + [ 0.97474105, 0.95989142, 0.96059997], + [ 0.97708604, 0.96127366, 0.96071853], + [ 0.97877855, 0.96205832, 0.96030095], + [ 0.97978484, 0.96222949, 0.95935496], + [ 0.9805997 , 0.96155216, 0.95813083], + [ 0.98152619, 0.95993719, 0.95639322], + [ 0.9819726 , 0.95766608, 0.95399269], + [ 
0.98191855, 0.9547873 , 0.95098107], + [ 0.98138514, 0.95134771, 0.94740644], + [ 0.98040845, 0.94739906, 0.94332125], + [ 0.97902107, 0.94300131, 0.93878672], + [ 0.97729348, 0.93820409, 0.93385135], + [ 0.9752533 , 0.933073 , 0.92858252], + [ 0.97297834, 0.92765261, 0.92302309], + [ 0.97049104, 0.92200317, 0.91723505], + [ 0.96784372, 0.91616744, 0.91126063], + [ 0.96507281, 0.91018664, 0.90514124], + [ 0.96222034, 0.90409203, 0.89890756], + [ 0.9593079 , 0.89791478, 0.89259122], + [ 0.95635626, 0.89167908, 0.88621654], + [ 0.95338303, 0.88540373, 0.87980238], + [ 0.95040174, 0.87910333, 0.87336339], + [ 0.94742246, 0.87278899, 0.86691076], + [ 0.94445249, 0.86646893, 0.86045277], + [ 0.94150476, 0.86014606, 0.85399191], + [ 0.93857394, 0.85382798, 0.84753642], + [ 0.93566206, 0.84751766, 0.84108935], + [ 0.93277194, 0.8412164 , 0.83465197], + [ 0.92990106, 0.83492672, 0.82822708], + [ 0.92704736, 0.82865028, 0.82181656], + [ 0.92422703, 0.82238092, 0.81541333], + [ 0.92142581, 0.81612448, 0.80902415], + [ 0.91864501, 0.80988032, 0.80264838], + [ 0.91587578, 0.80365187, 0.79629001], + [ 0.9131367 , 0.79743115, 0.78994 ], + [ 0.91041602, 0.79122265, 0.78360361], + [ 0.90771071, 0.78502727, 0.77728196], + [ 0.90501581, 0.77884674, 0.7709771 ], + [ 0.90235365, 0.77267117, 0.76467793], + [ 0.8997019 , 0.76650962, 0.75839484], + [ 0.89705346, 0.76036481, 0.752131 ], + [ 0.89444021, 0.75422253, 0.74587047], + [ 0.89183355, 0.74809474, 0.73962689], + [ 0.88923216, 0.74198168, 0.73340061], + [ 0.88665892, 0.73587283, 0.72717995], + [ 0.88408839, 0.72977904, 0.72097718], + [ 0.88153537, 0.72369332, 0.71478461], + [ 0.87899389, 0.7176179 , 0.70860487], + [ 0.87645157, 0.71155805, 0.7024439 ], + [ 0.8739399 , 0.70549893, 0.6962854 ], + [ 0.87142626, 0.6994551 , 0.69014561], + [ 0.8689268 , 0.69341868, 0.68401597], + [ 0.86643562, 0.687392 , 0.67789917], + [ 0.86394434, 0.68137863, 0.67179927], + [ 0.86147586, 0.67536728, 0.665704 ], + [ 0.85899928, 0.66937226, 0.6596292 ], 
+ [ 0.85654668, 0.66337773, 0.6535577 ], + [ 0.85408818, 0.65739772, 0.64750494], + [ 0.85164413, 0.65142189, 0.64145983], + [ 0.84920091, 0.6454565 , 0.63542932], + [ 0.84676427, 0.63949827, 0.62941 ], + [ 0.84433231, 0.63354773, 0.62340261], + [ 0.84190106, 0.62760645, 0.61740899], + [ 0.83947935, 0.62166951, 0.61142404], + [ 0.8370538 , 0.61574332, 0.60545478], + [ 0.83463975, 0.60981951, 0.59949247], + [ 0.83221877, 0.60390724, 0.593547 ], + [ 0.82980985, 0.59799607, 0.58760751], + [ 0.82740268, 0.59209095, 0.58167944], + [ 0.82498638, 0.5861973 , 0.57576866], + [ 0.82258181, 0.5803034 , 0.56986307], + [ 0.82016611, 0.57442123, 0.56397539], + [ 0.81776305, 0.56853725, 0.55809173], + [ 0.81534551, 0.56266602, 0.55222741], + [ 0.81294293, 0.55679056, 0.5463651 ], + [ 0.81052113, 0.55092973, 0.54052443], + [ 0.80811509, 0.54506305, 0.53468464], + [ 0.80568952, 0.53921036, 0.52886622], + [ 0.80327506, 0.53335335, 0.52305077], + [ 0.80084727, 0.52750583, 0.51725256], + [ 0.79842217, 0.5216578 , 0.51146173], + [ 0.79599382, 0.51581223, 0.50568155], + [ 0.79355781, 0.50997127, 0.49991444], + [ 0.79112596, 0.50412707, 0.49415289], + [ 0.78867442, 0.49829386, 0.48841129], + [ 0.7862306 , 0.49245398, 0.48267247], + [ 0.7837687 , 0.48662309, 0.47695216], + [ 0.78130809, 0.4807883 , 0.47123805], + [ 0.77884467, 0.47495151, 0.46553236], + [ 0.77636283, 0.46912235, 0.45984473], + [ 0.77388383, 0.46328617, 0.45416141], + [ 0.77138912, 0.45745466, 0.44849398], + [ 0.76888874, 0.45162042, 0.44283573], + [ 0.76638802, 0.44577901, 0.43718292], + [ 0.76386116, 0.43994762, 0.43155211], + [ 0.76133542, 0.43410655, 0.42592523], + [ 0.75880631, 0.42825801, 0.42030488], + [ 0.75624913, 0.42241905, 0.41470727], + [ 0.7536919 , 0.41656866, 0.40911347], + [ 0.75112748, 0.41071104, 0.40352792], + [ 0.74854331, 0.40485474, 0.3979589 ], + [ 0.74594723, 0.39899309, 0.39240088], + [ 0.74334332, 0.39312199, 0.38685075], + [ 0.74073277, 0.38723941, 0.3813074 ], + [ 0.73809409, 0.38136133, 
0.37578553], + [ 0.73544692, 0.37547129, 0.37027123], + [ 0.73278943, 0.36956954, 0.36476549], + [ 0.73011829, 0.36365761, 0.35927038], + [ 0.72743485, 0.35773314, 0.35378465], + [ 0.72472722, 0.35180504, 0.34831662], + [ 0.72200473, 0.34586421, 0.34285937], + [ 0.71927052, 0.33990649, 0.33741033], + [ 0.71652049, 0.33393396, 0.33197219], + [ 0.71375362, 0.32794602, 0.32654545], + [ 0.71096951, 0.32194148, 0.32113016], + [ 0.70816772, 0.31591904, 0.31572637], + [ 0.70534784, 0.30987734, 0.31033414], + [ 0.70250944, 0.30381489, 0.30495353], + [ 0.69965211, 0.2977301 , 0.2995846 ], + [ 0.6967754 , 0.29162126, 0.29422741], + [ 0.69388446, 0.28548074, 0.28887769], + [ 0.69097561, 0.2793096 , 0.28353795], + [ 0.68803513, 0.27311993, 0.27821876], + [ 0.6850794 , 0.26689144, 0.27290694], + [ 0.682108 , 0.26062114, 0.26760246], + [ 0.67911013, 0.2543177 , 0.26231367], + [ 0.67609393, 0.24796818, 0.25703372], + [ 0.67305921, 0.24156846, 0.25176238], + [ 0.67000176, 0.23511902, 0.24650278], + [ 0.66693423, 0.22859879, 0.24124404], + [ 0.6638441 , 0.22201742, 0.2359961 ], + [ 0.66080672, 0.21526712, 0.23069468] +] + + +_icefire_lut = [ + [ 0.73936227, 0.90443867, 0.85757238], + [ 0.72888063, 0.89639109, 0.85488394], + [ 0.71834255, 0.88842162, 0.8521605 ], + [ 0.70773866, 0.88052939, 0.849422 ], + [ 0.69706215, 0.87271313, 0.84668315], + [ 0.68629021, 0.86497329, 0.84398721], + [ 0.67543654, 0.85730617, 0.84130969], + [ 0.66448539, 0.84971123, 0.83868005], + [ 0.65342679, 0.84218728, 0.83611512], + [ 0.64231804, 0.83471867, 0.83358584], + [ 0.63117745, 0.827294 , 0.83113431], + [ 0.62000484, 0.81991069, 0.82876741], + [ 0.60879435, 0.81256797, 0.82648905], + [ 0.59754118, 0.80526458, 0.82430414], + [ 0.58624247, 0.79799884, 0.82221573], + [ 0.57489525, 0.7907688 , 0.82022901], + [ 0.56349779, 0.78357215, 0.81834861], + [ 0.55204294, 0.77640827, 0.81657563], + [ 0.54052516, 0.76927562, 0.81491462], + [ 0.52894085, 0.76217215, 0.81336913], + [ 0.51728854, 0.75509528, 
0.81194156], + [ 0.50555676, 0.74804469, 0.81063503], + [ 0.49373871, 0.7410187 , 0.80945242], + [ 0.48183174, 0.73401449, 0.80839675], + [ 0.46982587, 0.72703075, 0.80747097], + [ 0.45770893, 0.72006648, 0.80667756], + [ 0.44547249, 0.71311941, 0.80601991], + [ 0.43318643, 0.70617126, 0.80549278], + [ 0.42110294, 0.69916972, 0.80506683], + [ 0.40925101, 0.69211059, 0.80473246], + [ 0.3976693 , 0.68498786, 0.80448272], + [ 0.38632002, 0.67781125, 0.80431024], + [ 0.37523981, 0.67057537, 0.80420832], + [ 0.36442578, 0.66328229, 0.80417474], + [ 0.35385939, 0.65593699, 0.80420591], + [ 0.34358916, 0.64853177, 0.8043 ], + [ 0.33355526, 0.64107876, 0.80445484], + [ 0.32383062, 0.63356578, 0.80467091], + [ 0.31434372, 0.62600624, 0.8049475 ], + [ 0.30516161, 0.618389 , 0.80528692], + [ 0.29623491, 0.61072284, 0.80569021], + [ 0.28759072, 0.60300319, 0.80616055], + [ 0.27923924, 0.59522877, 0.80669803], + [ 0.27114651, 0.5874047 , 0.80730545], + [ 0.26337153, 0.57952055, 0.80799113], + [ 0.25588696, 0.57157984, 0.80875922], + [ 0.248686 , 0.56358255, 0.80961366], + [ 0.24180668, 0.55552289, 0.81055123], + [ 0.23526251, 0.54739477, 0.8115939 ], + [ 0.22921445, 0.53918506, 0.81267292], + [ 0.22397687, 0.53086094, 0.8137141 ], + [ 0.21977058, 0.52241482, 0.81457651], + [ 0.21658989, 0.51384321, 0.81528511], + [ 0.21452772, 0.50514155, 0.81577278], + [ 0.21372783, 0.49630865, 0.81589566], + [ 0.21409503, 0.48734861, 0.81566163], + [ 0.2157176 , 0.47827123, 0.81487615], + [ 0.21842857, 0.46909168, 0.81351614], + [ 0.22211705, 0.45983212, 0.81146983], + [ 0.22665681, 0.45052233, 0.80860217], + [ 0.23176013, 0.44119137, 0.80494325], + [ 0.23727775, 0.43187704, 0.80038017], + [ 0.24298285, 0.42261123, 0.79493267], + [ 0.24865068, 0.41341842, 0.78869164], + [ 0.25423116, 0.40433127, 0.78155831], + [ 0.25950239, 0.39535521, 0.77376848], + [ 0.2644736 , 0.38651212, 0.76524809], + [ 0.26901584, 0.37779582, 0.75621942], + [ 0.27318141, 0.36922056, 0.746605 ], + [ 0.27690355, 
0.3607736 , 0.73659374], + [ 0.28023585, 0.35244234, 0.72622103], + [ 0.28306009, 0.34438449, 0.71500731], + [ 0.28535896, 0.33660243, 0.70303975], + [ 0.28708711, 0.32912157, 0.69034504], + [ 0.28816354, 0.32200604, 0.67684067], + [ 0.28862749, 0.31519824, 0.66278813], + [ 0.28847904, 0.30869064, 0.6482815 ], + [ 0.28770912, 0.30250126, 0.63331265], + [ 0.28640325, 0.29655509, 0.61811374], + [ 0.28458943, 0.29082155, 0.60280913], + [ 0.28233561, 0.28527482, 0.58742866], + [ 0.27967038, 0.2798938 , 0.57204225], + [ 0.27665361, 0.27465357, 0.55667809], + [ 0.27332564, 0.2695165 , 0.54145387], + [ 0.26973851, 0.26447054, 0.52634916], + [ 0.2659204 , 0.25949691, 0.511417 ], + [ 0.26190145, 0.25458123, 0.49668768], + [ 0.2577151 , 0.24971691, 0.48214874], + [ 0.25337618, 0.24490494, 0.46778758], + [ 0.24890842, 0.24013332, 0.45363816], + [ 0.24433654, 0.23539226, 0.4397245 ], + [ 0.23967922, 0.23067729, 0.4260591 ], + [ 0.23495608, 0.22598894, 0.41262952], + [ 0.23018113, 0.22132414, 0.39945577], + [ 0.22534609, 0.21670847, 0.38645794], + [ 0.22048761, 0.21211723, 0.37372555], + [ 0.2156198 , 0.20755389, 0.36125301], + [ 0.21074637, 0.20302717, 0.34903192], + [ 0.20586893, 0.19855368, 0.33701661], + [ 0.20101757, 0.19411573, 0.32529173], + [ 0.19619947, 0.18972425, 0.31383846], + [ 0.19140726, 0.18540157, 0.30260777], + [ 0.1866769 , 0.1811332 , 0.29166583], + [ 0.18201285, 0.17694992, 0.28088776], + [ 0.17745228, 0.17282141, 0.27044211], + [ 0.17300684, 0.16876921, 0.26024893], + [ 0.16868273, 0.16479861, 0.25034479], + [ 0.16448691, 0.16091728, 0.24075373], + [ 0.16043195, 0.15714351, 0.23141745], + [ 0.15652427, 0.15348248, 0.22238175], + [ 0.15277065, 0.14994111, 0.21368395], + [ 0.14918274, 0.14653431, 0.20529486], + [ 0.14577095, 0.14327403, 0.19720829], + [ 0.14254381, 0.14016944, 0.18944326], + [ 0.13951035, 0.13723063, 0.18201072], + [ 0.13667798, 0.13446606, 0.17493774], + [ 0.13405762, 0.13188822, 0.16820842], + [ 0.13165767, 0.12950667, 0.16183275], + [ 
0.12948748, 0.12733187, 0.15580631], + [ 0.12755435, 0.1253723 , 0.15014098], + [ 0.12586516, 0.12363617, 0.1448459 ], + [ 0.12442647, 0.12213143, 0.13992571], + [ 0.12324241, 0.12086419, 0.13539995], + [ 0.12232067, 0.11984278, 0.13124644], + [ 0.12166209, 0.11907077, 0.12749671], + [ 0.12126982, 0.11855309, 0.12415079], + [ 0.12114244, 0.11829179, 0.1212385 ], + [ 0.12127766, 0.11828837, 0.11878534], + [ 0.12284806, 0.1179729 , 0.11772022], + [ 0.12619498, 0.11721796, 0.11770203], + [ 0.129968 , 0.11663788, 0.11792377], + [ 0.13410011, 0.11625146, 0.11839138], + [ 0.13855459, 0.11606618, 0.11910584], + [ 0.14333775, 0.11607038, 0.1200606 ], + [ 0.148417 , 0.11626929, 0.12125453], + [ 0.15377389, 0.11666192, 0.12268364], + [ 0.15941427, 0.11723486, 0.12433911], + [ 0.16533376, 0.11797856, 0.12621303], + [ 0.17152547, 0.11888403, 0.12829735], + [ 0.17797765, 0.11994436, 0.13058435], + [ 0.18468769, 0.12114722, 0.13306426], + [ 0.19165663, 0.12247737, 0.13572616], + [ 0.19884415, 0.12394381, 0.1385669 ], + [ 0.20627181, 0.12551883, 0.14157124], + [ 0.21394877, 0.12718055, 0.14472604], + [ 0.22184572, 0.12893119, 0.14802579], + [ 0.22994394, 0.13076731, 0.15146314], + [ 0.23823937, 0.13267611, 0.15502793], + [ 0.24676041, 0.13462172, 0.15870321], + [ 0.25546457, 0.13661751, 0.16248722], + [ 0.26433628, 0.13865956, 0.16637301], + [ 0.27341345, 0.14070412, 0.17034221], + [ 0.28264773, 0.14277192, 0.1743957 ], + [ 0.29202272, 0.14486161, 0.17852793], + [ 0.30159648, 0.14691224, 0.1827169 ], + [ 0.31129002, 0.14897583, 0.18695213], + [ 0.32111555, 0.15103351, 0.19119629], + [ 0.33107961, 0.1530674 , 0.19543758], + [ 0.34119892, 0.15504762, 0.1996803 ], + [ 0.35142388, 0.15701131, 0.20389086], + [ 0.36178937, 0.1589124 , 0.20807639], + [ 0.37229381, 0.16073993, 0.21223189], + [ 0.38288348, 0.16254006, 0.2163249 ], + [ 0.39359592, 0.16426336, 0.22036577], + [ 0.40444332, 0.16588767, 0.22434027], + [ 0.41537995, 0.16745325, 0.2282297 ], + [ 0.42640867, 0.16894939, 
0.23202755], + [ 0.43754706, 0.17034847, 0.23572899], + [ 0.44878564, 0.1716535 , 0.23932344], + [ 0.4601126 , 0.17287365, 0.24278607], + [ 0.47151732, 0.17401641, 0.24610337], + [ 0.48300689, 0.17506676, 0.2492737 ], + [ 0.49458302, 0.17601892, 0.25227688], + [ 0.50623876, 0.17687777, 0.255096 ], + [ 0.5179623 , 0.17765528, 0.2577162 ], + [ 0.52975234, 0.17835232, 0.2601134 ], + [ 0.54159776, 0.17898292, 0.26226847], + [ 0.55348804, 0.17956232, 0.26416003], + [ 0.56541729, 0.18010175, 0.26575971], + [ 0.57736669, 0.180631 , 0.26704888], + [ 0.58932081, 0.18117827, 0.26800409], + [ 0.60127582, 0.18175888, 0.26858488], + [ 0.61319563, 0.1824336 , 0.2687872 ], + [ 0.62506376, 0.18324015, 0.26858301], + [ 0.63681202, 0.18430173, 0.26795276], + [ 0.64842603, 0.18565472, 0.26689463], + [ 0.65988195, 0.18734638, 0.26543435], + [ 0.67111966, 0.18948885, 0.26357955], + [ 0.68209194, 0.19216636, 0.26137175], + [ 0.69281185, 0.19535326, 0.25887063], + [ 0.70335022, 0.19891271, 0.25617971], + [ 0.71375229, 0.20276438, 0.25331365], + [ 0.72401436, 0.20691287, 0.25027366], + [ 0.73407638, 0.21145051, 0.24710661], + [ 0.74396983, 0.21631913, 0.24380715], + [ 0.75361506, 0.22163653, 0.24043996], + [ 0.7630579 , 0.22731637, 0.23700095], + [ 0.77222228, 0.23346231, 0.23356628], + [ 0.78115441, 0.23998404, 0.23013825], + [ 0.78979746, 0.24694858, 0.22678822], + [ 0.79819286, 0.25427223, 0.22352658], + [ 0.80630444, 0.26198807, 0.22040877], + [ 0.81417437, 0.27001406, 0.21744645], + [ 0.82177364, 0.27837336, 0.21468316], + [ 0.82915955, 0.28696963, 0.21210766], + [ 0.83628628, 0.2958499 , 0.20977813], + [ 0.84322168, 0.30491136, 0.20766435], + [ 0.84995458, 0.31415945, 0.2057863 ], + [ 0.85648867, 0.32358058, 0.20415327], + [ 0.86286243, 0.33312058, 0.20274969], + [ 0.86908321, 0.34276705, 0.20157271], + [ 0.87512876, 0.3525416 , 0.20064949], + [ 0.88100349, 0.36243385, 0.19999078], + [ 0.8866469 , 0.37249496, 0.1997976 ], + [ 0.89203964, 0.38273475, 0.20013431], + [ 0.89713496, 
0.39318156, 0.20121514], + [ 0.90195099, 0.40380687, 0.20301555], + [ 0.90648379, 0.41460191, 0.20558847], + [ 0.9106967 , 0.42557857, 0.20918529], + [ 0.91463791, 0.43668557, 0.21367954], + [ 0.91830723, 0.44790913, 0.21916352], + [ 0.92171507, 0.45922856, 0.22568002], + [ 0.92491786, 0.4705936 , 0.23308207], + [ 0.92790792, 0.48200153, 0.24145932], + [ 0.93073701, 0.49341219, 0.25065486], + [ 0.93343918, 0.5048017 , 0.26056148], + [ 0.93602064, 0.51616486, 0.27118485], + [ 0.93850535, 0.52748892, 0.28242464], + [ 0.94092933, 0.53875462, 0.29416042], + [ 0.94330011, 0.5499628 , 0.30634189], + [ 0.94563159, 0.56110987, 0.31891624], + [ 0.94792955, 0.57219822, 0.33184256], + [ 0.95020929, 0.5832232 , 0.34508419], + [ 0.95247324, 0.59419035, 0.35859866], + [ 0.95471709, 0.60510869, 0.37236035], + [ 0.95698411, 0.61595766, 0.38629631], + [ 0.95923863, 0.62676473, 0.40043317], + [ 0.9615041 , 0.6375203 , 0.41474106], + [ 0.96371553, 0.64826619, 0.42928335], + [ 0.96591497, 0.65899621, 0.44380444], + [ 0.96809871, 0.66971662, 0.45830232], + [ 0.9702495 , 0.6804394 , 0.47280492], + [ 0.9723881 , 0.69115622, 0.48729272], + [ 0.97450723, 0.70187358, 0.50178034], + [ 0.9766108 , 0.712592 , 0.51626837], + [ 0.97871716, 0.72330511, 0.53074053], + [ 0.98082222, 0.73401769, 0.54520694], + [ 0.9829001 , 0.74474445, 0.5597019 ], + [ 0.98497466, 0.75547635, 0.57420239], + [ 0.98705581, 0.76621129, 0.58870185], + [ 0.98913325, 0.77695637, 0.60321626], + [ 0.99119918, 0.78771716, 0.61775821], + [ 0.9932672 , 0.79848979, 0.63231691], + [ 0.99535958, 0.80926704, 0.64687278], + [ 0.99740544, 0.82008078, 0.66150571], + [ 0.9992197 , 0.83100723, 0.6764127 ] +] + + +_flare_lut = [ + [0.92907237, 0.68878959, 0.50411509], + [0.92891402, 0.68494686, 0.50173994], + [0.92864754, 0.68116207, 0.4993754], + [0.92836112, 0.67738527, 0.49701572], + [0.9280599, 0.67361354, 0.49466044], + [0.92775569, 0.66983999, 0.49230866], + [0.9274375, 0.66607098, 0.48996097], + [0.927111, 0.66230315, 
0.48761688], + [0.92677996, 0.6585342, 0.485276], + [0.92644317, 0.65476476, 0.48293832], + [0.92609759, 0.65099658, 0.48060392], + [0.925747, 0.64722729, 0.47827244], + [0.92539502, 0.64345456, 0.47594352], + [0.92503106, 0.6396848, 0.47361782], + [0.92466877, 0.6359095, 0.47129427], + [0.92429828, 0.63213463, 0.46897349], + [0.92392172, 0.62835879, 0.46665526], + [0.92354597, 0.62457749, 0.46433898], + [0.9231622, 0.6207962, 0.46202524], + [0.92277222, 0.61701365, 0.45971384], + [0.92237978, 0.61322733, 0.45740444], + [0.92198615, 0.60943622, 0.45509686], + [0.92158735, 0.60564276, 0.45279137], + [0.92118373, 0.60184659, 0.45048789], + [0.92077582, 0.59804722, 0.44818634], + [0.92036413, 0.59424414, 0.44588663], + [0.91994924, 0.5904368, 0.44358868], + [0.91952943, 0.58662619, 0.4412926], + [0.91910675, 0.58281075, 0.43899817], + [0.91868096, 0.57899046, 0.4367054], + [0.91825103, 0.57516584, 0.43441436], + [0.91781857, 0.57133556, 0.43212486], + [0.9173814, 0.56750099, 0.4298371], + [0.91694139, 0.56366058, 0.42755089], + [0.91649756, 0.55981483, 0.42526631], + [0.91604942, 0.55596387, 0.42298339], + [0.9155979, 0.55210684, 0.42070204], + [0.9151409, 0.54824485, 0.4184247], + [0.91466138, 0.54438817, 0.41617858], + [0.91416896, 0.54052962, 0.41396347], + [0.91366559, 0.53666778, 0.41177769], + [0.91315173, 0.53280208, 0.40962196], + [0.91262605, 0.52893336, 0.40749715], + [0.91208866, 0.52506133, 0.40540404], + [0.91153952, 0.52118582, 0.40334346], + [0.91097732, 0.51730767, 0.4013163], + [0.910403, 0.51342591, 0.39932342], + [0.90981494, 0.50954168, 0.39736571], + [0.90921368, 0.5056543, 0.39544411], + [0.90859797, 0.50176463, 0.39355952], + [0.90796841, 0.49787195, 0.39171297], + [0.90732341, 0.4939774, 0.38990532], + [0.90666382, 0.49008006, 0.38813773], + [0.90598815, 0.486181, 0.38641107], + [0.90529624, 0.48228017, 0.38472641], + [0.90458808, 0.47837738, 0.38308489], + [0.90386248, 0.47447348, 0.38148746], + [0.90311921, 0.4705685, 0.37993524], + 
[0.90235809, 0.46666239, 0.37842943], + [0.90157824, 0.46275577, 0.37697105], + [0.90077904, 0.45884905, 0.37556121], + [0.89995995, 0.45494253, 0.37420106], + [0.89912041, 0.4510366, 0.37289175], + [0.8982602, 0.44713126, 0.37163458], + [0.89737819, 0.44322747, 0.37043052], + [0.89647387, 0.43932557, 0.36928078], + [0.89554477, 0.43542759, 0.36818855], + [0.89458871, 0.4315354, 0.36715654], + [0.89360794, 0.42764714, 0.36618273], + [0.89260152, 0.42376366, 0.36526813], + [0.8915687, 0.41988565, 0.36441384], + [0.89050882, 0.41601371, 0.36362102], + [0.8894159, 0.41215334, 0.36289639], + [0.888292, 0.40830288, 0.36223756], + [0.88713784, 0.40446193, 0.36164328], + [0.88595253, 0.40063149, 0.36111438], + [0.88473115, 0.39681635, 0.3606566], + [0.88347246, 0.39301805, 0.36027074], + [0.88217931, 0.38923439, 0.35995244], + [0.880851, 0.38546632, 0.35970244], + [0.87947728, 0.38172422, 0.35953127], + [0.87806542, 0.37800172, 0.35942941], + [0.87661509, 0.37429964, 0.35939659], + [0.87511668, 0.37062819, 0.35944178], + [0.87357554, 0.36698279, 0.35955811], + [0.87199254, 0.3633634, 0.35974223], + [0.87035691, 0.35978174, 0.36000516], + [0.86867647, 0.35623087, 0.36033559], + [0.86694949, 0.35271349, 0.36073358], + [0.86516775, 0.34923921, 0.36120624], + [0.86333996, 0.34580008, 0.36174113], + [0.86145909, 0.3424046, 0.36234402], + [0.85952586, 0.33905327, 0.36301129], + [0.85754536, 0.33574168, 0.36373567], + [0.855514, 0.33247568, 0.36451271], + [0.85344392, 0.32924217, 0.36533344], + [0.8513284, 0.32604977, 0.36620106], + [0.84916723, 0.32289973, 0.36711424], + [0.84696243, 0.31979068, 0.36806976], + [0.84470627, 0.31673295, 0.36907066], + [0.84240761, 0.31371695, 0.37010969], + [0.84005337, 0.31075974, 0.37119284], + [0.83765537, 0.30784814, 0.3723105], + [0.83520234, 0.30499724, 0.37346726], + [0.83270291, 0.30219766, 0.37465552], + [0.83014895, 0.29946081, 0.37587769], + [0.82754694, 0.29677989, 0.37712733], + [0.82489111, 0.29416352, 0.37840532], + [0.82218644, 
0.29160665, 0.37970606], + [0.81942908, 0.28911553, 0.38102921], + [0.81662276, 0.28668665, 0.38236999], + [0.81376555, 0.28432371, 0.383727], + [0.81085964, 0.28202508, 0.38509649], + [0.8079055, 0.27979128, 0.38647583], + [0.80490309, 0.27762348, 0.3878626], + [0.80185613, 0.2755178, 0.38925253], + [0.79876118, 0.27347974, 0.39064559], + [0.79562644, 0.27149928, 0.39203532], + [0.79244362, 0.2695883, 0.39342447], + [0.78922456, 0.26773176, 0.3948046], + [0.78596161, 0.26594053, 0.39617873], + [0.7826624, 0.26420493, 0.39754146], + [0.77932717, 0.26252522, 0.39889102], + [0.77595363, 0.2609049, 0.4002279], + [0.77254999, 0.25933319, 0.40154704], + [0.76911107, 0.25781758, 0.40284959], + [0.76564158, 0.25635173, 0.40413341], + [0.76214598, 0.25492998, 0.40539471], + [0.75861834, 0.25356035, 0.40663694], + [0.75506533, 0.25223402, 0.40785559], + [0.75148963, 0.2509473, 0.40904966], + [0.74788835, 0.24970413, 0.41022028], + [0.74426345, 0.24850191, 0.41136599], + [0.74061927, 0.24733457, 0.41248516], + [0.73695678, 0.24620072, 0.41357737], + [0.73327278, 0.24510469, 0.41464364], + [0.72957096, 0.24404127, 0.4156828], + [0.72585394, 0.24300672, 0.41669383], + [0.7221226, 0.24199971, 0.41767651], + [0.71837612, 0.24102046, 0.41863486], + [0.71463236, 0.24004289, 0.41956983], + [0.7108932, 0.23906316, 0.42048681], + [0.70715842, 0.23808142, 0.42138647], + [0.70342811, 0.2370976, 0.42226844], + [0.69970218, 0.23611179, 0.42313282], + [0.69598055, 0.2351247, 0.42397678], + [0.69226314, 0.23413578, 0.42480327], + [0.68854988, 0.23314511, 0.42561234], + [0.68484064, 0.23215279, 0.42640419], + [0.68113541, 0.23115942, 0.42717615], + [0.67743412, 0.23016472, 0.42792989], + [0.67373662, 0.22916861, 0.42866642], + [0.67004287, 0.22817117, 0.42938576], + [0.66635279, 0.22717328, 0.43008427], + [0.66266621, 0.22617435, 0.43076552], + [0.65898313, 0.22517434, 0.43142956], + [0.65530349, 0.22417381, 0.43207427], + [0.65162696, 0.22317307, 0.4327001], + [0.64795375, 0.22217149, 
0.43330852], + [0.64428351, 0.22116972, 0.43389854], + [0.64061624, 0.22016818, 0.43446845], + [0.63695183, 0.21916625, 0.43502123], + [0.63329016, 0.21816454, 0.43555493], + [0.62963102, 0.2171635, 0.43606881], + [0.62597451, 0.21616235, 0.43656529], + [0.62232019, 0.21516239, 0.43704153], + [0.61866821, 0.21416307, 0.43749868], + [0.61501835, 0.21316435, 0.43793808], + [0.61137029, 0.21216761, 0.4383556], + [0.60772426, 0.2111715, 0.43875552], + [0.60407977, 0.21017746, 0.43913439], + [0.60043678, 0.20918503, 0.43949412], + [0.59679524, 0.20819447, 0.43983393], + [0.59315487, 0.20720639, 0.44015254], + [0.58951566, 0.20622027, 0.44045213], + [0.58587715, 0.20523751, 0.44072926], + [0.5822395, 0.20425693, 0.44098758], + [0.57860222, 0.20328034, 0.44122241], + [0.57496549, 0.20230637, 0.44143805], + [0.57132875, 0.20133689, 0.4416298], + [0.56769215, 0.20037071, 0.44180142], + [0.5640552, 0.19940936, 0.44194923], + [0.56041794, 0.19845221, 0.44207535], + [0.55678004, 0.1975, 0.44217824], + [0.55314129, 0.19655316, 0.44225723], + [0.54950166, 0.19561118, 0.44231412], + [0.54585987, 0.19467771, 0.44234111], + [0.54221157, 0.19375869, 0.44233698], + [0.5385549, 0.19285696, 0.44229959], + [0.5348913, 0.19197036, 0.44222958], + [0.53122177, 0.1910974, 0.44212735], + [0.52754464, 0.19024042, 0.44199159], + [0.52386353, 0.18939409, 0.44182449], + [0.52017476, 0.18856368, 0.44162345], + [0.51648277, 0.18774266, 0.44139128], + [0.51278481, 0.18693492, 0.44112605], + [0.50908361, 0.18613639, 0.4408295], + [0.50537784, 0.18534893, 0.44050064], + [0.50166912, 0.18457008, 0.44014054], + [0.49795686, 0.18380056, 0.43974881], + [0.49424218, 0.18303865, 0.43932623], + [0.49052472, 0.18228477, 0.43887255], + [0.48680565, 0.1815371, 0.43838867], + [0.48308419, 0.18079663, 0.43787408], + [0.47936222, 0.18006056, 0.43733022], + [0.47563799, 0.17933127, 0.43675585], + [0.47191466, 0.17860416, 0.43615337], + [0.46818879, 0.17788392, 0.43552047], + [0.46446454, 0.17716458, 0.43486036], + 
[0.46073893, 0.17645017, 0.43417097], + [0.45701462, 0.17573691, 0.43345429], + [0.45329097, 0.17502549, 0.43271025], + [0.44956744, 0.17431649, 0.4319386], + [0.44584668, 0.17360625, 0.43114133], + [0.44212538, 0.17289906, 0.43031642], + [0.43840678, 0.17219041, 0.42946642], + [0.43469046, 0.17148074, 0.42859124], + [0.4309749, 0.17077192, 0.42769008], + [0.42726297, 0.17006003, 0.42676519], + [0.42355299, 0.16934709, 0.42581586], + [0.41984535, 0.16863258, 0.42484219], + [0.41614149, 0.16791429, 0.42384614], + [0.41244029, 0.16719372, 0.42282661], + [0.40874177, 0.16647061, 0.42178429], + [0.40504765, 0.16574261, 0.42072062], + [0.401357, 0.16501079, 0.41963528], + [0.397669, 0.16427607, 0.418528], + [0.39398585, 0.16353554, 0.41740053], + [0.39030735, 0.16278924, 0.41625344], + [0.3866314, 0.16203977, 0.41508517], + [0.38295904, 0.16128519, 0.41389849], + [0.37928736, 0.16052483, 0.41270599], + [0.37562649, 0.15974704, 0.41151182], + [0.37197803, 0.15895049, 0.41031532], + [0.36833779, 0.15813871, 0.40911916], + [0.36470944, 0.15730861, 0.40792149], + [0.36109117, 0.15646169, 0.40672362], + [0.35748213, 0.15559861, 0.40552633], + [0.353885, 0.15471714, 0.40432831], + [0.35029682, 0.15381967, 0.4031316], + [0.34671861, 0.1529053, 0.40193587], + [0.34315191, 0.15197275, 0.40074049], + [0.33959331, 0.15102466, 0.3995478], + [0.33604378, 0.15006017, 0.39835754], + [0.33250529, 0.14907766, 0.39716879], + [0.32897621, 0.14807831, 0.39598285], + [0.3254559, 0.14706248, 0.39480044], + [0.32194567, 0.14602909, 0.39362106], + [0.31844477, 0.14497857, 0.39244549], + [0.31494974, 0.14391333, 0.39127626], + [0.31146605, 0.14282918, 0.39011024], + [0.30798857, 0.1417297, 0.38895105], + [0.30451661, 0.14061515, 0.38779953], + [0.30105136, 0.13948445, 0.38665531], + [0.2975886, 0.1383403, 0.38552159], + [0.29408557, 0.13721193, 0.38442775] +] + + +_crest_lut = [ + [0.6468274, 0.80289262, 0.56592265], + [0.64233318, 0.80081141, 0.56639461], + [0.63791969, 0.7987162, 0.56674976], 
+ [0.6335316, 0.79661833, 0.56706128], + [0.62915226, 0.7945212, 0.56735066], + [0.62477862, 0.79242543, 0.56762143], + [0.62042003, 0.79032918, 0.56786129], + [0.61606327, 0.78823508, 0.56808666], + [0.61171322, 0.78614216, 0.56829092], + [0.60736933, 0.78405055, 0.56847436], + [0.60302658, 0.78196121, 0.56864272], + [0.59868708, 0.77987374, 0.56879289], + [0.59435366, 0.77778758, 0.56892099], + [0.59001953, 0.77570403, 0.56903477], + [0.58568753, 0.77362254, 0.56913028], + [0.58135593, 0.77154342, 0.56920908], + [0.57702623, 0.76946638, 0.56926895], + [0.57269165, 0.76739266, 0.5693172], + [0.56835934, 0.76532092, 0.56934507], + [0.56402533, 0.76325185, 0.56935664], + [0.55968429, 0.76118643, 0.56935732], + [0.55534159, 0.75912361, 0.56934052], + [0.55099572, 0.75706366, 0.56930743], + [0.54664626, 0.75500662, 0.56925799], + [0.54228969, 0.75295306, 0.56919546], + [0.53792417, 0.75090328, 0.56912118], + [0.53355172, 0.74885687, 0.5690324], + [0.52917169, 0.74681387, 0.56892926], + [0.52478243, 0.74477453, 0.56881287], + [0.52038338, 0.74273888, 0.56868323], + [0.5159739, 0.74070697, 0.56854039], + [0.51155269, 0.73867895, 0.56838507], + [0.50711872, 0.73665492, 0.56821764], + [0.50267118, 0.73463494, 0.56803826], + [0.49822926, 0.73261388, 0.56785146], + [0.49381422, 0.73058524, 0.56767484], + [0.48942421, 0.72854938, 0.56751036], + [0.48505993, 0.72650623, 0.56735752], + [0.48072207, 0.72445575, 0.56721583], + [0.4764113, 0.72239788, 0.56708475], + [0.47212827, 0.72033258, 0.56696376], + [0.46787361, 0.71825983, 0.56685231], + [0.46364792, 0.71617961, 0.56674986], + [0.45945271, 0.71409167, 0.56665625], + [0.45528878, 0.71199595, 0.56657103], + [0.45115557, 0.70989276, 0.5664931], + [0.44705356, 0.70778212, 0.56642189], + [0.44298321, 0.70566406, 0.56635683], + [0.43894492, 0.70353863, 0.56629734], + [0.43493911, 0.70140588, 0.56624286], + [0.43096612, 0.69926587, 0.5661928], + [0.42702625, 0.69711868, 0.56614659], + [0.42311977, 0.69496438, 0.56610368], + 
[0.41924689, 0.69280308, 0.56606355], + [0.41540778, 0.69063486, 0.56602564], + [0.41160259, 0.68845984, 0.56598944], + [0.40783143, 0.68627814, 0.56595436], + [0.40409434, 0.68408988, 0.56591994], + [0.40039134, 0.68189518, 0.56588564], + [0.39672238, 0.6796942, 0.56585103], + [0.39308781, 0.67748696, 0.56581581], + [0.38949137, 0.67527276, 0.56578084], + [0.38592889, 0.67305266, 0.56574422], + [0.38240013, 0.67082685, 0.56570561], + [0.37890483, 0.66859548, 0.56566462], + [0.37544276, 0.66635871, 0.56562081], + [0.37201365, 0.66411673, 0.56557372], + [0.36861709, 0.6618697, 0.5655231], + [0.36525264, 0.65961782, 0.56546873], + [0.36191986, 0.65736125, 0.56541032], + [0.35861935, 0.65509998, 0.56534768], + [0.35535621, 0.65283302, 0.56528211], + [0.35212361, 0.65056188, 0.56521171], + [0.34892097, 0.64828676, 0.56513633], + [0.34574785, 0.64600783, 0.56505539], + [0.34260357, 0.64372528, 0.5649689], + [0.33948744, 0.64143931, 0.56487679], + [0.33639887, 0.6391501, 0.56477869], + [0.33334501, 0.63685626, 0.56467661], + [0.33031952, 0.63455911, 0.564569], + [0.3273199, 0.63225924, 0.56445488], + [0.32434526, 0.62995682, 0.56433457], + [0.32139487, 0.62765201, 0.56420795], + [0.31846807, 0.62534504, 0.56407446], + [0.3155731, 0.62303426, 0.56393695], + [0.31270304, 0.62072111, 0.56379321], + [0.30985436, 0.61840624, 0.56364307], + [0.30702635, 0.61608984, 0.56348606], + [0.30421803, 0.61377205, 0.56332267], + [0.30143611, 0.61145167, 0.56315419], + [0.29867863, 0.60912907, 0.56298054], + [0.29593872, 0.60680554, 0.56280022], + [0.29321538, 0.60448121, 0.56261376], + [0.2905079, 0.60215628, 0.56242036], + [0.28782827, 0.5998285, 0.56222366], + [0.28516521, 0.59749996, 0.56202093], + [0.28251558, 0.59517119, 0.56181204], + [0.27987847, 0.59284232, 0.56159709], + [0.27726216, 0.59051189, 0.56137785], + [0.27466434, 0.58818027, 0.56115433], + [0.2720767, 0.58584893, 0.56092486], + [0.26949829, 0.58351797, 0.56068983], + [0.26693801, 0.58118582, 0.56045121], + 
[0.26439366, 0.57885288, 0.56020858], + [0.26185616, 0.57652063, 0.55996077], + [0.25932459, 0.57418919, 0.55970795], + [0.25681303, 0.57185614, 0.55945297], + [0.25431024, 0.56952337, 0.55919385], + [0.25180492, 0.56719255, 0.5589305], + [0.24929311, 0.56486397, 0.5586654], + [0.24678356, 0.56253666, 0.55839491], + [0.24426587, 0.56021153, 0.55812473], + [0.24174022, 0.55788852, 0.55785448], + [0.23921167, 0.55556705, 0.55758211], + [0.23668315, 0.55324675, 0.55730676], + [0.23414742, 0.55092825, 0.55703167], + [0.23160473, 0.54861143, 0.5567573], + [0.22905996, 0.54629572, 0.55648168], + [0.22651648, 0.54398082, 0.5562029], + [0.22396709, 0.54166721, 0.55592542], + [0.22141221, 0.53935481, 0.55564885], + [0.21885269, 0.53704347, 0.55537294], + [0.21629986, 0.53473208, 0.55509319], + [0.21374297, 0.53242154, 0.5548144], + [0.21118255, 0.53011166, 0.55453708], + [0.2086192, 0.52780237, 0.55426067], + [0.20605624, 0.52549322, 0.55398479], + [0.20350004, 0.5231837, 0.55370601], + [0.20094292, 0.52087429, 0.55342884], + [0.19838567, 0.51856489, 0.55315283], + [0.19582911, 0.51625531, 0.55287818], + [0.19327413, 0.51394542, 0.55260469], + [0.19072933, 0.51163448, 0.5523289], + [0.18819045, 0.50932268, 0.55205372], + [0.18565609, 0.50701014, 0.55177937], + [0.18312739, 0.50469666, 0.55150597], + [0.18060561, 0.50238204, 0.55123374], + [0.178092, 0.50006616, 0.55096224], + [0.17558808, 0.49774882, 0.55069118], + [0.17310341, 0.49542924, 0.5504176], + [0.17063111, 0.49310789, 0.55014445], + [0.1681728, 0.49078458, 0.54987159], + [0.1657302, 0.48845913, 0.54959882], + [0.16330517, 0.48613135, 0.54932605], + [0.16089963, 0.48380104, 0.54905306], + [0.15851561, 0.48146803, 0.54877953], + [0.15615526, 0.47913212, 0.54850526], + [0.15382083, 0.47679313, 0.54822991], + [0.15151471, 0.47445087, 0.54795318], + [0.14924112, 0.47210502, 0.54767411], + [0.1470032, 0.46975537, 0.54739226], + [0.14480101, 0.46740187, 0.54710832], + [0.14263736, 0.46504434, 0.54682188], + [0.14051521, 
0.46268258, 0.54653253], + [0.13843761, 0.46031639, 0.54623985], + [0.13640774, 0.45794558, 0.5459434], + [0.13442887, 0.45556994, 0.54564272], + [0.1325044, 0.45318928, 0.54533736], + [0.13063777, 0.4508034, 0.54502674], + [0.12883252, 0.44841211, 0.5447104], + [0.12709242, 0.44601517, 0.54438795], + [0.1254209, 0.44361244, 0.54405855], + [0.12382162, 0.44120373, 0.54372156], + [0.12229818, 0.43878887, 0.54337634], + [0.12085453, 0.4363676, 0.54302253], + [0.11949938, 0.43393955, 0.54265715], + [0.11823166, 0.43150478, 0.54228104], + [0.11705496, 0.42906306, 0.54189388], + [0.115972, 0.42661431, 0.54149449], + [0.11498598, 0.42415835, 0.54108222], + [0.11409965, 0.42169502, 0.54065622], + [0.11331533, 0.41922424, 0.5402155], + [0.11263542, 0.41674582, 0.53975931], + [0.1120615, 0.4142597, 0.53928656], + [0.11159738, 0.41176567, 0.53879549], + [0.11125248, 0.40926325, 0.53828203], + [0.11101698, 0.40675289, 0.53774864], + [0.11089152, 0.40423445, 0.53719455], + [0.11085121, 0.4017095, 0.53662425], + [0.11087217, 0.39917938, 0.53604354], + [0.11095515, 0.39664394, 0.53545166], + [0.11110676, 0.39410282, 0.53484509], + [0.11131735, 0.39155635, 0.53422678], + [0.11158595, 0.38900446, 0.53359634], + [0.11191139, 0.38644711, 0.5329534], + [0.11229224, 0.38388426, 0.53229748], + [0.11273683, 0.38131546, 0.53162393], + [0.11323438, 0.37874109, 0.53093619], + [0.11378271, 0.37616112, 0.53023413], + [0.11437992, 0.37357557, 0.52951727], + [0.11502681, 0.37098429, 0.52878396], + [0.11572661, 0.36838709, 0.52803124], + [0.11646936, 0.36578429, 0.52726234], + [0.11725299, 0.3631759, 0.52647685], + [0.1180755, 0.36056193, 0.52567436], + [0.1189438, 0.35794203, 0.5248497], + [0.11984752, 0.35531657, 0.52400649], + [0.1207833, 0.35268564, 0.52314492], + [0.12174895, 0.35004927, 0.52226461], + [0.12274959, 0.34740723, 0.52136104], + [0.12377809, 0.34475975, 0.52043639], + [0.12482961, 0.34210702, 0.51949179], + [0.125902, 0.33944908, 0.51852688], + [0.12699998, 0.33678574, 
0.51753708], + [0.12811691, 0.33411727, 0.51652464], + [0.12924811, 0.33144384, 0.51549084], + [0.13039157, 0.32876552, 0.51443538], + [0.13155228, 0.32608217, 0.51335321], + [0.13272282, 0.32339407, 0.51224759], + [0.13389954, 0.32070138, 0.51111946], + [0.13508064, 0.31800419, 0.50996862], + [0.13627149, 0.31530238, 0.50878942], + [0.13746376, 0.31259627, 0.50758645], + [0.13865499, 0.30988598, 0.50636017], + [0.13984364, 0.30717161, 0.50511042], + [0.14103515, 0.30445309, 0.50383119], + [0.14222093, 0.30173071, 0.50252813], + [0.14339946, 0.2990046, 0.50120127], + [0.14456941, 0.29627483, 0.49985054], + [0.14573579, 0.29354139, 0.49847009], + [0.14689091, 0.29080452, 0.49706566], + [0.1480336, 0.28806432, 0.49563732], + [0.1491628, 0.28532086, 0.49418508], + [0.15028228, 0.28257418, 0.49270402], + [0.15138673, 0.27982444, 0.49119848], + [0.15247457, 0.27707172, 0.48966925], + [0.15354487, 0.2743161, 0.48811641], + [0.15459955, 0.27155765, 0.4865371], + [0.15563716, 0.26879642, 0.4849321], + [0.1566572, 0.26603191, 0.48330429], + [0.15765823, 0.26326032, 0.48167456], + [0.15862147, 0.26048295, 0.48005785], + [0.15954301, 0.25770084, 0.47845341], + [0.16043267, 0.25491144, 0.4768626], + [0.16129262, 0.25211406, 0.4752857], + [0.1621119, 0.24931169, 0.47372076], + [0.16290577, 0.24649998, 0.47217025], + [0.16366819, 0.24368054, 0.47063302], + [0.1644021, 0.24085237, 0.46910949], + [0.16510882, 0.2380149, 0.46759982], + [0.16579015, 0.23516739, 0.46610429], + [0.1664433, 0.2323105, 0.46462219], + [0.16707586, 0.22944155, 0.46315508], + [0.16768475, 0.22656122, 0.46170223], + [0.16826815, 0.22366984, 0.46026308], + [0.16883174, 0.22076514, 0.45883891], + [0.16937589, 0.21784655, 0.45742976], + [0.16990129, 0.21491339, 0.45603578], + [0.1704074, 0.21196535, 0.45465677], + [0.17089473, 0.20900176, 0.4532928], + [0.17136819, 0.20602012, 0.45194524], + [0.17182683, 0.20302012, 0.45061386], + [0.17227059, 0.20000106, 0.44929865], + [0.17270583, 0.19695949, 0.44800165], + 
[0.17313804, 0.19389201, 0.44672488], + [0.17363177, 0.19076859, 0.44549087] +] + + +_lut_dict = dict( + rocket=_rocket_lut, + mako=_mako_lut, + icefire=_icefire_lut, + vlag=_vlag_lut, + flare=_flare_lut, + crest=_crest_lut, + +) + +for _name, _lut in _lut_dict.items(): + + _cmap = colors.ListedColormap(_lut, _name) + locals()[_name] = _cmap + + _cmap_r = colors.ListedColormap(_lut[::-1], _name + "_r") + locals()[_name + "_r"] = _cmap_r + + mpl_cm.register_cmap(_name, _cmap) + mpl_cm.register_cmap(_name + "_r", _cmap_r) + +del colors, mpl_cm \ No newline at end of file diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/conftest.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/conftest.py new file mode 100644 index 0000000000000000000000000000000000000000..731fdda05a6d3685adcbc6762deebf20792d1885 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/conftest.py @@ -0,0 +1,235 @@ +import numpy as np +import pandas as pd +import datetime +import matplotlib as mpl +import matplotlib.pyplot as plt + +import pytest + + +def has_verdana(): + """Helper to verify if Verdana font is present""" + # This import is relatively lengthy, so to prevent its import for + # testing other tests in this module not requiring this knowledge, + # import font_manager here + import matplotlib.font_manager as mplfm + try: + verdana_font = mplfm.findfont('Verdana', fallback_to_default=False) + except: # noqa + # if https://github.com/matplotlib/matplotlib/pull/3435 + # gets accepted + return False + # otherwise check if not matching the logic for a 'default' one + try: + unlikely_font = mplfm.findfont("very_unlikely_to_exist1234", + fallback_to_default=False) + except: # noqa + # if matched verdana but not unlikely, Verdana must exist + return True + # otherwise -- if they match, must be the same default + return verdana_font != unlikely_font + + +@pytest.fixture(scope="session", 
autouse=True) +def remove_pandas_unit_conversion(): + # Prior to pandas 1.0, it registered its own datetime converters, + # but they are less powerful than what matplotlib added in 2.2, + # and we rely on that functionality in seaborn. + # https://github.com/matplotlib/matplotlib/pull/9779 + # https://github.com/pandas-dev/pandas/issues/27036 + mpl.units.registry[np.datetime64] = mpl.dates.DateConverter() + mpl.units.registry[datetime.date] = mpl.dates.DateConverter() + mpl.units.registry[datetime.datetime] = mpl.dates.DateConverter() + + +@pytest.fixture(autouse=True) +def close_figs(): + yield + plt.close("all") + + +@pytest.fixture(autouse=True) +def random_seed(): + seed = sum(map(ord, "seaborn random global")) + np.random.seed(seed) + + +@pytest.fixture() +def rng(): + seed = sum(map(ord, "seaborn random object")) + return np.random.RandomState(seed) + + +@pytest.fixture +def wide_df(rng): + + columns = list("abc") + index = pd.Int64Index(np.arange(10, 50, 2), name="wide_index") + values = rng.normal(size=(len(index), len(columns))) + return pd.DataFrame(values, index=index, columns=columns) + + +@pytest.fixture +def wide_array(wide_df): + + # Requires panads >= 0.24 + # return wide_df.to_numpy() + return np.asarray(wide_df) + + +@pytest.fixture +def flat_series(rng): + + index = pd.Int64Index(np.arange(10, 30), name="t") + return pd.Series(rng.normal(size=20), index, name="s") + + +@pytest.fixture +def flat_array(flat_series): + + # Requires panads >= 0.24 + # return flat_series.to_numpy() + return np.asarray(flat_series) + + +@pytest.fixture +def flat_list(flat_series): + + # Requires panads >= 0.24 + # return flat_series.to_list() + return flat_series.tolist() + + +@pytest.fixture(params=["series", "array", "list"]) +def flat_data(rng, request): + + index = pd.Int64Index(np.arange(10, 30), name="t") + series = pd.Series(rng.normal(size=20), index, name="s") + if request.param == "series": + data = series + elif request.param == "array": + try: + data = 
series.to_numpy() # Requires pandas >= 0.24 + except AttributeError: + data = np.asarray(series) + elif request.param == "list": + try: + data = series.to_list() # Requires pandas >= 0.24 + except AttributeError: + data = series.tolist() + return data + + +@pytest.fixture +def wide_list_of_series(rng): + + return [pd.Series(rng.normal(size=20), np.arange(20), name="a"), + pd.Series(rng.normal(size=10), np.arange(5, 15), name="b")] + + +@pytest.fixture +def wide_list_of_arrays(wide_list_of_series): + + # Requires pandas >= 0.24 + # return [s.to_numpy() for s in wide_list_of_series] + return [np.asarray(s) for s in wide_list_of_series] + + +@pytest.fixture +def wide_list_of_lists(wide_list_of_series): + + # Requires pandas >= 0.24 + # return [s.to_list() for s in wide_list_of_series] + return [s.tolist() for s in wide_list_of_series] + + +@pytest.fixture +def wide_dict_of_series(wide_list_of_series): + + return {s.name: s for s in wide_list_of_series} + + +@pytest.fixture +def wide_dict_of_arrays(wide_list_of_series): + + # Requires pandas >= 0.24 + # return {s.name: s.to_numpy() for s in wide_list_of_series} + return {s.name: np.asarray(s) for s in wide_list_of_series} + + +@pytest.fixture +def wide_dict_of_lists(wide_list_of_series): + + # Requires pandas >= 0.24 + # return {s.name: s.to_list() for s in wide_list_of_series} + return {s.name: s.tolist() for s in wide_list_of_series} + + +@pytest.fixture +def long_df(rng): + + n = 100 + df = pd.DataFrame(dict( + x=rng.uniform(0, 20, n).round().astype("int"), + y=rng.normal(size=n), + z=rng.lognormal(size=n), + a=rng.choice(list("abc"), n), + b=rng.choice(list("mnop"), n), + c=rng.choice([0, 1], n, [.3, .7]), + t=rng.choice(np.arange("2004-07-30", "2007-07-30", dtype="datetime64[Y]"), n), + s=rng.choice([2, 4, 8], n), + f=rng.choice([0.2, 0.3], n), + )) + + a_cat = df["a"].astype("category") + new_categories = np.roll(a_cat.cat.categories, 1) + df["a_cat"] = a_cat.cat.reorder_categories(new_categories) + + df["s_cat"] 
= df["s"].astype("category") + df["s_str"] = df["s"].astype(str) + + return df + + +@pytest.fixture +def long_dict(long_df): + + return long_df.to_dict() + + +@pytest.fixture +def repeated_df(rng): + + n = 100 + return pd.DataFrame(dict( + x=np.tile(np.arange(n // 2), 2), + y=rng.normal(size=n), + a=rng.choice(list("abc"), n), + u=np.repeat(np.arange(2), n // 2), + )) + + +@pytest.fixture +def missing_df(rng, long_df): + + df = long_df.copy() + for col in df: + idx = rng.permutation(df.index)[:10] + df.loc[idx, col] = np.nan + return df + + +@pytest.fixture +def object_df(rng, long_df): + + df = long_df.copy() + # objectify numeric columns + for col in ["c", "s", "f"]: + df[col] = df[col].astype(object) + return df + + +@pytest.fixture +def null_series(flat_series): + + return pd.Series(index=flat_series.index, dtype='float64') diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/linearmodels.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/linearmodels.py new file mode 100644 index 0000000000000000000000000000000000000000..ad5e039f57d7f198c4574c0a50a1d915d23d2083 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/linearmodels.py @@ -0,0 +1,7 @@ +import warnings +from .regression import * # noqa + +msg = ( + "The `linearmodels` module has been renamed `regression`." 
+) +warnings.warn(msg) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/palettes.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/palettes.py new file mode 100644 index 0000000000000000000000000000000000000000..b33280e27d1f9112c6cfb51ea98884aed6118110 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/palettes.py @@ -0,0 +1,1038 @@ +import colorsys +from itertools import cycle + +import numpy as np +import matplotlib as mpl + +from .external import husl + +from .utils import desaturate, get_color_cycle +from .colors import xkcd_rgb, crayons + + +__all__ = ["color_palette", "hls_palette", "husl_palette", "mpl_palette", + "dark_palette", "light_palette", "diverging_palette", + "blend_palette", "xkcd_palette", "crayon_palette", + "cubehelix_palette", "set_color_codes"] + + +SEABORN_PALETTES = dict( + deep=["#4C72B0", "#DD8452", "#55A868", "#C44E52", "#8172B3", + "#937860", "#DA8BC3", "#8C8C8C", "#CCB974", "#64B5CD"], + deep6=["#4C72B0", "#55A868", "#C44E52", + "#8172B3", "#CCB974", "#64B5CD"], + muted=["#4878D0", "#EE854A", "#6ACC64", "#D65F5F", "#956CB4", + "#8C613C", "#DC7EC0", "#797979", "#D5BB67", "#82C6E2"], + muted6=["#4878D0", "#6ACC64", "#D65F5F", + "#956CB4", "#D5BB67", "#82C6E2"], + pastel=["#A1C9F4", "#FFB482", "#8DE5A1", "#FF9F9B", "#D0BBFF", + "#DEBB9B", "#FAB0E4", "#CFCFCF", "#FFFEA3", "#B9F2F0"], + pastel6=["#A1C9F4", "#8DE5A1", "#FF9F9B", + "#D0BBFF", "#FFFEA3", "#B9F2F0"], + bright=["#023EFF", "#FF7C00", "#1AC938", "#E8000B", "#8B2BE2", + "#9F4800", "#F14CC1", "#A3A3A3", "#FFC400", "#00D7FF"], + bright6=["#023EFF", "#1AC938", "#E8000B", + "#8B2BE2", "#FFC400", "#00D7FF"], + dark=["#001C7F", "#B1400D", "#12711C", "#8C0800", "#591E71", + "#592F0D", "#A23582", "#3C3C3C", "#B8850A", "#006374"], + dark6=["#001C7F", "#12711C", "#8C0800", + "#591E71", "#B8850A", "#006374"], + colorblind=["#0173B2", "#DE8F05", "#029E73", "#D55E00", 
"#CC78BC", + "#CA9161", "#FBAFE4", "#949494", "#ECE133", "#56B4E9"], + colorblind6=["#0173B2", "#029E73", "#D55E00", + "#CC78BC", "#ECE133", "#56B4E9"] +) + + +MPL_QUAL_PALS = { + "tab10": 10, "tab20": 20, "tab20b": 20, "tab20c": 20, + "Set1": 9, "Set2": 8, "Set3": 12, + "Accent": 8, "Paired": 12, + "Pastel1": 9, "Pastel2": 8, "Dark2": 8, +} + + +QUAL_PALETTE_SIZES = MPL_QUAL_PALS.copy() +QUAL_PALETTE_SIZES.update({k: len(v) for k, v in SEABORN_PALETTES.items()}) +QUAL_PALETTES = list(QUAL_PALETTE_SIZES.keys()) + + +class _ColorPalette(list): + """Set the color palette in a with statement, otherwise be a list.""" + def __enter__(self): + """Open the context.""" + from .rcmod import set_palette + self._orig_palette = color_palette() + set_palette(self) + return self + + def __exit__(self, *args): + """Close the context.""" + from .rcmod import set_palette + set_palette(self._orig_palette) + + def as_hex(self): + """Return a color palette with hex codes instead of RGB values.""" + hex = [mpl.colors.rgb2hex(rgb) for rgb in self] + return _ColorPalette(hex) + + def _repr_html_(self): + """Rich display of the color palette in an HTML frontend.""" + s = 55 + n = len(self) + html = f'' + for i, c in enumerate(self.as_hex()): + html += ( + f'' + ) + html += '' + return html + + +def color_palette(palette=None, n_colors=None, desat=None, as_cmap=False): + """Return a list of colors or continuous colormap defining a palette. + + Possible ``palette`` values include: + - Name of a seaborn palette (deep, muted, bright, pastel, dark, colorblind) + - Name of matplotlib colormap + - 'husl' or 'hls' + - 'ch:' + - 'light:', 'dark:', 'blend:,', + - A sequence of colors in any format matplotlib accepts + + Calling this function with ``palette=None`` will return the current + matplotlib color cycle. + + This function can also be used in a ``with`` statement to temporarily + set the color cycle for a plot or set of plots. + + See the :ref:`tutorial ` for more information. 
+ + Parameters + ---------- + palette : None, string, or sequence, optional + Name of palette or None to return current palette. If a sequence, input + colors are used but possibly cycled and desaturated. + n_colors : int, optional + Number of colors in the palette. If ``None``, the default will depend + on how ``palette`` is specified. Named palettes default to 6 colors, + but grabbing the current palette or passing in a list of colors will + not change the number of colors unless this is specified. Asking for + more colors than exist in the palette will cause it to cycle. Ignored + when ``as_cmap`` is True. + desat : float, optional + Proportion to desaturate each color by. + as_cmap : bool + If True, return a :class:`matplotlib.colors.Colormap`. + + Returns + ------- + list of RGB tuples or :class:`matplotlib.colors.Colormap` + + See Also + -------- + set_palette : Set the default color cycle for all plots. + set_color_codes : Reassign color codes like ``"b"``, ``"g"``, etc. to + colors from one of the seaborn palettes. + + Examples + -------- + + .. 
include:: ../docstrings/color_palette.rst + + """ + if palette is None: + palette = get_color_cycle() + if n_colors is None: + n_colors = len(palette) + + elif not isinstance(palette, str): + palette = palette + if n_colors is None: + n_colors = len(palette) + else: + + if n_colors is None: + # Use all colors in a qualitative palette or 6 of another kind + n_colors = QUAL_PALETTE_SIZES.get(palette, 6) + + if palette in SEABORN_PALETTES: + # Named "seaborn variant" of matplotlib default color cycle + palette = SEABORN_PALETTES[palette] + + elif palette == "hls": + # Evenly spaced colors in cylindrical RGB space + palette = hls_palette(n_colors, as_cmap=as_cmap) + + elif palette == "husl": + # Evenly spaced colors in cylindrical Lab space + palette = husl_palette(n_colors, as_cmap=as_cmap) + + elif palette.lower() == "jet": + # Paternalism + raise ValueError("No.") + + elif palette.startswith("ch:"): + # Cubehelix palette with params specified in string + args, kwargs = _parse_cubehelix_args(palette) + palette = cubehelix_palette(n_colors, *args, **kwargs, as_cmap=as_cmap) + + elif palette.startswith("light:"): + # light palette to color specified in string + _, color = palette.split(":") + reverse = color.endswith("_r") + if reverse: + color = color[:-2] + palette = light_palette(color, n_colors, reverse=reverse, as_cmap=as_cmap) + + elif palette.startswith("dark:"): + # light palette to color specified in string + _, color = palette.split(":") + reverse = color.endswith("_r") + if reverse: + color = color[:-2] + palette = dark_palette(color, n_colors, reverse=reverse, as_cmap=as_cmap) + + elif palette.startswith("blend:"): + # blend palette between colors specified in string + _, colors = palette.split(":") + colors = colors.split(",") + palette = blend_palette(colors, n_colors, as_cmap=as_cmap) + + else: + try: + # Perhaps a named matplotlib colormap? 
+ palette = mpl_palette(palette, n_colors, as_cmap=as_cmap) + except ValueError: + raise ValueError("%s is not a valid palette name" % palette) + + if desat is not None: + palette = [desaturate(c, desat) for c in palette] + + if not as_cmap: + + # Always return as many colors as we asked for + pal_cycle = cycle(palette) + palette = [next(pal_cycle) for _ in range(n_colors)] + + # Always return in r, g, b tuple format + try: + palette = map(mpl.colors.colorConverter.to_rgb, palette) + palette = _ColorPalette(palette) + except ValueError: + raise ValueError(f"Could not generate a palette for {palette}") + + return palette + + +def hls_palette(n_colors=6, h=.01, l=.6, s=.65, as_cmap=False): # noqa + """Get a set of evenly spaced colors in HLS hue space. + + h, l, and s should be between 0 and 1 + + Parameters + ---------- + + n_colors : int + number of colors in the palette + h : float + first hue + l : float + lightness + s : float + saturation + + Returns + ------- + list of RGB tuples or :class:`matplotlib.colors.Colormap` + + See Also + -------- + husl_palette : Make a palette using evenly spaced hues in the HUSL system. + + Examples + -------- + + Create a palette of 10 colors with the default parameters: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns; sns.set_theme() + >>> sns.palplot(sns.hls_palette(10)) + + Create a palette of 10 colors that begins at a different hue value: + + .. plot:: + :context: close-figs + + >>> sns.palplot(sns.hls_palette(10, h=.5)) + + Create a palette of 10 colors that are darker than the default: + + .. plot:: + :context: close-figs + + >>> sns.palplot(sns.hls_palette(10, l=.4)) + + Create a palette of 10 colors that are less saturated than the default: + + .. 
plot:: + :context: close-figs + + >>> sns.palplot(sns.hls_palette(10, s=.4)) + + """ + if as_cmap: + n_colors = 256 + hues = np.linspace(0, 1, int(n_colors) + 1)[:-1] + hues += h + hues %= 1 + hues -= hues.astype(int) + palette = [colorsys.hls_to_rgb(h_i, l, s) for h_i in hues] + if as_cmap: + return mpl.colors.ListedColormap(palette, "hls") + else: + return _ColorPalette(palette) + + +def husl_palette(n_colors=6, h=.01, s=.9, l=.65, as_cmap=False): # noqa + """Get a set of evenly spaced colors in HUSL hue space. + + h, s, and l should be between 0 and 1 + + Parameters + ---------- + + n_colors : int + number of colors in the palette + h : float + first hue + s : float + saturation + l : float + lightness + + Returns + ------- + list of RGB tuples or :class:`matplotlib.colors.Colormap` + + See Also + -------- + hls_palette : Make a palette using evently spaced circular hues in the + HSL system. + + Examples + -------- + + Create a palette of 10 colors with the default parameters: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns; sns.set_theme() + >>> sns.palplot(sns.husl_palette(10)) + + Create a palette of 10 colors that begins at a different hue value: + + .. plot:: + :context: close-figs + + >>> sns.palplot(sns.husl_palette(10, h=.5)) + + Create a palette of 10 colors that are darker than the default: + + .. plot:: + :context: close-figs + + >>> sns.palplot(sns.husl_palette(10, l=.4)) + + Create a palette of 10 colors that are less saturated than the default: + + .. 
plot:: + :context: close-figs + + >>> sns.palplot(sns.husl_palette(10, s=.4)) + + """ + if as_cmap: + n_colors = 256 + hues = np.linspace(0, 1, int(n_colors) + 1)[:-1] + hues += h + hues %= 1 + hues *= 359 + s *= 99 + l *= 99 # noqa + palette = [_color_to_rgb((h_i, s, l), input="husl") for h_i in hues] + if as_cmap: + return mpl.colors.ListedColormap(palette, "hsl") + else: + return _ColorPalette(palette) + + +def mpl_palette(name, n_colors=6, as_cmap=False): + """Return discrete colors from a matplotlib palette. + + Note that this handles the qualitative colorbrewer palettes + properly, although if you ask for more colors than a particular + qualitative palette can provide you will get fewer than you are + expecting. In contrast, asking for qualitative color brewer palettes + using :func:`color_palette` will return the expected number of colors, + but they will cycle. + + If you are using the IPython notebook, you can also use the function + :func:`choose_colorbrewer_palette` to interactively select palettes. + + Parameters + ---------- + name : string + Name of the palette. This should be a named matplotlib colormap. + n_colors : int + Number of discrete colors in the palette. + + Returns + ------- + list of RGB tuples or :class:`matplotlib.colors.Colormap` + + Examples + -------- + + Create a qualitative colorbrewer palette with 8 colors: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns; sns.set_theme() + >>> sns.palplot(sns.mpl_palette("Set2", 8)) + + Create a sequential colorbrewer palette: + + .. plot:: + :context: close-figs + + >>> sns.palplot(sns.mpl_palette("Blues")) + + Create a diverging palette: + + .. plot:: + :context: close-figs + + >>> sns.palplot(sns.mpl_palette("seismic", 8)) + + Create a "dark" sequential palette: + + .. 
plot:: + :context: close-figs + + >>> sns.palplot(sns.mpl_palette("GnBu_d")) + + """ + if name.endswith("_d"): + sub_name = name[:-2] + if sub_name.endswith("_r"): + reverse = True + sub_name = sub_name[:-2] + else: + reverse = False + pal = color_palette(sub_name, 2) + ["#333333"] + if reverse: + pal = pal[::-1] + cmap = blend_palette(pal, n_colors, as_cmap=True) + else: + cmap = mpl.cm.get_cmap(name) + + if name in MPL_QUAL_PALS: + bins = np.linspace(0, 1, MPL_QUAL_PALS[name])[:n_colors] + else: + bins = np.linspace(0, 1, int(n_colors) + 2)[1:-1] + palette = list(map(tuple, cmap(bins)[:, :3])) + + if as_cmap: + return cmap + else: + return _ColorPalette(palette) + + +def _color_to_rgb(color, input): + """Add some more flexibility to color choices.""" + if input == "hls": + color = colorsys.hls_to_rgb(*color) + elif input == "husl": + color = husl.husl_to_rgb(*color) + color = tuple(np.clip(color, 0, 1)) + elif input == "xkcd": + color = xkcd_rgb[color] + + return mpl.colors.to_rgb(color) + + +def dark_palette(color, n_colors=6, reverse=False, as_cmap=False, input="rgb"): + """Make a sequential palette that blends from dark to ``color``. + + This kind of palette is good for data that range between relatively + uninteresting low values and interesting high values. + + The ``color`` parameter can be specified in a number of ways, including + all options for defining a color in matplotlib and several additional + color spaces that are handled by seaborn. You can also use the database + of named colors from the XKCD color survey. + + If you are using the IPython notebook, you can also choose this palette + interactively with the :func:`choose_dark_palette` function. 
+ + Parameters + ---------- + color : base color for high values + hex, rgb-tuple, or html color name + n_colors : int, optional + number of colors in the palette + reverse : bool, optional + if True, reverse the direction of the blend + as_cmap : bool, optional + If True, return a :class:`matplotlib.colors.Colormap`. + input : {'rgb', 'hls', 'husl', xkcd'} + Color space to interpret the input color. The first three options + apply to tuple inputs and the latter applies to string inputs. + + Returns + ------- + list of RGB tuples or :class:`matplotlib.colors.Colormap` + + See Also + -------- + light_palette : Create a sequential palette with bright low values. + diverging_palette : Create a diverging palette with two colors. + + Examples + -------- + + Generate a palette from an HTML color: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns; sns.set_theme() + >>> sns.palplot(sns.dark_palette("purple")) + + Generate a palette that decreases in lightness: + + .. plot:: + :context: close-figs + + >>> sns.palplot(sns.dark_palette("seagreen", reverse=True)) + + Generate a palette from an HUSL-space seed: + + .. plot:: + :context: close-figs + + >>> sns.palplot(sns.dark_palette((260, 75, 60), input="husl")) + + Generate a colormap object: + + .. plot:: + :context: close-figs + + >>> from numpy import arange + >>> x = arange(25).reshape(5, 5) + >>> cmap = sns.dark_palette("#2ecc71", as_cmap=True) + >>> ax = sns.heatmap(x, cmap=cmap) + + """ + rgb = _color_to_rgb(color, input) + h, s, l = husl.rgb_to_husl(*rgb) + gray_s, gray_l = .15 * s, 15 + gray = _color_to_rgb((h, gray_s, gray_l), input="husl") + colors = [rgb, gray] if reverse else [gray, rgb] + return blend_palette(colors, n_colors, as_cmap) + + +def light_palette(color, n_colors=6, reverse=False, as_cmap=False, input="rgb"): + """Make a sequential palette that blends from light to ``color``. 
+ + This kind of palette is good for data that range between relatively + uninteresting low values and interesting high values. + + The ``color`` parameter can be specified in a number of ways, including + all options for defining a color in matplotlib and several additional + color spaces that are handled by seaborn. You can also use the database + of named colors from the XKCD color survey. + + If you are using the IPython notebook, you can also choose this palette + interactively with the :func:`choose_light_palette` function. + + Parameters + ---------- + color : base color for high values + hex code, html color name, or tuple in ``input`` space. + n_colors : int, optional + number of colors in the palette + reverse : bool, optional + if True, reverse the direction of the blend + as_cmap : bool, optional + If True, return a :class:`matplotlib.colors.Colormap`. + input : {'rgb', 'hls', 'husl', xkcd'} + Color space to interpret the input color. The first three options + apply to tuple inputs and the latter applies to string inputs. + + Returns + ------- + list of RGB tuples or :class:`matplotlib.colors.Colormap` + + See Also + -------- + dark_palette : Create a sequential palette with dark low values. + diverging_palette : Create a diverging palette with two colors. + + Examples + -------- + + Generate a palette from an HTML color: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns; sns.set_theme() + >>> sns.palplot(sns.light_palette("purple")) + + Generate a palette that increases in lightness: + + .. plot:: + :context: close-figs + + >>> sns.palplot(sns.light_palette("seagreen", reverse=True)) + + Generate a palette from an HUSL-space seed: + + .. plot:: + :context: close-figs + + >>> sns.palplot(sns.light_palette((260, 75, 60), input="husl")) + + Generate a colormap object: + + .. 
plot:: + :context: close-figs + + >>> from numpy import arange + >>> x = arange(25).reshape(5, 5) + >>> cmap = sns.light_palette("#2ecc71", as_cmap=True) + >>> ax = sns.heatmap(x, cmap=cmap) + + """ + rgb = _color_to_rgb(color, input) + h, s, l = husl.rgb_to_husl(*rgb) + gray_s, gray_l = .15 * s, 95 + gray = _color_to_rgb((h, gray_s, gray_l), input="husl") + colors = [rgb, gray] if reverse else [gray, rgb] + return blend_palette(colors, n_colors, as_cmap) + + +def diverging_palette(h_neg, h_pos, s=75, l=50, sep=1, n=6, # noqa + center="light", as_cmap=False): + """Make a diverging palette between two HUSL colors. + + If you are using the IPython notebook, you can also choose this palette + interactively with the :func:`choose_diverging_palette` function. + + Parameters + ---------- + h_neg, h_pos : float in [0, 359] + Anchor hues for negative and positive extents of the map. + s : float in [0, 100], optional + Anchor saturation for both extents of the map. + l : float in [0, 100], optional + Anchor lightness for both extents of the map. + sep : int, optional + Size of the intermediate region. + n : int, optional + Number of colors in the palette (if not returning a cmap) + center : {"light", "dark"}, optional + Whether the center of the palette is light or dark + as_cmap : bool, optional + If True, return a :class:`matplotlib.colors.Colormap`. + + Returns + ------- + list of RGB tuples or :class:`matplotlib.colors.Colormap` + + See Also + -------- + dark_palette : Create a sequential palette with dark values. + light_palette : Create a sequential palette with light values. + + Examples + -------- + + Generate a blue-white-red palette: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns; sns.set_theme() + >>> sns.palplot(sns.diverging_palette(240, 10, n=9)) + + Generate a brighter green-white-purple palette: + + .. 
plot:: + :context: close-figs + + >>> sns.palplot(sns.diverging_palette(150, 275, s=80, l=55, n=9)) + + Generate a blue-black-red palette: + + .. plot:: + :context: close-figs + + >>> sns.palplot(sns.diverging_palette(250, 15, s=75, l=40, + ... n=9, center="dark")) + + Generate a colormap object: + + .. plot:: + :context: close-figs + + >>> from numpy import arange + >>> x = arange(25).reshape(5, 5) + >>> cmap = sns.diverging_palette(220, 20, as_cmap=True) + >>> ax = sns.heatmap(x, cmap=cmap) + + """ + palfunc = dict(dark=dark_palette, light=light_palette)[center] + n_half = int(128 - (sep // 2)) + neg = palfunc((h_neg, s, l), n_half, reverse=True, input="husl") + pos = palfunc((h_pos, s, l), n_half, input="husl") + midpoint = dict(light=[(.95, .95, .95)], dark=[(.133, .133, .133)])[center] + mid = midpoint * sep + pal = blend_palette(np.concatenate([neg, mid, pos]), n, as_cmap=as_cmap) + return pal + + +def blend_palette(colors, n_colors=6, as_cmap=False, input="rgb"): + """Make a palette that blends between a list of colors. + + Parameters + ---------- + colors : sequence of colors in various formats interpreted by ``input`` + hex code, html color name, or tuple in ``input`` space. + n_colors : int, optional + Number of colors in the palette. + as_cmap : bool, optional + If True, return a :class:`matplotlib.colors.Colormap`. + + Returns + ------- + list of RGB tuples or :class:`matplotlib.colors.Colormap` + + """ + colors = [_color_to_rgb(color, input) for color in colors] + name = "blend" + pal = mpl.colors.LinearSegmentedColormap.from_list(name, colors) + if not as_cmap: + rgb_array = pal(np.linspace(0, 1, int(n_colors)))[:, :3] # no alpha + pal = _ColorPalette(map(tuple, rgb_array)) + return pal + + +def xkcd_palette(colors): + """Make a palette with color names from the xkcd color survey. + + See xkcd for the full list of colors: https://xkcd.com/color/rgb/ + + This is just a simple wrapper around the ``seaborn.xkcd_rgb`` dictionary. 
+ + Parameters + ---------- + colors : list of strings + List of keys in the ``seaborn.xkcd_rgb`` dictionary. + + Returns + ------- + palette : seaborn color palette + Returns the list of colors as RGB tuples in an object that behaves like + other seaborn color palettes. + + See Also + -------- + crayon_palette : Make a palette with Crayola crayon colors. + + """ + palette = [xkcd_rgb[name] for name in colors] + return color_palette(palette, len(palette)) + + +def crayon_palette(colors): + """Make a palette with color names from Crayola crayons. + + Colors are taken from here: + https://en.wikipedia.org/wiki/List_of_Crayola_crayon_colors + + This is just a simple wrapper around the ``seaborn.crayons`` dictionary. + + Parameters + ---------- + colors : list of strings + List of keys in the ``seaborn.crayons`` dictionary. + + Returns + ------- + palette : seaborn color palette + Returns the list of colors as rgb tuples in an object that behaves like + other seaborn color palettes. + + See Also + -------- + xkcd_palette : Make a palette with named colors from the XKCD color survey. + + """ + palette = [crayons[name] for name in colors] + return color_palette(palette, len(palette)) + + +def cubehelix_palette(n_colors=6, start=0, rot=.4, gamma=1.0, hue=0.8, + light=.85, dark=.15, reverse=False, as_cmap=False): + """Make a sequential palette from the cubehelix system. + + This produces a colormap with linearly-decreasing (or increasing) + brightness. That means that information will be preserved if printed to + black and white or viewed by someone who is colorblind. "cubehelix" is + also available as a matplotlib-based palette, but this function gives the + user more control over the look of the palette and has a different set of + defaults. + + In addition to using this function, it is also possible to generate a + cubehelix palette generally in seaborn using a string-shorthand; see the + example below. 
+ + Parameters + ---------- + n_colors : int + Number of colors in the palette. + start : float, 0 <= start <= 3 + The hue at the start of the helix. + rot : float + Rotations around the hue wheel over the range of the palette. + gamma : float 0 <= gamma + Gamma factor to emphasize darker (gamma < 1) or lighter (gamma > 1) + colors. + hue : float, 0 <= hue <= 1 + Saturation of the colors. + dark : float 0 <= dark <= 1 + Intensity of the darkest color in the palette. + light : float 0 <= light <= 1 + Intensity of the lightest color in the palette. + reverse : bool + If True, the palette will go from dark to light. + as_cmap : bool + If True, return a :class:`matplotlib.colors.Colormap`. + + Returns + ------- + list of RGB tuples or :class:`matplotlib.colors.Colormap` + + See Also + -------- + choose_cubehelix_palette : Launch an interactive widget to select cubehelix + palette parameters. + dark_palette : Create a sequential palette with dark low values. + light_palette : Create a sequential palette with bright low values. + + References + ---------- + Green, D. A. (2011). "A colour scheme for the display of astronomical + intensity images". Bulletin of the Astromical Society of India, Vol. 39, + p. 289-295. + + Examples + -------- + + Generate the default palette: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns; sns.set_theme() + >>> sns.palplot(sns.cubehelix_palette()) + + Rotate backwards from the same starting location: + + .. plot:: + :context: close-figs + + >>> sns.palplot(sns.cubehelix_palette(rot=-.4)) + + Use a different starting point and shorter rotation: + + .. plot:: + :context: close-figs + + >>> sns.palplot(sns.cubehelix_palette(start=2.8, rot=.1)) + + Reverse the direction of the lightness ramp: + + .. plot:: + :context: close-figs + + >>> sns.palplot(sns.cubehelix_palette(reverse=True)) + + Generate a colormap object: + + .. 
plot:: + :context: close-figs + + >>> from numpy import arange + >>> x = arange(25).reshape(5, 5) + >>> cmap = sns.cubehelix_palette(as_cmap=True) + >>> ax = sns.heatmap(x, cmap=cmap) + + Use the full lightness range: + + .. plot:: + :context: close-figs + + >>> cmap = sns.cubehelix_palette(dark=0, light=1, as_cmap=True) + >>> ax = sns.heatmap(x, cmap=cmap) + + Use through the :func:`color_palette` interface: + + .. plot:: + :context: close-figs + + >>> sns.palplot(sns.color_palette("ch:2,r=.2,l=.6")) + + """ + def get_color_function(p0, p1): + # Copied from matplotlib because it lives in private module + def color(x): + # Apply gamma factor to emphasise low or high intensity values + xg = x ** gamma + + # Calculate amplitude and angle of deviation from the black + # to white diagonal in the plane of constant + # perceived intensity. + a = hue * xg * (1 - xg) / 2 + + phi = 2 * np.pi * (start / 3 + rot * x) + + return xg + a * (p0 * np.cos(phi) + p1 * np.sin(phi)) + return color + + cdict = { + "red": get_color_function(-0.14861, 1.78277), + "green": get_color_function(-0.29227, -0.90649), + "blue": get_color_function(1.97294, 0.0), + } + + cmap = mpl.colors.LinearSegmentedColormap("cubehelix", cdict) + + x = np.linspace(light, dark, int(n_colors)) + pal = cmap(x)[:, :3].tolist() + if reverse: + pal = pal[::-1] + + if as_cmap: + x_256 = np.linspace(light, dark, 256) + if reverse: + x_256 = x_256[::-1] + pal_256 = cmap(x_256) + cmap = mpl.colors.ListedColormap(pal_256, "seaborn_cubehelix") + return cmap + else: + return _ColorPalette(pal) + + +def _parse_cubehelix_args(argstr): + """Turn stringified cubehelix params into args/kwargs.""" + + if argstr.startswith("ch:"): + argstr = argstr[3:] + + if argstr.endswith("_r"): + reverse = True + argstr = argstr[:-2] + else: + reverse = False + + if not argstr: + return [], {"reverse": reverse} + + all_args = argstr.split(",") + + args = [float(a.strip(" ")) for a in all_args if "=" not in a] + + kwargs = [a.split("=") for a 
in all_args if "=" in a] + kwargs = {k.strip(" "): float(v.strip(" ")) for k, v in kwargs} + + kwarg_map = dict( + s="start", r="rot", g="gamma", + h="hue", l="light", d="dark", # noqa: E741 + ) + + kwargs = {kwarg_map.get(k, k): v for k, v in kwargs.items()} + + if reverse: + kwargs["reverse"] = True + + return args, kwargs + + +def set_color_codes(palette="deep"): + """Change how matplotlib color shorthands are interpreted. + + Calling this will change how shorthand codes like "b" or "g" + are interpreted by matplotlib in subsequent plots. + + Parameters + ---------- + palette : {deep, muted, pastel, dark, bright, colorblind} + Named seaborn palette to use as the source of colors. + + See Also + -------- + set : Color codes can be set through the high-level seaborn style + manager. + set_palette : Color codes can also be set through the function that + sets the matplotlib color cycle. + + Examples + -------- + + Map matplotlib color codes to the default seaborn palette. + + .. plot:: + :context: close-figs + + >>> import matplotlib.pyplot as plt + >>> import seaborn as sns; sns.set_theme() + >>> sns.set_color_codes() + >>> _ = plt.plot([0, 1], color="r") + + Use a different seaborn palette. + + .. 
plot:: + :context: close-figs + + >>> sns.set_color_codes("dark") + >>> _ = plt.plot([0, 1], color="g") + >>> _ = plt.plot([0, 2], color="m") + + """ + if palette == "reset": + colors = [(0., 0., 1.), (0., .5, 0.), (1., 0., 0.), (.75, 0., .75), + (.75, .75, 0.), (0., .75, .75), (0., 0., 0.)] + elif not isinstance(palette, str): + err = "set_color_codes requires a named seaborn palette" + raise TypeError(err) + elif palette in SEABORN_PALETTES: + if not palette.endswith("6"): + palette = palette + "6" + colors = SEABORN_PALETTES[palette] + [(.1, .1, .1)] + else: + err = "Cannot set colors with palette '{}'".format(palette) + raise ValueError(err) + + for code, color in zip("bgrmyck", colors): + rgb = mpl.colors.colorConverter.to_rgb(color) + mpl.colors.colorConverter.colors[code] = rgb + mpl.colors.colorConverter.cache[code] = rgb diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/rcmod.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/rcmod.py new file mode 100644 index 0000000000000000000000000000000000000000..395c376b25c6b7250ffaf402c07ef80f4cb95597 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/rcmod.py @@ -0,0 +1,550 @@ +"""Control plot style and scaling using the matplotlib rcParams interface.""" +import warnings +import functools +from distutils.version import LooseVersion +import matplotlib as mpl +from cycler import cycler +from . 
import palettes + + +__all__ = ["set_theme", "set", "reset_defaults", "reset_orig", + "axes_style", "set_style", "plotting_context", "set_context", + "set_palette"] + + +_style_keys = [ + + "axes.facecolor", + "axes.edgecolor", + "axes.grid", + "axes.axisbelow", + "axes.labelcolor", + + "figure.facecolor", + + "grid.color", + "grid.linestyle", + + "text.color", + + "xtick.color", + "ytick.color", + "xtick.direction", + "ytick.direction", + "lines.solid_capstyle", + + "patch.edgecolor", + "patch.force_edgecolor", + + "image.cmap", + "font.family", + "font.sans-serif", + + "xtick.bottom", + "xtick.top", + "ytick.left", + "ytick.right", + + "axes.spines.left", + "axes.spines.bottom", + "axes.spines.right", + "axes.spines.top", + +] + +_context_keys = [ + + "font.size", + "axes.labelsize", + "axes.titlesize", + "xtick.labelsize", + "ytick.labelsize", + "legend.fontsize", + + "axes.linewidth", + "grid.linewidth", + "lines.linewidth", + "lines.markersize", + "patch.linewidth", + + "xtick.major.width", + "ytick.major.width", + "xtick.minor.width", + "ytick.minor.width", + + "xtick.major.size", + "ytick.major.size", + "xtick.minor.size", + "ytick.minor.size", + +] + +if LooseVersion(mpl.__version__) >= "3.0": + _context_keys.append("legend.title_fontsize") + + +def set_theme(context="notebook", style="darkgrid", palette="deep", + font="sans-serif", font_scale=1, color_codes=True, rc=None): + """ + Set aspects of the visual theme for all matplotlib and seaborn plots. + + This function changes the global defaults for all plots using the + :ref:`matplotlib rcParams system `. + The themeing is decomposed into several distinct sets of parameter values. + + The options are illustrated in the :doc:`aesthetics <../tutorial/aesthetics>` + and :doc:`color palette <../tutorial/color_palettes>` tutorials. + + Parameters + ---------- + context : string or dict + Scaling parameters, see :func:`plotting_context`. + style : string or dict + Axes style parameters, see :func:`axes_style`. 
def axes_style(style=None, rc=None):
    """
    Return the rc parameters that define the general look of a plot.

    These parameters cover things such as the background color and whether
    a grid is drawn by default, expressed through matplotlib rcParams.

    The returned object can also be used as a context manager to change the
    global defaults temporarily. See :func:`set_theme` or :func:`set_style`
    to change them persistently.

    Parameters
    ----------
    style : None, dict, or one of {darkgrid, whitegrid, dark, white, ticks}
        ``None`` reads the current values from ``mpl.rcParams``; a dict is
        used as given; a name selects one of the built-in presets.
    rc : dict, optional
        Overrides applied on top of the selected style. Keys that are not
        listed in ``_style_keys`` are ignored.

    Raises
    ------
    ValueError
        If ``style`` is neither ``None``, a dict, nor a known preset name.

    Examples
    --------

    .. include:: ../docstrings/axes_style.rst

    """
    if style is None:
        params = {key: mpl.rcParams[key] for key in _style_keys}

    elif isinstance(style, dict):
        params = style

    else:
        known_styles = ["white", "dark", "whitegrid", "darkgrid", "ticks"]
        if style not in known_styles:
            raise ValueError(
                "style must be one of %s" % ", ".join(known_styles)
            )

        dark_gray, light_gray = ".15", ".8"

        # Background, spine and grid colors depend on the preset family
        if style.startswith("dark"):
            face, edge, grid_color = "#EAEAF2", "white", "white"
        elif style == "whitegrid":
            face, edge, grid_color = "white", light_gray, light_gray
        else:  # "white" or "ticks"
            face, edge, grid_color = "white", dark_gray, light_gray

        # Only the "ticks" preset draws the bottom/left tick marks
        draw_ticks = style == "ticks"

        params = {

            # Shared by every preset
            "figure.facecolor": "white",
            "axes.labelcolor": dark_gray,

            "xtick.direction": "out",
            "ytick.direction": "out",
            "xtick.color": dark_gray,
            "ytick.color": dark_gray,

            "axes.axisbelow": True,
            "grid.linestyle": "-",

            "text.color": dark_gray,
            "font.family": ["sans-serif"],
            "font.sans-serif": ["Arial", "DejaVu Sans", "Liberation Sans",
                                "Bitstream Vera Sans", "sans-serif"],

            "lines.solid_capstyle": "round",
            "patch.edgecolor": "w",
            "patch.force_edgecolor": True,

            "image.cmap": "rocket",

            "xtick.top": False,
            "ytick.right": False,

            # Grid is on exactly for the "*grid" presets
            "axes.grid": "grid" in style,

            "axes.facecolor": face,
            "axes.edgecolor": edge,
            "grid.color": grid_color,

            "axes.spines.left": True,
            "axes.spines.bottom": True,
            "axes.spines.right": True,
            "axes.spines.top": True,

            "xtick.bottom": draw_ticks,
            "ytick.left": draw_ticks,

        }

    # Keep only the keys that belong to the style definition
    # (this lets us handle matplotlib <=/> 2.0)
    params = {key: val for key, val in params.items() if key in _style_keys}

    # Apply the caller's overrides, again restricted to style keys
    if rc is not None:
        params.update(
            {key: val for key, val in rc.items() if key in _style_keys}
        )

    # The wrapper makes the result usable in a ``with`` statement
    return _AxesStyle(params)
def plotting_context(context=None, font_scale=1, rc=None):
    """
    Get the parameters that control the scaling of plot elements.

    This affects things like the size of the labels, lines, and other elements
    of the plot, but not the overall style. This is accomplished using the
    matplotlib rcParams system.

    The base context is "notebook", and the other contexts are "paper", "talk",
    and "poster", which are versions of the notebook parameters scaled by
    different values. Font elements can also be scaled independently of (but
    relative to) the other values.

    This function can also be used as a context manager to temporarily
    alter the global defaults. See :func:`set_theme` or :func:`set_context`
    to modify the global defaults for all plots.

    Parameters
    ----------
    context : None, dict, or one of {paper, notebook, talk, poster}
        A dictionary of parameters or the name of a preconfigured set.
        ``None`` reads the current values from ``mpl.rcParams``. A dict is
        copied, never modified in place.
    font_scale : float, optional
        Separate scaling factor to independently scale the size of the
        font elements. Only applied when ``context`` is a preset name.
    rc : dict, optional
        Parameter mappings to override the values in the preset seaborn
        context dictionaries. This only updates parameters that are
        considered part of the context definition.

    Raises
    ------
    ValueError
        If ``context`` is neither ``None``, a dict, nor a known preset name.

    Examples
    --------

    .. include:: ../docstrings/plotting_context.rst

    """
    if context is None:
        context_dict = {k: mpl.rcParams[k] for k in _context_keys}

    elif isinstance(context, dict):
        # Shallow copy: the ``rc`` overrides merged in below must not leak
        # back into the dictionary the caller handed us.
        context_dict = dict(context)

    else:

        contexts = ["paper", "notebook", "talk", "poster"]
        if context not in contexts:
            raise ValueError("context must be in %s" % ", ".join(contexts))

        # Set up dictionary of default parameters
        texts_base_context = {

            "font.size": 12,
            "axes.labelsize": 12,
            "axes.titlesize": 12,
            "xtick.labelsize": 11,
            "ytick.labelsize": 11,
            "legend.fontsize": 11,

        }

        # This rc key is only added for matplotlib >= 3.0
        if LooseVersion(mpl.__version__) >= "3.0":
            texts_base_context["legend.title_fontsize"] = 12

        base_context = {

            "axes.linewidth": 1.25,
            "grid.linewidth": 1,
            "lines.linewidth": 1.5,
            "lines.markersize": 6,
            "patch.linewidth": 1,

            "xtick.major.width": 1.25,
            "ytick.major.width": 1.25,
            "xtick.minor.width": 1,
            "ytick.minor.width": 1,

            "xtick.major.size": 6,
            "ytick.major.size": 6,
            "xtick.minor.size": 4,
            "ytick.minor.size": 4,

        }
        base_context.update(texts_base_context)

        # Scale all the parameters by the same factor depending on the context
        scaling = dict(paper=.8, notebook=1, talk=1.5, poster=2)[context]
        context_dict = {k: v * scaling for k, v in base_context.items()}

        # Now independently scale the fonts
        font_keys = texts_base_context.keys()
        font_dict = {k: context_dict[k] * font_scale for k in font_keys}
        context_dict.update(font_dict)

    # Override these settings with the provided rc dictionary
    if rc is not None:
        rc = {k: v for k, v in rc.items() if k in _context_keys}
        context_dict.update(rc)

    # Wrap in a _PlottingContext object so this can be used in a with statement
    context_object = _PlottingContext(context_dict)

    return context_object
def set_palette(palette, n_colors=None, desat=None, color_codes=False):
    """Install a seaborn palette as the matplotlib color cycle.

    Parameters
    ----------
    palette : seaborn color palette | matplotlib colormap | hls | husl
        Palette definition; anything :func:`color_palette` can process.
    n_colors : int
        Number of colors in the cycle. The default depends on the format of
        ``palette``; see the :func:`color_palette` documentation.
    desat : float
        Proportion to desaturate each color by.
    color_codes : bool
        If ``True``, also try to remap the shorthand color codes
        (e.g. "b", "g", "r", etc.) to this palette. A ``ValueError`` or
        ``TypeError`` from that step is deliberately ignored, so palettes
        that cannot supply color codes still set the cycle.

    Examples
    --------
    >>> set_palette("Reds")

    >>> set_palette("Set1", 8, .75)

    See Also
    --------
    color_palette : build a color palette or set the color cycle temporarily
                    in a ``with`` statement.
    set_context : set parameters to scale plot elements
    set_style : set the default parameters for figure style

    """
    palette_colors = palettes.color_palette(palette, n_colors, desat)

    # The cycle drives line colors; patches default to the first color
    mpl.rcParams['axes.prop_cycle'] = cycler('color', palette_colors)
    mpl.rcParams["patch.facecolor"] = palette_colors[0]

    if not color_codes:
        return

    # Best effort only: not every palette can provide color codes
    try:
        palettes.set_color_codes(palette)
    except (ValueError, TypeError):
        pass
ImportError: + _has_statsmodels = False + +from . import utils +from . import algorithms as algo +from .axisgrid import FacetGrid, _facet_docs +from ._decorators import _deprecate_positional_args + + +__all__ = ["lmplot", "regplot", "residplot"] + + +class _LinearPlotter(object): + """Base class for plotting relational data in tidy format. + + To get anything useful done you'll have to inherit from this, but setup + code that can be abstracted out should be put here. + + """ + def establish_variables(self, data, **kws): + """Extract variables from data or use directly.""" + self.data = data + + # Validate the inputs + any_strings = any([isinstance(v, str) for v in kws.values()]) + if any_strings and data is None: + raise ValueError("Must pass `data` if using named variables.") + + # Set the variables + for var, val in kws.items(): + if isinstance(val, str): + vector = data[val] + elif isinstance(val, list): + vector = np.asarray(val) + else: + vector = val + if vector is not None and vector.shape != (1,): + vector = np.squeeze(vector) + if np.ndim(vector) > 1: + err = "regplot inputs must be 1d" + raise ValueError(err) + setattr(self, var, vector) + + def dropna(self, *vars): + """Remove observations with missing data.""" + vals = [getattr(self, var) for var in vars] + vals = [v for v in vals if v is not None] + not_na = np.all(np.column_stack([pd.notnull(v) for v in vals]), axis=1) + for var in vars: + val = getattr(self, var) + if val is not None: + setattr(self, var, val[not_na]) + + def plot(self, ax): + raise NotImplementedError + + +class _RegressionPlotter(_LinearPlotter): + """Plotter for numeric independent variables with regression model. + + This does the computations and drawing for the `regplot` function, and + is thus also used indirectly by `lmplot`. 
def __init__(self, x, y, data=None, x_estimator=None, x_bins=None,
             x_ci="ci", scatter=True, fit_reg=True, ci=95, n_boot=1000,
             units=None, seed=None, order=1, logistic=False, lowess=False,
             robust=False, logx=False, x_partial=None, y_partial=None,
             truncate=False, dropna=True, x_jitter=None, y_jitter=None,
             color=None, label=None):
    """Store the plot options, resolve the data vectors, and preprocess them.

    Raises ``ValueError`` when more than one of the mutually exclusive
    model options (``order > 1``, ``logistic``, ``robust``, ``lowess``,
    ``logx``) is requested.
    """
    # Record the options as attributes; ``x_ci="ci"`` means "same as ci"
    options = dict(
        x_estimator=x_estimator,
        ci=ci,
        x_ci=(ci if x_ci == "ci" else x_ci),
        n_boot=n_boot,
        seed=seed,
        scatter=scatter,
        fit_reg=fit_reg,
        order=order,
        logistic=logistic,
        lowess=lowess,
        robust=robust,
        logx=logx,
        truncate=truncate,
        x_jitter=x_jitter,
        y_jitter=y_jitter,
        color=color,
        label=label,
    )
    for attr, value in options.items():
        setattr(self, attr, value)

    # At most one alternative regression model may be selected
    model_flags = (order > 1, logistic, robust, lowess, logx)
    if sum(model_flags) > 1:
        raise ValueError("Mutually exclusive regression options.")

    # Pull the vectors out of the arguments or the passed dataframe
    self.establish_variables(data, x=x, y=y, units=units,
                             x_partial=x_partial, y_partial=y_partial)

    # Discard observations with missing values
    if dropna:
        self.dropna("x", "y", "units", "x_partial", "y_partial")

    # Remove the influence of nuisance variables
    if self.x_partial is not None:
        self.x = self.regress_out(self.x, self.x_partial)
    if self.y_partial is not None:
        self.y = self.regress_out(self.y, self.y_partial)

    # Binning the predictor implies a point estimate (mean by default)
    if x_bins is None:
        self.x_discrete = self.x
    else:
        self.x_estimator = np.mean if x_estimator is None else x_estimator
        x_discrete, x_bins = self.bin_predictor(x_bins)
        self.x_discrete = x_discrete

    # A regression cannot be fit to zero or one observation
    if len(self.x) <= 1:
        self.fit_reg = False

    # Remember the data range of x for building the prediction grid later
    if self.fit_reg:
        self.x_range = self.x.min(), self.x.max()
@property
def estimate_data(self):
    """Point estimate and interval of y for every distinct discrete x.

    Returns a ``(levels, estimates, intervals)`` triple of parallel lists.
    Each interval is ``None`` when ``x_ci`` is ``None``, estimate +/- one
    standard deviation when ``x_ci == "sd"``, and otherwise the result of
    ``utils.ci`` applied to bootstrap draws of the estimator.
    """
    x_levels, y_obs = self.x_discrete, self.y
    estimator = self.x_estimator
    x_ci = self.x_ci

    levels = sorted(np.unique(x_levels))
    estimates, intervals = [], []

    for level in levels:

        # Observations belonging to this x level
        in_level = x_levels == level
        sample = y_obs[in_level]

        center = estimator(sample)
        estimates.append(center)

        if x_ci is None:
            intervals.append(None)
            continue

        if x_ci == "sd":
            # Standard deviation band; no resampling needed
            spread = np.std(sample)
            intervals.append((center - spread, center + spread))
            continue

        # Bootstrap the estimator, passing the matching units if any
        level_units = None if self.units is None else self.units[in_level]
        draws = algo.bootstrap(sample,
                               func=estimator,
                               n_boot=self.n_boot,
                               units=level_units,
                               seed=self.seed)
        intervals.append(utils.ci(draws, x_ci))

    return levels, estimates, intervals
def fit_fast(self, grid):
    """Ordinary least squares fit and prediction via the pseudo-inverse.

    Returns ``(yhat, yhat_boots)``: the predictions on ``grid`` and, unless
    ``self.ci`` is ``None``, the predictions of each bootstrap refit
    (otherwise ``None``).
    """
    def solve_ols(design, target):
        # Least-squares coefficients for ``design @ beta ~ target``
        return np.linalg.pinv(design).dot(target)

    # Prepend an intercept column to both the data and the grid
    design = np.c_[np.ones(len(self.x)), self.x]
    target = self.y
    grid_design = np.c_[np.ones(len(grid)), grid]

    yhat = grid_design.dot(solve_ols(design, target))

    # Skip the resampling entirely when no interval is wanted
    if self.ci is None:
        return yhat, None

    coef_draws = algo.bootstrap(design, target,
                                func=solve_ols,
                                n_boot=self.n_boot,
                                units=self.units,
                                seed=self.seed)
    yhat_boots = grid_design.dot(coef_draws.T).T
    return yhat, yhat_boots
def bin_predictor(self, bins):
    """Snap every x value to its nearest bin center.

    A scalar ``bins`` asks for that many centers placed at evenly spaced
    interior percentiles of ``self.x``; otherwise ``bins`` is taken as the
    centers themselves. Returns ``(x_binned, centers)`` as flat arrays.
    """
    values = self.x

    if np.isscalar(bins):
        # Drop the 0th and 100th percentile, keeping ``bins`` interior ones
        interior_pcts = np.linspace(0, 100, bins + 2)[1:-1]
        centers = np.percentile(values, interior_pcts)
    else:
        centers = np.ravel(bins)

    # cdist needs 2-D inputs, so work with column vectors
    center_col = np.c_[centers]
    gaps = distance.cdist(np.c_[values], center_col)
    nearest = np.argmin(gaps, axis=1)

    return center_col[nearest].ravel(), center_col.ravel()
def scatterplot(self, ax, kws):
    """Draw the data.

    Without an ``x_estimator`` the raw (possibly jittered) observations are
    drawn. With one, a point estimate per discrete x value is drawn together
    with a vertical interval line wherever an interval is available.

    Parameters
    ----------
    ax : matplotlib Axes
        Target axes; only ``scatter`` and ``plot`` are called on it.
    kws : dict
        Keyword arguments forwarded to ``ax.scatter``. Must contain
        ``"color"``. Defaults are filled in place with ``setdefault``.
    """
    # Treat the line-based markers specially, explicitly setting larger
    # linewidth than is provided by the seaborn style defaults.
    # This would ideally be handled better in matplotlib (i.e., distinguish
    # between edgewidth for solid glyphs and linewidth for line glyphs
    # but this should do for now.
    line_markers = ["1", "2", "3", "4", "+", "x", "|", "_"]
    if self.x_estimator is None:
        if "marker" in kws and kws["marker"] in line_markers:
            lw = mpl.rcParams["lines.linewidth"]
        else:
            lw = mpl.rcParams["lines.markeredgewidth"]
        kws.setdefault("linewidths", lw)

        # Only a 2-D color array with four or more columns suppresses the
        # default alpha. The rank is checked before indexing so that a 1-D
        # color such as ``np.array([r, g, b])`` no longer raises IndexError;
        # it is treated like a plain color tuple, which also gets the default.
        color_shape = getattr(kws['color'], 'shape', ())
        per_point_rgba = len(color_shape) >= 2 and color_shape[1] >= 4
        if not per_point_rgba:
            kws.setdefault("alpha", .8)

        x, y = self.scatter_data
        ax.scatter(x, y, **kws)
    else:
        # TODO abstraction
        ci_kws = {"color": kws["color"]}
        ci_kws["linewidth"] = mpl.rcParams["lines.linewidth"] * 1.75
        kws.setdefault("s", 50)

        xs, ys, cis = self.estimate_data
        # Skip the interval lines entirely when no interval was computed
        if any(ci is not None for ci in cis):
            for x, ci in zip(xs, cis):
                ax.plot([x, x], ci, **ci_kws)
        ax.scatter(xs, ys, **kws)
There are a number of mutually exclusive options for estimating the + regression model. See the :ref:`tutorial ` for more + information.\ + """), + regplot_vs_lmplot=dedent("""\ + The :func:`regplot` and :func:`lmplot` functions are closely related, but + the former is an axes-level function while the latter is a figure-level + function that combines :func:`regplot` and :class:`FacetGrid`.\ + """), + x_estimator=dedent("""\ + x_estimator : callable that maps vector -> scalar, optional + Apply this function to each unique value of ``x`` and plot the + resulting estimate. This is useful when ``x`` is a discrete variable. + If ``x_ci`` is given, this estimate will be bootstrapped and a + confidence interval will be drawn.\ + """), + x_bins=dedent("""\ + x_bins : int or vector, optional + Bin the ``x`` variable into discrete bins and then estimate the central + tendency and a confidence interval. This binning only influences how + the scatterplot is drawn; the regression is still fit to the original + data. This parameter is interpreted either as the number of + evenly-sized (not necessary spaced) bins or the positions of the bin + centers. When this parameter is used, it implies that the default of + ``x_estimator`` is ``numpy.mean``.\ + """), + x_ci=dedent("""\ + x_ci : "ci", "sd", int in [0, 100] or None, optional + Size of the confidence interval used when plotting a central tendency + for discrete values of ``x``. If ``"ci"``, defer to the value of the + ``ci`` parameter. 
If ``"sd"``, skip bootstrapping and show the + standard deviation of the observations in each bin.\ + """), + scatter=dedent("""\ + scatter : bool, optional + If ``True``, draw a scatterplot with the underlying observations (or + the ``x_estimator`` values).\ + """), + fit_reg=dedent("""\ + fit_reg : bool, optional + If ``True``, estimate and plot a regression model relating the ``x`` + and ``y`` variables.\ + """), + ci=dedent("""\ + ci : int in [0, 100] or None, optional + Size of the confidence interval for the regression estimate. This will + be drawn using translucent bands around the regression line. The + confidence interval is estimated using a bootstrap; for large + datasets, it may be advisable to avoid that computation by setting + this parameter to None.\ + """), + n_boot=dedent("""\ + n_boot : int, optional + Number of bootstrap resamples used to estimate the ``ci``. The default + value attempts to balance time and stability; you may want to increase + this value for "final" versions of plots.\ + """), + units=dedent("""\ + units : variable name in ``data``, optional + If the ``x`` and ``y`` observations are nested within sampling units, + those can be specified here. This will be taken into account when + computing the confidence intervals by performing a multilevel bootstrap + that resamples both units and observations (within unit). This does not + otherwise influence how the regression is estimated or drawn.\ + """), + seed=dedent("""\ + seed : int, numpy.random.Generator, or numpy.random.RandomState, optional + Seed or random number generator for reproducible bootstrapping.\ + """), + order=dedent("""\ + order : int, optional + If ``order`` is greater than 1, use ``numpy.polyfit`` to estimate a + polynomial regression.\ + """), + logistic=dedent("""\ + logistic : bool, optional + If ``True``, assume that ``y`` is a binary variable and use + ``statsmodels`` to estimate a logistic regression model. 
Note that this + is substantially more computationally intensive than linear regression, + so you may wish to decrease the number of bootstrap resamples + (``n_boot``) or set ``ci`` to None.\ + """), + lowess=dedent("""\ + lowess : bool, optional + If ``True``, use ``statsmodels`` to estimate a nonparametric lowess + model (locally weighted linear regression). Note that confidence + intervals cannot currently be drawn for this kind of model.\ + """), + robust=dedent("""\ + robust : bool, optional + If ``True``, use ``statsmodels`` to estimate a robust regression. This + will de-weight outliers. Note that this is substantially more + computationally intensive than standard linear regression, so you may + wish to decrease the number of bootstrap resamples (``n_boot``) or set + ``ci`` to None.\ + """), + logx=dedent("""\ + logx : bool, optional + If ``True``, estimate a linear regression of the form y ~ log(x), but + plot the scatterplot and regression model in the input space. Note that + ``x`` must be positive for this to work.\ + """), + xy_partial=dedent("""\ + {x,y}_partial : strings in ``data`` or matrices + Confounding variables to regress out of the ``x`` or ``y`` variables + before plotting.\ + """), + truncate=dedent("""\ + truncate : bool, optional + If ``True``, the regression line is bounded by the data limits. If + ``False``, it extends to the ``x`` axis limits. + """), + xy_jitter=dedent("""\ + {x,y}_jitter : floats, optional + Add uniform random noise of this size to either the ``x`` or ``y`` + variables. The noise is added to a copy of the data after fitting the + regression, and only influences the look of the scatterplot. 
@_deprecate_positional_args
def lmplot(
    *,
    x=None, y=None,
    data=None,
    hue=None, col=None, row=None,  # TODO move before data once * is enforced
    palette=None, col_wrap=None, height=5, aspect=1, markers="o",
    sharex=None, sharey=None, hue_order=None, col_order=None, row_order=None,
    legend=True, legend_out=None, x_estimator=None, x_bins=None,
    x_ci="ci", scatter=True, fit_reg=True, ci=95, n_boot=1000,
    units=None, seed=None, order=1, logistic=False, lowess=False,
    robust=False, logx=False, x_partial=None, y_partial=None,
    truncate=True, x_jitter=None, y_jitter=None, scatter_kws=None,
    line_kws=None, facet_kws=None, size=None,
):
    # Implementation notes (the user-facing docstring is assigned to
    # ``lmplot.__doc__`` right after this definition):
    #   1. translate deprecated arguments,
    #   2. build a FacetGrid on the columns that are actually needed,
    #   3. map ``regplot`` over every facet and optionally add a legend.
    # Returns the FacetGrid; raises TypeError when ``data`` is missing and
    # ValueError when ``markers`` does not match the number of hue levels.

    # Handle deprecations
    if size is not None:
        height = size
        msg = ("The `size` parameter has been renamed to `height`; "
               "please update your code.")
        warnings.warn(msg, UserWarning)

    # Work on a private copy: the deprecation shim below writes into this
    # dictionary and must not modify the object the caller passed in.
    facet_kws = {} if facet_kws is None else dict(facet_kws)

    def facet_kw_deprecation(key, val):
        # Forward a deprecated top-level argument through ``facet_kws``
        if val is not None:
            msg = (
                f"{key} is deprecated from the `lmplot` function signature. "
                "Please update your code to pass it using `facet_kws`."
            )
            warnings.warn(msg, UserWarning)
            facet_kws[key] = val

    facet_kw_deprecation("sharex", sharex)
    facet_kw_deprecation("sharey", sharey)
    facet_kw_deprecation("legend_out", legend_out)

    if data is None:
        raise TypeError("Missing required keyword argument `data`.")

    # Reduce the dataframe to only needed columns
    need_cols = [x, y, hue, col, row, units, x_partial, y_partial]
    cols = np.unique([a for a in need_cols if a is not None]).tolist()
    data = data[cols]

    # Initialize the grid
    facets = FacetGrid(
        data, row=row, col=col, hue=hue,
        palette=palette,
        row_order=row_order, col_order=col_order, hue_order=hue_order,
        height=height, aspect=aspect, col_wrap=col_wrap,
        **facet_kws,
    )

    # Add the markers here as FacetGrid has figured out how many levels of the
    # hue variable are needed and we don't want to duplicate that process
    if facets.hue_names is None:
        n_markers = 1
    else:
        n_markers = len(facets.hue_names)
    if not isinstance(markers, list):
        markers = [markers] * n_markers
    if len(markers) != n_markers:
        raise ValueError(("markers must be a singleton or a list of markers "
                          "for each level of the hue variable"))
    facets.hue_kws = {"marker": markers}

    def update_datalim(data, x, y, ax, **kws):
        # Extend only the x data limits of each facet before the regression
        # is drawn (``updatey=False`` / ``scaley=False`` leave y untouched)
        xys = np.asarray(data[[x, y]]).astype(float)
        ax.update_datalim(xys, updatey=False)
        ax.autoscale_view(scaley=False)

    facets.map_dataframe(update_datalim, x=x, y=y)

    # Draw the regression plot on each facet
    regplot_kws = dict(
        x_estimator=x_estimator, x_bins=x_bins, x_ci=x_ci,
        scatter=scatter, fit_reg=fit_reg, ci=ci, n_boot=n_boot, units=units,
        seed=seed, order=order, logistic=logistic, lowess=lowess,
        robust=robust, logx=logx, x_partial=x_partial, y_partial=y_partial,
        truncate=truncate, x_jitter=x_jitter, y_jitter=y_jitter,
        scatter_kws=scatter_kws, line_kws=line_kws,
    )
    facets.map_dataframe(regplot, x=x, y=y, **regplot_kws)
    facets.set_axis_labels(x, y)

    # Add a legend (skipped when hue merely duplicates a faceting variable)
    if legend and (hue is not None) and (hue not in [col, row]):
        facets.add_legend()
    return facets
+ + {x_estimator} + {x_bins} + {x_ci} + {scatter} + {fit_reg} + {ci} + {n_boot} + {units} + {seed} + {order} + {logistic} + {lowess} + {robust} + {logx} + {xy_partial} + {truncate} + {xy_jitter} + {scatter_line_kws} + facet_kws : dict + Dictionary of keyword arguments for :class:`FacetGrid`. + + See Also + -------- + regplot : Plot data and a conditional model fit. + FacetGrid : Subplot grid for plotting conditional relationships. + pairplot : Combine :func:`regplot` and :class:`PairGrid` (when used with + ``kind="reg"``). + + Notes + ----- + + {regplot_vs_lmplot} + + Examples + -------- + + These examples focus on basic regression model plots to exhibit the + various faceting options; see the :func:`regplot` docs for demonstrations + of the other options for plotting the data and models. There are also + other examples for how to manipulate plot using the returned object on + the :class:`FacetGrid` docs. + + Plot a simple linear relationship between two variables: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns; sns.set_theme(color_codes=True) + >>> tips = sns.load_dataset("tips") + >>> g = sns.lmplot(x="total_bill", y="tip", data=tips) + + Condition on a third variable and plot the levels in different colors: + + .. plot:: + :context: close-figs + + >>> g = sns.lmplot(x="total_bill", y="tip", hue="smoker", data=tips) + + Use different markers as well as colors so the plot will reproduce to + black-and-white more easily: + + .. plot:: + :context: close-figs + + >>> g = sns.lmplot(x="total_bill", y="tip", hue="smoker", data=tips, + ... markers=["o", "x"]) + + Use a different color palette: + + .. plot:: + :context: close-figs + + >>> g = sns.lmplot(x="total_bill", y="tip", hue="smoker", data=tips, + ... palette="Set1") + + Map ``hue`` levels to colors with a dictionary: + + .. plot:: + :context: close-figs + + >>> g = sns.lmplot(x="total_bill", y="tip", hue="smoker", data=tips, + ... 
palette=dict(Yes="g", No="m")) + + Plot the levels of the third variable across different columns: + + .. plot:: + :context: close-figs + + >>> g = sns.lmplot(x="total_bill", y="tip", col="smoker", data=tips) + + Change the height and aspect ratio of the facets: + + .. plot:: + :context: close-figs + + >>> g = sns.lmplot(x="size", y="total_bill", hue="day", col="day", + ... data=tips, height=6, aspect=.4, x_jitter=.1) + + Wrap the levels of the column variable into multiple rows: + + .. plot:: + :context: close-figs + + >>> g = sns.lmplot(x="total_bill", y="tip", col="day", hue="day", + ... data=tips, col_wrap=2, height=3) + + Condition on two variables to make a full grid: + + .. plot:: + :context: close-figs + + >>> g = sns.lmplot(x="total_bill", y="tip", row="sex", col="time", + ... data=tips, height=3) + + Use methods on the returned :class:`FacetGrid` instance to further tweak + the plot: + + .. plot:: + :context: close-figs + + >>> g = sns.lmplot(x="total_bill", y="tip", row="sex", col="time", + ... data=tips, height=3) + >>> g = (g.set_axis_labels("Total bill (US Dollars)", "Tip") + ... .set(xlim=(0, 60), ylim=(0, 12), + ... xticks=[10, 30, 50], yticks=[2, 6, 10]) + ... 
@_deprecate_positional_args
def regplot(
    *,
    x=None, y=None,
    data=None,
    x_estimator=None, x_bins=None, x_ci="ci",
    scatter=True, fit_reg=True, ci=95, n_boot=1000, units=None,
    seed=None, order=1, logistic=False, lowess=False, robust=False,
    logx=False, x_partial=None, y_partial=None,
    truncate=True, dropna=True, x_jitter=None, y_jitter=None,
    label=None, color=None, marker="o",
    scatter_kws=None, line_kws=None, ax=None
):
    # The user-facing docstring is assigned to ``regplot.__doc__`` after this
    # definition. This body only builds the plotter, resolves the target
    # Axes, prepares private copies of the keyword dictionaries and draws.

    # The plotter takes its configuration positionally, in this exact order
    plotter_args = (
        x, y, data, x_estimator, x_bins, x_ci,
        scatter, fit_reg, ci, n_boot, units, seed,
        order, logistic, lowess, robust, logx,
        x_partial, y_partial, truncate, dropna,
        x_jitter, y_jitter, color, label,
    )
    reg_plotter = _RegressionPlotter(*plotter_args)

    # Fall back to the current Axes when none was supplied
    if ax is None:
        ax = plt.gca()

    # Shallow-copy the caller's dictionaries so they are never modified here
    if scatter_kws is None:
        scatter_opts = {}
    else:
        scatter_opts = copy.copy(scatter_kws)
    scatter_opts["marker"] = marker

    if line_kws is None:
        line_opts = {}
    else:
        line_opts = copy.copy(line_kws)

    reg_plotter.plot(ax, scatter_opts, line_opts)
    return ax
+ {scatter_line_kws} + ax : matplotlib Axes, optional + Axes object to draw the plot onto, otherwise uses the current Axes. + + Returns + ------- + ax : matplotlib Axes + The Axes object containing the plot. + + See Also + -------- + lmplot : Combine :func:`regplot` and :class:`FacetGrid` to plot multiple + linear relationships in a dataset. + jointplot : Combine :func:`regplot` and :class:`JointGrid` (when used with + ``kind="reg"``). + pairplot : Combine :func:`regplot` and :class:`PairGrid` (when used with + ``kind="reg"``). + residplot : Plot the residuals of a linear regression model. + + Notes + ----- + + {regplot_vs_lmplot} + + + It's also easy to combine :func:`regplot` and :class:`JointGrid` or + :class:`PairGrid` through the :func:`jointplot` and :func:`pairplot` + functions, although these do not directly accept all of :func:`regplot`'s + parameters. + + Examples + -------- + + Plot the relationship between two variables in a DataFrame: + + .. plot:: + :context: close-figs + + >>> import seaborn as sns; sns.set_theme(color_codes=True) + >>> tips = sns.load_dataset("tips") + >>> ax = sns.regplot(x="total_bill", y="tip", data=tips) + + Plot with two variables defined as numpy arrays; use a different color: + + .. plot:: + :context: close-figs + + >>> import numpy as np; np.random.seed(8) + >>> mean, cov = [4, 6], [(1.5, .7), (.7, 1)] + >>> x, y = np.random.multivariate_normal(mean, cov, 80).T + >>> ax = sns.regplot(x=x, y=y, color="g") + + Plot with two variables defined as pandas Series; use a different marker: + + .. plot:: + :context: close-figs + + >>> import pandas as pd + >>> x, y = pd.Series(x, name="x_var"), pd.Series(y, name="y_var") + >>> ax = sns.regplot(x=x, y=y, marker="+") + + Use a 68% confidence interval, which corresponds with the standard error + of the estimate, and extend the regression line to the axis limits: + + ..
@_deprecate_positional_args
def residplot(
    *,
    x=None, y=None,
    data=None,
    lowess=False, x_partial=None, y_partial=None,
    order=1, robust=False, dropna=True, label=None, color=None,
    scatter_kws=None, line_kws=None, ax=None
):
    """Draw the residuals of a regression of ``y`` on ``x``.

    A regression (optionally robust or polynomial) is fit to the data, the
    fitted values are subtracted from ``y`` and the resulting residuals are
    shown as a scatterplot around a horizontal reference line at zero. A
    lowess smoother can be overlaid on the residuals, which helps to reveal
    any remaining structure.

    Parameters
    ----------
    x : vector or string
        Predictor variable, given as data or as a column name in `data`.
    y : vector or string
        Response variable, given as data or as a column name in `data`.
    data : DataFrame, optional
        DataFrame that `x` and `y` refer to when they are column names.
    lowess : boolean, optional
        If True, fit a lowess smoother to the residual scatterplot.
    {x, y}_partial : matrix or string(s) , optional
        Matrix with the same first dimension as `x`, or column name(s) in
        `data`. These are treated as confounding variables and removed from
        the `x` or `y` variables before plotting.
    order : int, optional
        Order of the polynomial fit used to compute the residuals.
    robust : boolean, optional
        If True, compute the residuals from a robust linear regression.
    dropna : boolean, optional
        If True, observations with missing data are ignored when fitting
        and plotting.
    label : string, optional
        Label used for the plot in any legend.
    color : matplotlib color, optional
        Color applied to all elements of the plot.
    {scatter, line}_kws : dictionaries, optional
        Extra keyword arguments forwarded to scatter() and plot() when the
        components of the plot are drawn.
    ax : matplotlib axis, optional
        Axis to draw into; when omitted the current axis is used (one is
        created if none exists).

    Returns
    -------
    ax: matplotlib axes
        Axes containing the residual plot.

    See Also
    --------
    regplot : Plot a simple linear regression model.
    jointplot : Draw a :func:`residplot` with univariate marginal distributions
                (when used with ``kind="resid"``).

    """
    resid_plotter = _RegressionPlotter(
        x, y, data, ci=None,
        order=order, robust=robust,
        x_partial=x_partial, y_partial=y_partial,
        dropna=dropna, color=color, label=label,
    )

    if ax is None:
        ax = plt.gca()

    # Evaluate the fit at the observed x values and replace y by the residual
    _grid, fitted, _err_bands = resid_plotter.fit_regression(grid=resid_plotter.x)
    resid_plotter.y = resid_plotter.y - fitted

    # Either smooth the residuals with lowess or draw no fit line at all
    if not lowess:
        resid_plotter.fit_reg = False
    else:
        resid_plotter.lowess = True

    # Reference line marking a residual of zero
    ax.axhline(0, ls=":", c=".2")

    # Copy the keyword dictionaries so the caller's objects stay untouched
    if scatter_kws is None:
        scatter_opts = {}
    else:
        scatter_opts = scatter_kws.copy()
    if line_kws is None:
        line_opts = {}
    else:
        line_opts = line_kws.copy()

    resid_plotter.plot(ax, scatter_opts, line_opts)
    return ax
These +parameters control what visual semantics are used to identify the different +subsets. It is possible to show up to three dimensions independently by +using all three semantic types, but this style of plot can be hard to +interpret and is often ineffective. Using redundant semantics (i.e. both +``hue`` and ``style`` for the same variable) can be helpful for making +graphics more accessible. + +See the :ref:`tutorial <relational_tutorial>` for more information. + """, + + relational_semantic=""" +The default treatment of the ``hue`` (and to a lesser extent, ``size``) +semantic, if present, depends on whether the variable is inferred to +represent "numeric" or "categorical" data. In particular, numeric variables +are represented with a sequential colormap by default, and the legend +entries show regular "ticks" with values that may or may not exist in the +data. This behavior can be controlled through various parameters, as +described and illustrated below. + """, +)) + +_relational_docs = dict( + + # --- Shared function parameters + data_vars=""" +x, y : names of variables in ``data`` or vector data + Input data variables; must be numeric. Can pass data directly or + reference columns in ``data``. + """, + data=""" +data : DataFrame, array, or list of arrays + Input data structure. If ``x`` and ``y`` are specified as names, this + should be a "long-form" DataFrame containing those columns. Otherwise + it is treated as "wide-form" data and grouping variables are ignored. + See the examples for the various ways this parameter can be specified + and the different effects of each. + """, + palette=""" +palette : string, list, dict, or matplotlib colormap + An object that determines how colors are chosen when ``hue`` is used. + It can be the name of a seaborn palette or matplotlib colormap, a list + of colors (anything matplotlib understands), a dict mapping levels + of the ``hue`` variable to colors, or a matplotlib colormap object.
+ """, + hue_order=""" +hue_order : list + Specified order for the appearance of the ``hue`` variable levels, + otherwise they are determined from the data. Not relevant when the + ``hue`` variable is numeric. + """, + hue_norm=""" +hue_norm : tuple or :class:`matplotlib.colors.Normalize` object + Normalization in data units for colormap applied to the ``hue`` + variable when it is numeric. Not relevant if it is categorical. + """, + sizes=""" +sizes : list, dict, or tuple + An object that determines how sizes are chosen when ``size`` is used. + It can always be a list of size values or a dict mapping levels of the + ``size`` variable to sizes. When ``size`` is numeric, it can also be + a tuple specifying the minimum and maximum size to use such that other + values are normalized within this range. + """, + size_order=""" +size_order : list + Specified order for appearance of the ``size`` variable levels, + otherwise they are determined from the data. Not relevant when the + ``size`` variable is numeric. + """, + size_norm=""" +size_norm : tuple or Normalize object + Normalization in data units for scaling plot objects when the + ``size`` variable is numeric. + """, + dashes=""" +dashes : boolean, list, or dictionary + Object determining how to draw the lines for different levels of the + ``style`` variable. Setting to ``True`` will use default dash codes, or + you can pass a list of dash codes or a dictionary mapping levels of the + ``style`` variable to dash codes. Setting to ``False`` will use solid + lines for all subsets. Dashes are specified as in matplotlib: a tuple + of ``(segment, gap)`` lengths, or an empty string to draw a solid line. + """, + markers=""" +markers : boolean, list, or dictionary + Object determining how to draw the markers for different levels of the + ``style`` variable. Setting to ``True`` will use default markers, or + you can pass a list of markers or a dictionary mapping levels of the + ``style`` variable to markers. 
Setting to ``False`` will draw + marker-less lines. Markers are specified as in matplotlib. + """, + style_order=""" +style_order : list + Specified order for appearance of the ``style`` variable levels + otherwise they are determined from the data. Not relevant when the + ``style`` variable is numeric. + """, + units=""" +units : vector or key in ``data`` + Grouping variable identifying sampling units. When used, a separate + line will be drawn for each unit with appropriate semantics, but no + legend entry will be added. Useful for showing distribution of + experimental replicates when exact identities are not needed. + """, + estimator=""" +estimator : name of pandas method or callable or None + Method for aggregating across multiple observations of the ``y`` + variable at the same ``x`` level. If ``None``, all observations will + be drawn. + """, + ci=""" +ci : int or "sd" or None + Size of the confidence interval to draw when aggregating with an + estimator. "sd" means to draw the standard deviation of the data. + Setting to ``None`` will skip bootstrapping. + """, + n_boot=""" +n_boot : int + Number of bootstraps to use for computing the confidence interval. + """, + seed=""" +seed : int, numpy.random.Generator, or numpy.random.RandomState + Seed or random number generator for reproducible bootstrapping. + """, + legend=""" +legend : "auto", "brief", "full", or False + How to draw the legend. If "brief", numeric ``hue`` and ``size`` + variables will be represented with a sample of evenly spaced values. + If "full", every group will get an entry in the legend. If "auto", + choose between brief or full representation based on number of levels. + If ``False``, no legend data is added and no legend is drawn. + """, + ax_in=""" +ax : matplotlib Axes + Axes object to draw the plot onto, otherwise uses the current Axes. + """, + ax_out=""" +ax : matplotlib Axes + Returns the Axes object with the plot drawn onto it. 
class _RelationalPlotter(VectorPlotter):

    wide_structure = {
        "x": "@index", "y": "@values", "hue": "@columns", "style": "@columns",
    }

    # TODO where best to define default parameters?
    sort = True

    def add_legend_data(self, ax):
        """Create empty proxy artists that describe the plot semantics.

        One artist is built per legend entry (hue, size and style levels, in
        that order) by calling the Axes method named by ``_legend_func`` with
        no data. The results are stored on ``legend_title``, ``legend_data``
        and ``legend_order``.
        """
        verbosity = self.legend
        if verbosity is True:
            verbosity = "auto"
        elif isinstance(verbosity, str) and verbosity not in ("auto", "brief", "full"):
            err = "`legend` must be 'auto', 'brief', 'full', or a boolean."
            raise ValueError(err)

        # Keyword arguments per legend entry, plus the order of first insertion
        legend_kwargs = {}
        keys = []

        # Assign a legend title if there is only going to be one sub-legend,
        # otherwise, subtitles will be inserted into the texts list with an
        # invisible handle (which is a hack)
        semantic_names = [
            self.variables.get(v, None) for v in ["hue", "size", "style"]
        ]
        titles = {name for name in semantic_names if name is not None}
        legend_title = titles.pop() if len(titles) == 1 else ""

        title_kws = dict(
            visible=False, color="w", s=0, linewidth=0, marker="", dashes=""
        )

        def register(var_name, val_name, **kws):
            # Entries sharing a (variable, value) key are merged into one
            key = (var_name, val_name)
            if key not in legend_kwargs:
                keys.append(key)
                legend_kwargs[key] = dict(**kws)
            else:
                legend_kwargs[key].update(**kws)

        # Define the maximum number of ticks to use for "brief" legends
        brief_ticks = 6

        def semantic_levels(mapping, var):
            # Return (levels, formatted levels) for a hue or size mapping
            use_brief = mapping.map_type == "numeric" and (
                verbosity == "brief"
                or (verbosity == "auto" and len(mapping.levels) > brief_ticks)
            )
            if use_brief:
                # Define how ticks will interpolate between the min/max values
                if isinstance(mapping.norm, mpl.colors.LogNorm):
                    locator = mpl.ticker.LogLocator(numticks=brief_ticks)
                else:
                    locator = mpl.ticker.MaxNLocator(nbins=brief_ticks)
                limits = min(mapping.levels), max(mapping.levels)
                return locator_to_legend_entries(
                    locator, limits, self.plot_data[var].infer_objects().dtype
                )
            if mapping.levels is None:
                return [], []
            return mapping.levels, mapping.levels

        # -- Add a legend for hue semantics
        hue_levels, hue_labels = semantic_levels(self._hue_map, "hue")

        # Add the hue semantic subtitle
        if not legend_title and self.variables.get("hue", None) is not None:
            hue_name = self.variables["hue"]
            register((hue_name, "title"), hue_name, **title_kws)

        # Add the hue semantic labels
        for level, label in zip(hue_levels, hue_labels):
            if level is None:
                continue
            color = self._hue_map(level)
            register(self.variables["hue"], label, color=color)

        # -- Add a legend for size semantics
        size_levels, size_labels = semantic_levels(self._size_map, "size")

        # Add the size semantic subtitle
        if not legend_title and self.variables.get("size", None) is not None:
            size_name = self.variables["size"]
            register((size_name, "title"), size_name, **title_kws)

        # Add the size semantic labels
        for level, label in zip(size_levels, size_labels):
            if level is None:
                continue
            size = self._size_map(level)
            register(self.variables["size"], label, linewidth=size, s=size)

        # -- Add a legend for style semantics

        # Add the style semantic title
        if not legend_title and self.variables.get("style", None) is not None:
            style_name = self.variables["style"]
            register((style_name, "title"), style_name, **title_kws)

        # Add the style semantic labels
        style_levels = self._style_map.levels
        if style_levels is not None:
            for level in style_levels:
                if level is None:
                    continue
                attrs = self._style_map(level)
                register(
                    self.variables["style"],
                    level,
                    marker=attrs.get("marker", ""),
                    dashes=attrs.get("dashes", ""),
                )

        # Build one empty artist per entry with the plotter's drawing function
        func = getattr(ax, self._legend_func)

        legend_data = {}
        for key in keys:
            label = key[1]
            kws = legend_kwargs[key]
            kws.setdefault("color", ".2")
            use_kws = {
                attr: kws[attr]
                for attr in self._legend_attributes + ["visible"]
                if attr in kws
            }
            artist = func([], [], label=label, **use_kws)
            if self._legend_func == "plot":
                # ``plot`` returns a list of lines; keep the single line
                artist = artist[0]
            legend_data[key] = artist

        self.legend_title = legend_title
        self.legend_data = legend_data
        self.legend_order = list(keys)
estimate and confidence interval using grouper.""" + func = self.estimator + ci = self.ci + n_boot = self.n_boot + seed = self.seed + + # Define a "null" CI for when we only have one value + null_ci = pd.Series(index=["low", "high"], dtype=float) + + # Function to bootstrap in the context of a pandas group by + def bootstrapped_cis(vals): + + if len(vals) <= 1: + return null_ci + + boots = bootstrap(vals, func=func, n_boot=n_boot, seed=seed) + cis = ci_func(boots, ci) + return pd.Series(cis, ["low", "high"]) + + # Group and get the aggregation estimate + grouped = vals.groupby(grouper, sort=self.sort) + est = grouped.agg(func) + + # Exit early if we don't want a confidence interval + if ci is None: + return est.index, est, None + + # Compute the error bar extents + if ci == "sd": + sd = grouped.std() + cis = pd.DataFrame(np.c_[est - sd, est + sd], + index=est.index, + columns=["low", "high"]).stack() + else: + cis = grouped.apply(bootstrapped_cis) + + # Unpack the CIs into "wide" format for plotting + if cis.notnull().any(): + cis = cis.unstack().reindex(est.index) + else: + cis = None + + return est.index, est, cis + + def plot(self, ax, kws): + """Draw the plot onto an axes, passing matplotlib kwargs.""" + + # Draw a test plot, using the passed in kwargs. The goal here is to + # honor both (a) the current state of the plot cycler and (b) the + # specified kwargs on all the lines we will draw, overriding when + # relevant with the data semantics. Note that we won't cycle + # internally; in other words, if ``hue`` is not used, all elements will + # have the same color, but they will have the color that you would have + # gotten from the corresponding matplotlib function, and calling the + # function will advance the axes property cycle. 
+ + scout, = ax.plot([], [], **kws) + + orig_color = kws.pop("color", scout.get_color()) + orig_marker = kws.pop("marker", scout.get_marker()) + orig_linewidth = kws.pop("linewidth", + kws.pop("lw", scout.get_linewidth())) + + # Note that scout.get_linestyle() is` not correct as of mpl 3.2 + orig_linestyle = kws.pop("linestyle", kws.pop("ls", None)) + + kws.setdefault("markeredgewidth", kws.pop("mew", .75)) + kws.setdefault("markeredgecolor", kws.pop("mec", "w")) + + scout.remove() + + # Set default error kwargs + err_kws = self.err_kws.copy() + if self.err_style == "band": + err_kws.setdefault("alpha", .2) + elif self.err_style == "bars": + pass + elif self.err_style is not None: + err = "`err_style` must be 'band' or 'bars', not {}" + raise ValueError(err.format(self.err_style)) + + # Set the default artist keywords + kws.update(dict( + color=orig_color, + marker=orig_marker, + linewidth=orig_linewidth, + linestyle=orig_linestyle, + )) + + # Loop over the semantic subsets and add to the plot + grouping_vars = "hue", "size", "style" + for sub_vars, sub_data in self.iter_data(grouping_vars, from_comp_data=True): + + if self.sort: + sort_vars = ["units", "x", "y"] + sort_cols = [var for var in sort_vars if var in self.variables] + sub_data = sub_data.sort_values(sort_cols) + + # TODO + # How to handle NA? We don't want NA to propagate through to the + # estimate/CI when some values are present, but we would also like + # matplotlib to show "gaps" in the line when all values are missing. + # This is straightforward absent aggregation, but complicated with it. + sub_data = sub_data.dropna() + + # Due to the original design, code below was written assuming that + # sub_data always has x, y, and units columns, which may be empty. 
+ # Adding this here to avoid otherwise disruptive changes, but it + # could get removed if the rest of the logic is sorted out + null = pd.Series(index=sub_data.index, dtype=float) + + x = sub_data.get("x", null) + y = sub_data.get("y", null) + u = sub_data.get("units", null) + + if self.estimator is not None: + if "units" in self.variables: + err = "estimator must be None when specifying units" + raise ValueError(err) + x, y, y_ci = self.aggregate(y, x, u) + else: + y_ci = None + + if "hue" in sub_vars: + kws["color"] = self._hue_map(sub_vars["hue"]) + if "size" in sub_vars: + kws["linewidth"] = self._size_map(sub_vars["size"]) + if "style" in sub_vars: + attributes = self._style_map(sub_vars["style"]) + if "dashes" in attributes: + kws["dashes"] = attributes["dashes"] + if "marker" in attributes: + kws["marker"] = attributes["marker"] + + line, = ax.plot([], [], **kws) + line_color = line.get_color() + line_alpha = line.get_alpha() + line_capstyle = line.get_solid_capstyle() + line.remove() + + # --- Draw the main line + + x, y = np.asarray(x), np.asarray(y) + + if "units" in self.variables: + for u_i in u.unique(): + rows = np.asarray(u == u_i) + ax.plot(x[rows], y[rows], **kws) + else: + line, = ax.plot(x, y, **kws) + + # --- Draw the confidence intervals + + if y_ci is not None: + + low, high = np.asarray(y_ci["low"]), np.asarray(y_ci["high"]) + + if self.err_style == "band": + + ax.fill_between(x, low, high, color=line_color, **err_kws) + + elif self.err_style == "bars": + + y_err = ci_to_errsize((low, high), y) + ebars = ax.errorbar(x, y, y_err, linestyle="", + color=line_color, alpha=line_alpha, + **err_kws) + + # Set the capstyle properly on the error bars + for obj in ebars.get_children(): + try: + obj.set_capstyle(line_capstyle) + except AttributeError: + # Does not exist on mpl < 2.2 + pass + + # Finalize the axes details + self._add_axis_labels(ax) + if self.legend: + self.add_legend_data(ax) + handles, _ = ax.get_legend_handles_labels() + if handles: 
+ legend = ax.legend(title=self.legend_title) + adjust_legend_subtitles(legend) + + +class _ScatterPlotter(_RelationalPlotter): + + _legend_attributes = ["color", "s", "marker"] + _legend_func = "scatter" + + def __init__( + self, *, + data=None, variables={}, + x_bins=None, y_bins=None, + estimator=None, ci=None, n_boot=None, + alpha=None, x_jitter=None, y_jitter=None, + legend=None + ): + + # TODO this is messy, we want the mapping to be agnoistic about + # the kind of plot to draw, but for the time being we need to set + # this information so the SizeMapping can use it + self._default_size_range = ( + np.r_[.5, 2] * np.square(mpl.rcParams["lines.markersize"]) + ) + + super().__init__(data=data, variables=variables) + + self.alpha = alpha + self.legend = legend + + def plot(self, ax, kws): + + # Draw a test plot, using the passed in kwargs. The goal here is to + # honor both (a) the current state of the plot cycler and (b) the + # specified kwargs on all the lines we will draw, overriding when + # relevant with the data semantics. Note that we won't cycle + # internally; in other words, if ``hue`` is not used, all elements will + # have the same color, but they will have the color that you would have + # gotten from the corresponding matplotlib function, and calling the + # function will advance the axes property cycle. + + scout_size = max( + np.atleast_1d(kws.get("s", [])).shape[0], + np.atleast_1d(kws.get("c", [])).shape[0], + ) + scout_x = scout_y = np.full(scout_size, np.nan) + scout = ax.scatter(scout_x, scout_y, **kws) + s = kws.pop("s", scout.get_sizes()) + c = kws.pop("c", scout.get_facecolors()) + scout.remove() + + kws.pop("color", None) # TODO is this optimal? 
+ + # --- Determine the visual attributes of the plot + + data = self.plot_data[list(self.variables)].dropna() + if not data.size: + return + + # Define the vectors of x and y positions + empty = np.full(len(data), np.nan) + x = data.get("x", empty) + y = data.get("y", empty) + + # Apply the mapping from semantic variables to artist attributes + if "hue" in self.variables: + c = self._hue_map(data["hue"]) + + if "size" in self.variables: + s = self._size_map(data["size"]) + + # Set defaults for other visual attributes + kws.setdefault("linewidth", .08 * np.sqrt(np.percentile(s, 10))) + + if "style" in self.variables: + # Use a representative marker so scatter sets the edgecolor + # properly for line art markers. We currently enforce either + # all or none line art so this works. + example_level = self._style_map.levels[0] + example_marker = self._style_map(example_level, "marker") + kws.setdefault("marker", example_marker) + + # Conditionally set the marker edgecolor based on whether the marker is "filled" + # See https://github.com/matplotlib/matplotlib/issues/17849 for context + m = kws.get("marker", mpl.rcParams.get("marker", "o")) + if not isinstance(m, mpl.markers.MarkerStyle): + m = mpl.markers.MarkerStyle(m) + if m.is_filled(): + kws.setdefault("edgecolor", "w") + + # TODO this makes it impossible to vary alpha with hue which might + # otherwise be useful? Should we just pass None? + kws["alpha"] = 1 if self.alpha == "auto" else self.alpha + + # Draw the scatter plot + args = np.asarray(x), np.asarray(y), np.asarray(s), np.asarray(c) + points = ax.scatter(*args, **kws) + + # Update the paths to get different marker shapes. + # This has to be done here because ax.scatter allows varying sizes + # and colors but only a single marker shape per call. 
+ if "style" in self.variables: + p = [self._style_map(val, "path") for val in data["style"]] + points.set_paths(p) + + # Finalize the axes details + self._add_axis_labels(ax) + if self.legend: + self.add_legend_data(ax) + handles, _ = ax.get_legend_handles_labels() + if handles: + legend = ax.legend(title=self.legend_title) + adjust_legend_subtitles(legend) + + +@_deprecate_positional_args +def lineplot( + *, + x=None, y=None, + hue=None, size=None, style=None, + data=None, + palette=None, hue_order=None, hue_norm=None, + sizes=None, size_order=None, size_norm=None, + dashes=True, markers=None, style_order=None, + units=None, estimator="mean", ci=95, n_boot=1000, seed=None, + sort=True, err_style="band", err_kws=None, + legend="auto", ax=None, **kwargs +): + + variables = _LinePlotter.get_semantics(locals()) + p = _LinePlotter( + data=data, variables=variables, + estimator=estimator, ci=ci, n_boot=n_boot, seed=seed, + sort=sort, err_style=err_style, err_kws=err_kws, legend=legend, + ) + + p.map_hue(palette=palette, order=hue_order, norm=hue_norm) + p.map_size(sizes=sizes, order=size_order, norm=size_norm) + p.map_style(markers=markers, dashes=dashes, order=style_order) + + if ax is None: + ax = plt.gca() + + if not p.has_xy_data: + return ax + + p._attach(ax) + + p.plot(ax, kwargs) + return ax + + +lineplot.__doc__ = """\ +Draw a line plot with possibility of several semantic groupings. + +{narrative.main_api} + +{narrative.relational_semantic} + +By default, the plot aggregates over multiple ``y`` values at each value of +``x`` and shows an estimate of the central tendency and a confidence +interval for that estimate. + +Parameters +---------- +{params.core.xy} +hue : vector or key in ``data`` + Grouping variable that will produce lines with different colors. + Can be either categorical or numeric, although color mapping will + behave differently in latter case. +size : vector or key in ``data`` + Grouping variable that will produce lines with different widths. 
+ Can be either categorical or numeric, although size mapping will + behave differently in latter case. +style : vector or key in ``data`` + Grouping variable that will produce lines with different dashes + and/or markers. Can have a numeric dtype but will always be treated + as categorical. +{params.core.data} +{params.core.palette} +{params.core.hue_order} +{params.core.hue_norm} +{params.rel.sizes} +{params.rel.size_order} +{params.rel.size_norm} +{params.rel.dashes} +{params.rel.markers} +{params.rel.style_order} +{params.rel.units} +{params.rel.estimator} +{params.rel.ci} +{params.rel.n_boot} +{params.rel.seed} +sort : boolean + If True, the data will be sorted by the x and y variables, otherwise + lines will connect points in the order they appear in the dataset. +err_style : "band" or "bars" + Whether to draw the confidence intervals with translucent error bands + or discrete error bars. +err_kws : dict of keyword arguments + Additional paramters to control the aesthetics of the error bars. The + kwargs are passed either to :meth:`matplotlib.axes.Axes.fill_between` + or :meth:`matplotlib.axes.Axes.errorbar`, depending on ``err_style``. +{params.rel.legend} +{params.core.ax} +kwargs : key, value mappings + Other keyword arguments are passed down to + :meth:`matplotlib.axes.Axes.plot`. + +Returns +------- +{returns.ax} + +See Also +-------- +{seealso.scatterplot} +{seealso.pointplot} + +Examples +-------- + +.. 
include:: ../docstrings/lineplot.rst + +""".format( + narrative=_relational_narrative, + params=_param_docs, + returns=_core_docs["returns"], + seealso=_core_docs["seealso"], +) + + +@_deprecate_positional_args +def scatterplot( + *, + x=None, y=None, + hue=None, style=None, size=None, data=None, + palette=None, hue_order=None, hue_norm=None, + sizes=None, size_order=None, size_norm=None, + markers=True, style_order=None, + x_bins=None, y_bins=None, + units=None, estimator=None, ci=95, n_boot=1000, + alpha=None, x_jitter=None, y_jitter=None, + legend="auto", ax=None, **kwargs +): + + variables = _ScatterPlotter.get_semantics(locals()) + p = _ScatterPlotter( + data=data, variables=variables, + x_bins=x_bins, y_bins=y_bins, + estimator=estimator, ci=ci, n_boot=n_boot, + alpha=alpha, x_jitter=x_jitter, y_jitter=y_jitter, legend=legend, + ) + + p.map_hue(palette=palette, order=hue_order, norm=hue_norm) + p.map_size(sizes=sizes, order=size_order, norm=size_norm) + p.map_style(markers=markers, order=style_order) + + if ax is None: + ax = plt.gca() + + if not p.has_xy_data: + return ax + + p._attach(ax) + + p.plot(ax, kwargs) + + return ax + + +scatterplot.__doc__ = """\ +Draw a scatter plot with possibility of several semantic groupings. + +{narrative.main_api} + +{narrative.relational_semantic} + +Parameters +---------- +{params.core.xy} +hue : vector or key in ``data`` + Grouping variable that will produce points with different colors. + Can be either categorical or numeric, although color mapping will + behave differently in latter case. +size : vector or key in ``data`` + Grouping variable that will produce points with different sizes. + Can be either categorical or numeric, although size mapping will + behave differently in latter case. +style : vector or key in ``data`` + Grouping variable that will produce points with different markers. + Can have a numeric dtype but will always be treated as categorical. 
+{params.core.data} +{params.core.palette} +{params.core.hue_order} +{params.core.hue_norm} +{params.rel.sizes} +{params.rel.size_order} +{params.rel.size_norm} +{params.rel.markers} +{params.rel.style_order} +{{x,y}}_bins : lists or arrays or functions + *Currently non-functional.* +{params.rel.units} + *Currently non-functional.* +{params.rel.estimator} + *Currently non-functional.* +{params.rel.ci} + *Currently non-functional.* +{params.rel.n_boot} + *Currently non-functional.* +alpha : float + Proportional opacity of the points. +{{x,y}}_jitter : booleans or floats + *Currently non-functional.* +{params.rel.legend} +{params.core.ax} +kwargs : key, value mappings + Other keyword arguments are passed down to + :meth:`matplotlib.axes.Axes.scatter`. + +Returns +------- +{returns.ax} + +See Also +-------- +{seealso.lineplot} +{seealso.stripplot} +{seealso.swarmplot} + +Examples +-------- + +.. include:: ../docstrings/scatterplot.rst + +""".format( + narrative=_relational_narrative, + params=_param_docs, + returns=_core_docs["returns"], + seealso=_core_docs["seealso"], +) + + +@_deprecate_positional_args +def relplot( + *, + x=None, y=None, + hue=None, size=None, style=None, data=None, + row=None, col=None, + col_wrap=None, row_order=None, col_order=None, + palette=None, hue_order=None, hue_norm=None, + sizes=None, size_order=None, size_norm=None, + markers=None, dashes=None, style_order=None, + legend="auto", kind="scatter", + height=5, aspect=1, facet_kws=None, + units=None, + **kwargs +): + + if kind == "scatter": + + plotter = _ScatterPlotter + func = scatterplot + markers = True if markers is None else markers + + elif kind == "line": + + plotter = _LinePlotter + func = lineplot + dashes = True if dashes is None else dashes + + else: + err = "Plot kind {} not recognized".format(kind) + raise ValueError(err) + + # Check for attempt to plot onto specific axes and warn + if "ax" in kwargs: + msg = ( + "relplot is a figure-level function and does not accept " + "the 
`ax` parameter. You may wish to try {}".format(kind + "plot") + ) + warnings.warn(msg, UserWarning) + kwargs.pop("ax") + + # Use the full dataset to map the semantics + p = plotter( + data=data, + variables=plotter.get_semantics(locals()), + legend=legend, + ) + p.map_hue(palette=palette, order=hue_order, norm=hue_norm) + p.map_size(sizes=sizes, order=size_order, norm=size_norm) + p.map_style(markers=markers, dashes=dashes, order=style_order) + + # Extract the semantic mappings + if "hue" in p.variables: + palette = p._hue_map.lookup_table + hue_order = p._hue_map.levels + hue_norm = p._hue_map.norm + else: + palette = hue_order = hue_norm = None + + if "size" in p.variables: + sizes = p._size_map.lookup_table + size_order = p._size_map.levels + size_norm = p._size_map.norm + + if "style" in p.variables: + style_order = p._style_map.levels + if markers: + markers = {k: p._style_map(k, "marker") for k in style_order} + else: + markers = None + if dashes: + dashes = {k: p._style_map(k, "dashes") for k in style_order} + else: + dashes = None + else: + markers = dashes = style_order = None + + # Now extract the data that would be used to draw a single plot + variables = p.variables + plot_data = p.plot_data + plot_semantics = p.semantics + + # Define the common plotting parameters + plot_kws = dict( + palette=palette, hue_order=hue_order, hue_norm=hue_norm, + sizes=sizes, size_order=size_order, size_norm=size_norm, + markers=markers, dashes=dashes, style_order=style_order, + legend=False, + ) + plot_kws.update(kwargs) + if kind == "scatter": + plot_kws.pop("dashes") + + # Add the grid semantics onto the plotter + grid_semantics = "row", "col" + p.semantics = plot_semantics + grid_semantics + p.assign_variables( + data=data, + variables=dict( + x=x, y=y, + hue=hue, size=size, style=style, units=units, + row=row, col=col, + ), + ) + + # Define the named variables for plotting on each facet + # Rename the variables with a leading underscore to avoid + # collisions with 
faceting variable names + plot_variables = {v: f"_{v}" for v in variables} + plot_kws.update(plot_variables) + + # Pass the row/col variables to FacetGrid with their original + # names so that the axes titles render correctly + grid_kws = {v: p.variables.get(v, None) for v in grid_semantics} + + # Rename the columns of the plot_data structure appropriately + new_cols = plot_variables.copy() + new_cols.update(grid_kws) + full_data = p.plot_data.rename(columns=new_cols) + + # Set up the FacetGrid object + facet_kws = {} if facet_kws is None else facet_kws.copy() + g = FacetGrid( + data=full_data.dropna(axis=1, how="all"), + **grid_kws, + col_wrap=col_wrap, row_order=row_order, col_order=col_order, + height=height, aspect=aspect, dropna=False, + **facet_kws + ) + + # Draw the plot + g.map_dataframe(func, **plot_kws) + + # Label the axes + g.set_axis_labels( + variables.get("x", None), variables.get("y", None) + ) + + # Show the legend + if legend: + # Replace the original plot data so the legend uses + # numeric data with the correct type + p.plot_data = plot_data + p.add_legend_data(g.axes.flat[0]) + if p.legend_data: + g.add_legend(legend_data=p.legend_data, + label_order=p.legend_order, + title=p.legend_title, + adjust_subtitles=True) + + # Rename the columns of the FacetGrid's `data` attribute + # to match the original column names + orig_cols = { + f"_{k}": f"_{k}_" if v is None else v for k, v in variables.items() + } + grid_data = g.data.rename(columns=orig_cols) + if data is not None and (x is not None or y is not None): + if not isinstance(data, pd.DataFrame): + data = pd.DataFrame(data) + g.data = pd.merge( + data, + grid_data[grid_data.columns.difference(data.columns)], + left_index=True, + right_index=True, + ) + else: + g.data = grid_data + + return g + + +relplot.__doc__ = """\ +Figure-level interface for drawing relational plots onto a FacetGrid. 
+ +This function provides access to several different axes-level functions +that show the relationship between two variables with semantic mappings +of subsets. The ``kind`` parameter selects the underlying axes-level +function to use: + +- :func:`scatterplot` (with ``kind="scatter"``; the default) +- :func:`lineplot` (with ``kind="line"``) + +Extra keyword arguments are passed to the underlying function, so you +should refer to the documentation for each to see kind-specific options. + +{narrative.main_api} + +{narrative.relational_semantic} + +After plotting, the :class:`FacetGrid` with the plot is returned and can +be used directly to tweak supporting plot details or add other layers. + +Note that, unlike when using the underlying plotting functions directly, +data must be passed in a long-form DataFrame with variables specified by +passing strings to ``x``, ``y``, and other parameters. + +Parameters +---------- +{params.core.xy} +hue : vector or key in ``data`` + Grouping variable that will produce elements with different colors. + Can be either categorical or numeric, although color mapping will + behave differently in latter case. +size : vector or key in ``data`` + Grouping variable that will produce elements with different sizes. + Can be either categorical or numeric, although size mapping will + behave differently in latter case. +style : vector or key in ``data`` + Grouping variable that will produce elements with different styles. + Can have a numeric dtype but will always be treated as categorical. +{params.core.data} +{params.facets.rowcol} +{params.facets.col_wrap} +row_order, col_order : lists of strings + Order to organize the rows and/or columns of the grid in, otherwise the + orders are inferred from the data objects. 
+{params.core.palette} +{params.core.hue_order} +{params.core.hue_norm} +{params.rel.sizes} +{params.rel.size_order} +{params.rel.size_norm} +{params.rel.style_order} +{params.rel.dashes} +{params.rel.markers} +{params.rel.legend} +kind : string + Kind of plot to draw, corresponding to a seaborn relational plot. + Options are {{``scatter`` and ``line``}}. +{params.facets.height} +{params.facets.aspect} +facet_kws : dict + Dictionary of other keyword arguments to pass to :class:`FacetGrid`. +{params.rel.units} +kwargs : key, value pairings + Other keyword arguments are passed through to the underlying plotting + function. + +Returns +------- +{returns.facetgrid} + +Examples +-------- + +.. include:: ../docstrings/relplot.rst + +""".format( + narrative=_relational_narrative, + params=_param_docs, + returns=_core_docs["returns"], + seealso=_core_docs["seealso"], +) diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/timeseries.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/timeseries.py new file mode 100644 index 0000000000000000000000000000000000000000..a3b25bf457929357c97fc5af9b699978da9bf981 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/timeseries.py @@ -0,0 +1,454 @@ +"""Timeseries plotting functions.""" +from __future__ import division +import numpy as np +import pandas as pd +from scipy import stats, interpolate +import matplotlib as mpl +import matplotlib.pyplot as plt + +import warnings + +from .external.six import string_types + +from . import utils +from . 
import algorithms as algo +from .palettes import color_palette + + +__all__ = ["tsplot"] + + +def tsplot(data, time=None, unit=None, condition=None, value=None, + err_style="ci_band", ci=68, interpolate=True, color=None, + estimator=np.mean, n_boot=5000, err_palette=None, err_kws=None, + legend=True, ax=None, **kwargs): + """Plot one or more timeseries with flexible representation of uncertainty. + + This function is intended to be used with data where observations are + nested within sampling units that were measured at multiple timepoints. + + It can take data specified either as a long-form (tidy) DataFrame or as an + ndarray with dimensions (unit, time) The interpretation of some of the + other parameters changes depending on the type of object passed as data. + + Parameters + ---------- + data : DataFrame or ndarray + Data for the plot. Should either be a "long form" dataframe or an + array with dimensions (unit, time, condition). In both cases, the + condition field/dimension is optional. The type of this argument + determines the interpretation of the next few parameters. When + using a DataFrame, the index has to be sequential. + time : string or series-like + Either the name of the field corresponding to time in the data + DataFrame or x values for a plot when data is an array. If a Series, + the name will be used to label the x axis. + unit : string + Field in the data DataFrame identifying the sampling unit (e.g. + subject, neuron, etc.). The error representation will collapse over + units at each time/condition observation. This has no role when data + is an array. + value : string + Either the name of the field corresponding to the data values in + the data DataFrame (i.e. the y coordinate) or a string that forms + the y axis label when data is an array. 
+ condition : string or Series-like + Either the name of the field identifying the condition an observation + falls under in the data DataFrame, or a sequence of names with a length + equal to the size of the third dimension of data. There will be a + separate trace plotted for each condition. If condition is a Series + with a name attribute, the name will form the title for the plot + legend (unless legend is set to False). + err_style : string or list of strings or None + Names of ways to plot uncertainty across units from set of + {ci_band, ci_bars, boot_traces, boot_kde, unit_traces, unit_points}. + Can use one or more than one method. + ci : float or list of floats in [0, 100] or "sd" or None + Confidence interval size(s). If a list, it will stack the error plots + for each confidence interval. If ``"sd"``, show standard deviation of + the observations instead of boostrapped confidence intervals. Only + relevant for error styles with "ci" in the name. + interpolate : boolean + Whether to do a linear interpolation between each timepoint when + plotting. The value of this parameter also determines the marker + used for the main plot traces, unless marker is specified as a keyword + argument. + color : seaborn palette or matplotlib color name or dictionary + Palette or color for the main plots and error representation (unless + plotting by unit, which can be separately controlled with err_palette). + If a dictionary, should map condition name to color spec. + estimator : callable + Function to determine central tendency and to pass to bootstrap + must take an ``axis`` argument. + n_boot : int + Number of bootstrap iterations. + err_palette : seaborn palette + Palette name or list of colors used when plotting data for each unit. + err_kws : dict, optional + Keyword argument dictionary passed through to matplotlib function + generating the error plot, + legend : bool, optional + If ``True`` and there is a ``condition`` variable, add a legend to + the plot. 
+ ax : axis object, optional + Plot in given axis; if None creates a new figure + kwargs : + Other keyword arguments are passed to main plot() call + + Returns + ------- + ax : matplotlib axis + axis with plot data + + Examples + -------- + + Plot a trace with translucent confidence bands: + + .. plot:: + :context: close-figs + + >>> import numpy as np; np.random.seed(22) + >>> import seaborn as sns; sns.set(color_codes=True) + >>> x = np.linspace(0, 15, 31) + >>> data = np.sin(x) + np.random.rand(10, 31) + np.random.randn(10, 1) + >>> ax = sns.tsplot(data=data) + + Plot a long-form dataframe with several conditions: + + .. plot:: + :context: close-figs + + >>> gammas = sns.load_dataset("gammas") + >>> ax = sns.tsplot(time="timepoint", value="BOLD signal", + ... unit="subject", condition="ROI", + ... data=gammas) + + Use error bars at the positions of the observations: + + .. plot:: + :context: close-figs + + >>> ax = sns.tsplot(data=data, err_style="ci_bars", color="g") + + Don't interpolate between the observations: + + .. plot:: + :context: close-figs + + >>> import matplotlib.pyplot as plt + >>> ax = sns.tsplot(data=data, err_style="ci_bars", interpolate=False) + + Show multiple confidence bands: + + .. plot:: + :context: close-figs + + >>> ax = sns.tsplot(data=data, ci=[68, 95], color="m") + + Show the standard deviation of the observations: + + .. plot:: + :context: close-figs + + >>> ax = sns.tsplot(data=data, ci="sd") + + Use a different estimator: + + .. plot:: + :context: close-figs + + >>> ax = sns.tsplot(data=data, estimator=np.median) + + Show each bootstrap resample: + + .. plot:: + :context: close-figs + + >>> ax = sns.tsplot(data=data, err_style="boot_traces", n_boot=500) + + Show the trace from each sampling unit: + + + .. plot:: + :context: close-figs + + >>> ax = sns.tsplot(data=data, err_style="unit_traces") + + """ + msg = ( + "The `tsplot` function is deprecated and will be removed in a future " + "release. 
Please update your code to use the new `lineplot` function." + ) + warnings.warn(msg, UserWarning) + + # Sort out default values for the parameters + if ax is None: + ax = plt.gca() + + if err_kws is None: + err_kws = {} + + # Handle different types of input data + if isinstance(data, pd.DataFrame): + + xlabel = time + ylabel = value + + # Condition is optional + if condition is None: + condition = pd.Series(1, index=data.index) + legend = False + legend_name = None + n_cond = 1 + else: + legend = True and legend + legend_name = condition + n_cond = len(data[condition].unique()) + + else: + data = np.asarray(data) + + # Data can be a timecourse from a single unit or + # several observations in one condition + if data.ndim == 1: + data = data[np.newaxis, :, np.newaxis] + elif data.ndim == 2: + data = data[:, :, np.newaxis] + n_unit, n_time, n_cond = data.shape + + # Units are experimental observations. Maybe subjects, or neurons + if unit is None: + units = np.arange(n_unit) + unit = "unit" + units = np.repeat(units, n_time * n_cond) + ylabel = None + + # Time forms the xaxis of the plot + if time is None: + times = np.arange(n_time) + else: + times = np.asarray(time) + xlabel = None + if hasattr(time, "name"): + xlabel = time.name + time = "time" + times = np.tile(np.repeat(times, n_cond), n_unit) + + # Conditions split the timeseries plots + if condition is None: + conds = range(n_cond) + legend = False + if isinstance(color, dict): + err = "Must have condition names if using color dict." 
+ raise ValueError(err) + else: + conds = np.asarray(condition) + legend = True and legend + if hasattr(condition, "name"): + legend_name = condition.name + else: + legend_name = None + condition = "cond" + conds = np.tile(conds, n_unit * n_time) + + # Value forms the y value in the plot + if value is None: + ylabel = None + else: + ylabel = value + value = "value" + + # Convert to long-form DataFrame + data = pd.DataFrame(dict(value=data.ravel(), + time=times, + unit=units, + cond=conds)) + + # Set up the err_style and ci arguments for the loop below + if isinstance(err_style, string_types): + err_style = [err_style] + elif err_style is None: + err_style = [] + if not hasattr(ci, "__iter__"): + ci = [ci] + + # Set up the color palette + if color is None: + current_palette = utils.get_color_cycle() + if len(current_palette) < n_cond: + colors = color_palette("husl", n_cond) + else: + colors = color_palette(n_colors=n_cond) + elif isinstance(color, dict): + colors = [color[c] for c in data[condition].unique()] + else: + try: + colors = color_palette(color, n_cond) + except ValueError: + color = mpl.colors.colorConverter.to_rgb(color) + colors = [color] * n_cond + + # Do a groupby with condition and plot each trace + c = None + for c, (cond, df_c) in enumerate(data.groupby(condition, sort=False)): + + df_c = df_c.pivot(unit, time, value) + x = df_c.columns.values.astype(np.float) + + # Bootstrap the data for confidence intervals + if "sd" in ci: + est = estimator(df_c.values, axis=0) + sd = np.std(df_c.values, axis=0) + cis = [(est - sd, est + sd)] + boot_data = df_c.values + else: + boot_data = algo.bootstrap(df_c.values, n_boot=n_boot, + axis=0, func=estimator) + cis = [utils.ci(boot_data, v, axis=0) for v in ci] + central_data = estimator(df_c.values, axis=0) + + # Get the color for this condition + color = colors[c] + + # Use subroutines to plot the uncertainty + for style in err_style: + + # Allow for null style (only plot central tendency) + if style is None: + 
continue + + # Grab the function from the global environment + try: + plot_func = globals()["_plot_%s" % style] + except KeyError: + raise ValueError("%s is not a valid err_style" % style) + + # Possibly set up to plot each observation in a different color + if err_palette is not None and "unit" in style: + orig_color = color + color = color_palette(err_palette, len(df_c.values)) + + # Pass all parameters to the error plotter as keyword args + plot_kwargs = dict(ax=ax, x=x, data=df_c.values, + boot_data=boot_data, + central_data=central_data, + color=color, err_kws=err_kws) + + # Plot the error representation, possibly for multiple cis + for ci_i in cis: + plot_kwargs["ci"] = ci_i + plot_func(**plot_kwargs) + + if err_palette is not None and "unit" in style: + color = orig_color + + # Plot the central trace + kwargs.setdefault("marker", "" if interpolate else "o") + ls = kwargs.pop("ls", "-" if interpolate else "") + kwargs.setdefault("linestyle", ls) + label = cond if legend else "_nolegend_" + ax.plot(x, central_data, color=color, label=label, **kwargs) + + if c is None: + raise RuntimeError("Invalid input data for tsplot.") + + # Pad the sides of the plot only when not interpolating + ax.set_xlim(x.min(), x.max()) + x_diff = x[1] - x[0] + if not interpolate: + ax.set_xlim(x.min() - x_diff, x.max() + x_diff) + + # Add the plot labels + if xlabel is not None: + ax.set_xlabel(xlabel) + if ylabel is not None: + ax.set_ylabel(ylabel) + if legend: + ax.legend(loc=0, title=legend_name) + + return ax + +# Subroutines for tsplot errorbar plotting +# ---------------------------------------- + + +def _plot_ci_band(ax, x, ci, color, err_kws, **kwargs): + """Plot translucent error bands around the central tendancy.""" + low, high = ci + if "alpha" not in err_kws: + err_kws["alpha"] = 0.2 + ax.fill_between(x, low, high, facecolor=color, **err_kws) + + +def _plot_ci_bars(ax, x, central_data, ci, color, err_kws, **kwargs): + """Plot error bars at each data point.""" + for x_i, 
y_i, (low, high) in zip(x, central_data, ci.T): + ax.plot([x_i, x_i], [low, high], color=color, + solid_capstyle="round", **err_kws) + + +def _plot_boot_traces(ax, x, boot_data, color, err_kws, **kwargs): + """Plot 250 traces from bootstrap.""" + err_kws.setdefault("alpha", 0.25) + err_kws.setdefault("linewidth", 0.25) + if "lw" in err_kws: + err_kws["linewidth"] = err_kws.pop("lw") + ax.plot(x, boot_data.T, color=color, label="_nolegend_", **err_kws) + + +def _plot_unit_traces(ax, x, data, ci, color, err_kws, **kwargs): + """Plot a trace for each observation in the original data.""" + if isinstance(color, list): + if "alpha" not in err_kws: + err_kws["alpha"] = .5 + for i, obs in enumerate(data): + ax.plot(x, obs, color=color[i], label="_nolegend_", **err_kws) + else: + if "alpha" not in err_kws: + err_kws["alpha"] = .2 + ax.plot(x, data.T, color=color, label="_nolegend_", **err_kws) + + +def _plot_unit_points(ax, x, data, color, err_kws, **kwargs): + """Plot each original data point discretely.""" + if isinstance(color, list): + for i, obs in enumerate(data): + ax.plot(x, obs, "o", color=color[i], alpha=0.8, markersize=4, + label="_nolegend_", **err_kws) + else: + ax.plot(x, data.T, "o", color=color, alpha=0.5, markersize=4, + label="_nolegend_", **err_kws) + + +def _plot_boot_kde(ax, x, boot_data, color, **kwargs): + """Plot the kernal density estimate of the bootstrap distribution.""" + kwargs.pop("data") + _ts_kde(ax, x, boot_data, color, **kwargs) + + +def _plot_unit_kde(ax, x, data, color, **kwargs): + """Plot the kernal density estimate over the sample.""" + _ts_kde(ax, x, data, color, **kwargs) + + +def _ts_kde(ax, x, data, color, **kwargs): + """Upsample over time and plot a KDE of the bootstrap distribution.""" + kde_data = [] + y_min, y_max = data.min(), data.max() + y_vals = np.linspace(y_min, y_max, 100) + upsampler = interpolate.interp1d(x, data) + data_upsample = upsampler(np.linspace(x.min(), x.max(), 100)) + for pt_data in data_upsample.T: + 
pt_kde = stats.kde.gaussian_kde(pt_data) + kde_data.append(pt_kde(y_vals)) + kde_data = np.transpose(kde_data) + rgb = mpl.colors.ColorConverter().to_rgb(color) + img = np.zeros((kde_data.shape[0], kde_data.shape[1], 4)) + img[:, :, :3] = rgb + kde_data /= kde_data.max(axis=0) + kde_data[kde_data > 1] = 1 + img[:, :, 3] = kde_data + ax.imshow(img, interpolation="spline16", zorder=2, + extent=(x.min(), x.max(), y_min, y_max), + aspect="auto", origin="lower") diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/widgets.py b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/widgets.py new file mode 100644 index 0000000000000000000000000000000000000000..c75cc66c4829a6381c3808cfef08e535135c9b10 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/seaborn/widgets.py @@ -0,0 +1,440 @@ +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.colors import LinearSegmentedColormap + +# Lots of different places that widgets could come from... 
+try: + from ipywidgets import interact, FloatSlider, IntSlider +except ImportError: + import warnings + # ignore ShimWarning raised by IPython, see GH #892 + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + try: + from IPython.html.widgets import interact, FloatSlider, IntSlider + except ImportError: + try: + from IPython.html.widgets import (interact, + FloatSliderWidget, + IntSliderWidget) + FloatSlider = FloatSliderWidget + IntSlider = IntSliderWidget + except ImportError: + pass + + +from .miscplot import palplot +from .palettes import (color_palette, dark_palette, light_palette, + diverging_palette, cubehelix_palette) + + +__all__ = ["choose_colorbrewer_palette", "choose_cubehelix_palette", + "choose_dark_palette", "choose_light_palette", + "choose_diverging_palette"] + + +def _init_mutable_colormap(): + """Create a matplotlib colormap that will be updated by the widgets.""" + greys = color_palette("Greys", 256) + cmap = LinearSegmentedColormap.from_list("interactive", greys) + cmap._init() + cmap._set_extremes() + return cmap + + +def _update_lut(cmap, colors): + """Change the LUT values in a matplotlib colormap in-place.""" + cmap._lut[:256] = colors + cmap._set_extremes() + + +def _show_cmap(cmap): + """Show a continuous matplotlib colormap.""" + from .rcmod import axes_style # Avoid circular import + with axes_style("white"): + f, ax = plt.subplots(figsize=(8.25, .75)) + ax.set(xticks=[], yticks=[]) + x = np.linspace(0, 1, 256)[np.newaxis, :] + ax.pcolormesh(x, cmap=cmap) + + +def choose_colorbrewer_palette(data_type, as_cmap=False): + """Select a palette from the ColorBrewer set. + + These palettes are built into matplotlib and can be used by name in + many seaborn functions, or by passing the object returned by this function. + + Parameters + ---------- + data_type : {'sequential', 'diverging', 'qualitative'} + This describes the kind of data you want to visualize. 
See the seaborn + color palette docs for more information about how to choose this value. + Note that you can pass substrings (e.g. 'q' for 'qualitative. + + as_cmap : bool + If True, the return value is a matplotlib colormap rather than a + list of discrete colors. + + Returns + ------- + pal or cmap : list of colors or matplotlib colormap + Object that can be passed to plotting functions. + + See Also + -------- + dark_palette : Create a sequential palette with dark low values. + light_palette : Create a sequential palette with bright low values. + diverging_palette : Create a diverging palette from selected colors. + cubehelix_palette : Create a sequential palette or colormap using the + cubehelix system. + + + """ + if data_type.startswith("q") and as_cmap: + raise ValueError("Qualitative palettes cannot be colormaps.") + + pal = [] + if as_cmap: + cmap = _init_mutable_colormap() + + if data_type.startswith("s"): + opts = ["Greys", "Reds", "Greens", "Blues", "Oranges", "Purples", + "BuGn", "BuPu", "GnBu", "OrRd", "PuBu", "PuRd", "RdPu", "YlGn", + "PuBuGn", "YlGnBu", "YlOrBr", "YlOrRd"] + variants = ["regular", "reverse", "dark"] + + @interact + def choose_sequential(name=opts, n=(2, 18), + desat=FloatSlider(min=0, max=1, value=1), + variant=variants): + if variant == "reverse": + name += "_r" + elif variant == "dark": + name += "_d" + + if as_cmap: + colors = color_palette(name, 256, desat) + _update_lut(cmap, np.c_[colors, np.ones(256)]) + _show_cmap(cmap) + else: + pal[:] = color_palette(name, n, desat) + palplot(pal) + + elif data_type.startswith("d"): + opts = ["RdBu", "RdGy", "PRGn", "PiYG", "BrBG", + "RdYlBu", "RdYlGn", "Spectral"] + variants = ["regular", "reverse"] + + @interact + def choose_diverging(name=opts, n=(2, 16), + desat=FloatSlider(min=0, max=1, value=1), + variant=variants): + if variant == "reverse": + name += "_r" + if as_cmap: + colors = color_palette(name, 256, desat) + _update_lut(cmap, np.c_[colors, np.ones(256)]) + _show_cmap(cmap) + 
else: + pal[:] = color_palette(name, n, desat) + palplot(pal) + + elif data_type.startswith("q"): + opts = ["Set1", "Set2", "Set3", "Paired", "Accent", + "Pastel1", "Pastel2", "Dark2"] + + @interact + def choose_qualitative(name=opts, n=(2, 16), + desat=FloatSlider(min=0, max=1, value=1)): + pal[:] = color_palette(name, n, desat) + palplot(pal) + + if as_cmap: + return cmap + return pal + + +def choose_dark_palette(input="husl", as_cmap=False): + """Launch an interactive widget to create a dark sequential palette. + + This corresponds with the :func:`dark_palette` function. This kind + of palette is good for data that range between relatively uninteresting + low values and interesting high values. + + Requires IPython 2+ and must be used in the notebook. + + Parameters + ---------- + input : {'husl', 'hls', 'rgb'} + Color space for defining the seed value. Note that the default is + different than the default input for :func:`dark_palette`. + as_cmap : bool + If True, the return value is a matplotlib colormap rather than a + list of discrete colors. + + Returns + ------- + pal or cmap : list of colors or matplotlib colormap + Object that can be passed to plotting functions. + + See Also + -------- + dark_palette : Create a sequential palette with dark low values. + light_palette : Create a sequential palette with bright low values. + cubehelix_palette : Create a sequential palette or colormap using the + cubehelix system. 
+ + """ + pal = [] + if as_cmap: + cmap = _init_mutable_colormap() + + if input == "rgb": + @interact + def choose_dark_palette_rgb(r=(0., 1.), + g=(0., 1.), + b=(0., 1.), + n=(3, 17)): + color = r, g, b + if as_cmap: + colors = dark_palette(color, 256, input="rgb") + _update_lut(cmap, colors) + _show_cmap(cmap) + else: + pal[:] = dark_palette(color, n, input="rgb") + palplot(pal) + + elif input == "hls": + @interact + def choose_dark_palette_hls(h=(0., 1.), + l=(0., 1.), # noqa: E741 + s=(0., 1.), + n=(3, 17)): + color = h, l, s + if as_cmap: + colors = dark_palette(color, 256, input="hls") + _update_lut(cmap, colors) + _show_cmap(cmap) + else: + pal[:] = dark_palette(color, n, input="hls") + palplot(pal) + + elif input == "husl": + @interact + def choose_dark_palette_husl(h=(0, 359), + s=(0, 99), + l=(0, 99), # noqa: E741 + n=(3, 17)): + color = h, s, l + if as_cmap: + colors = dark_palette(color, 256, input="husl") + _update_lut(cmap, colors) + _show_cmap(cmap) + else: + pal[:] = dark_palette(color, n, input="husl") + palplot(pal) + + if as_cmap: + return cmap + return pal + + +def choose_light_palette(input="husl", as_cmap=False): + """Launch an interactive widget to create a light sequential palette. + + This corresponds with the :func:`light_palette` function. This kind + of palette is good for data that range between relatively uninteresting + low values and interesting high values. + + Requires IPython 2+ and must be used in the notebook. + + Parameters + ---------- + input : {'husl', 'hls', 'rgb'} + Color space for defining the seed value. Note that the default is + different than the default input for :func:`light_palette`. + as_cmap : bool + If True, the return value is a matplotlib colormap rather than a + list of discrete colors. + + Returns + ------- + pal or cmap : list of colors or matplotlib colormap + Object that can be passed to plotting functions. + + See Also + -------- + light_palette : Create a sequential palette with bright low values. 
+ dark_palette : Create a sequential palette with dark low values. + cubehelix_palette : Create a sequential palette or colormap using the + cubehelix system. + + """ + pal = [] + if as_cmap: + cmap = _init_mutable_colormap() + + if input == "rgb": + @interact + def choose_light_palette_rgb(r=(0., 1.), + g=(0., 1.), + b=(0., 1.), + n=(3, 17)): + color = r, g, b + if as_cmap: + colors = light_palette(color, 256, input="rgb") + _update_lut(cmap, colors) + _show_cmap(cmap) + else: + pal[:] = light_palette(color, n, input="rgb") + palplot(pal) + + elif input == "hls": + @interact + def choose_light_palette_hls(h=(0., 1.), + l=(0., 1.), # noqa: E741 + s=(0., 1.), + n=(3, 17)): + color = h, l, s + if as_cmap: + colors = light_palette(color, 256, input="hls") + _update_lut(cmap, colors) + _show_cmap(cmap) + else: + pal[:] = light_palette(color, n, input="hls") + palplot(pal) + + elif input == "husl": + @interact + def choose_light_palette_husl(h=(0, 359), + s=(0, 99), + l=(0, 99), # noqa: E741 + n=(3, 17)): + color = h, s, l + if as_cmap: + colors = light_palette(color, 256, input="husl") + _update_lut(cmap, colors) + _show_cmap(cmap) + else: + pal[:] = light_palette(color, n, input="husl") + palplot(pal) + + if as_cmap: + return cmap + return pal + + +def choose_diverging_palette(as_cmap=False): + """Launch an interactive widget to choose a diverging color palette. + + This corresponds with the :func:`diverging_palette` function. This kind + of palette is good for data that range between interesting low values + and interesting high values with a meaningful midpoint. (For example, + change scores relative to some baseline value). + + Requires IPython 2+ and must be used in the notebook. + + Parameters + ---------- + as_cmap : bool + If True, the return value is a matplotlib colormap rather than a + list of discrete colors. + + Returns + ------- + pal or cmap : list of colors or matplotlib colormap + Object that can be passed to plotting functions. 
+ + See Also + -------- + diverging_palette : Create a diverging color palette or colormap. + choose_colorbrewer_palette : Interactively choose palettes from the + colorbrewer set, including diverging palettes. + + """ + pal = [] + if as_cmap: + cmap = _init_mutable_colormap() + + @interact + def choose_diverging_palette( + h_neg=IntSlider(min=0, + max=359, + value=220), + h_pos=IntSlider(min=0, + max=359, + value=10), + s=IntSlider(min=0, max=99, value=74), + l=IntSlider(min=0, max=99, value=50), # noqa: E741 + sep=IntSlider(min=1, max=50, value=10), + n=(2, 16), + center=["light", "dark"] + ): + if as_cmap: + colors = diverging_palette(h_neg, h_pos, s, l, sep, 256, center) + _update_lut(cmap, colors) + _show_cmap(cmap) + else: + pal[:] = diverging_palette(h_neg, h_pos, s, l, sep, n, center) + palplot(pal) + + if as_cmap: + return cmap + return pal + + +def choose_cubehelix_palette(as_cmap=False): + """Launch an interactive widget to create a sequential cubehelix palette. + + This corresponds with the :func:`cubehelix_palette` function. This kind + of palette is good for data that range between relatively uninteresting + low values and interesting high values. The cubehelix system allows the + palette to have more hue variance across the range, which can be helpful + for distinguishing a wider range of values. + + Requires IPython 2+ and must be used in the notebook. + + Parameters + ---------- + as_cmap : bool + If True, the return value is a matplotlib colormap rather than a + list of discrete colors. + + Returns + ------- + pal or cmap : list of colors or matplotlib colormap + Object that can be passed to plotting functions. + + See Also + -------- + cubehelix_palette : Create a sequential palette or colormap using the + cubehelix system. 
+ + """ + pal = [] + if as_cmap: + cmap = _init_mutable_colormap() + + @interact + def choose_cubehelix(n_colors=IntSlider(min=2, max=16, value=9), + start=FloatSlider(min=0, max=3, value=0), + rot=FloatSlider(min=-1, max=1, value=.4), + gamma=FloatSlider(min=0, max=5, value=1), + hue=FloatSlider(min=0, max=1, value=.8), + light=FloatSlider(min=0, max=1, value=.85), + dark=FloatSlider(min=0, max=1, value=.15), + reverse=False): + + if as_cmap: + colors = cubehelix_palette(256, start, rot, gamma, + hue, light, dark, reverse) + _update_lut(cmap, np.c_[colors, np.ones(256)]) + _show_cmap(cmap) + else: + pal[:] = cubehelix_palette(n_colors, start, rot, gamma, + hue, light, dark, reverse) + palplot(pal) + + if as_cmap: + return cmap + return pal diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/torchaudio/_torchaudio.so b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/torchaudio/_torchaudio.so new file mode 100644 index 0000000000000000000000000000000000000000..f7f6e377a03015c65c05763e41bf8470616a43ec --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/torchaudio/_torchaudio.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:690e44b7f2bf3a39d88316d1ea3389ca4cf523fd234b86fe8a9a4f167bc73b6d +size 2520912 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3-1.26.9.dist-info/INSTALLER b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3-1.26.9.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3-1.26.9.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3-1.26.9.dist-info/RECORD 
b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3-1.26.9.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..38532531a1bb8586d2d1d87f2c0963b22eeca0ca --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3-1.26.9.dist-info/RECORD @@ -0,0 +1,83 @@ +urllib3-1.26.9.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +urllib3-1.26.9.dist-info/LICENSE.txt,sha256=w3vxhuJ8-dvpYZ5V7f486nswCRzrPaY8fay-Dm13kHs,1115 +urllib3-1.26.9.dist-info/METADATA,sha256=UicmDwTLIrYL2O4fOBvq8wxMMAwM2L8JYP75jEXt8DQ,46325 +urllib3-1.26.9.dist-info/RECORD,, +urllib3-1.26.9.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +urllib3-1.26.9.dist-info/WHEEL,sha256=z9j0xAa_JmUKMpmz72K0ZGALSM_n-wQVmGbleXx2VHg,110 +urllib3-1.26.9.dist-info/top_level.txt,sha256=EMiXL2sKrTcmrMxIHTqdc3ET54pQI2Y072LexFEemvo,8 +urllib3/__init__.py,sha256=j3yzHIbmW7CS-IKQJ9-PPQf_YKO8EOAey_rMW0UR7us,2763 +urllib3/__pycache__/__init__.cpython-38.pyc,, +urllib3/__pycache__/_collections.cpython-38.pyc,, +urllib3/__pycache__/_version.cpython-38.pyc,, +urllib3/__pycache__/connection.cpython-38.pyc,, +urllib3/__pycache__/connectionpool.cpython-38.pyc,, +urllib3/__pycache__/exceptions.cpython-38.pyc,, +urllib3/__pycache__/fields.cpython-38.pyc,, +urllib3/__pycache__/filepost.cpython-38.pyc,, +urllib3/__pycache__/poolmanager.cpython-38.pyc,, +urllib3/__pycache__/request.cpython-38.pyc,, +urllib3/__pycache__/response.cpython-38.pyc,, +urllib3/_collections.py,sha256=Rp1mVyBgc_UlAcp6M3at1skJBXR5J43NawRTvW2g_XY,10811 +urllib3/_version.py,sha256=WE7GLYd0IVwgk-1gQZ-7jw00bCUYjYTIlcWIk7NOhEM,63 +urllib3/connection.py,sha256=mMuCIjdG01kRpFUENwJRoDKmYer7CZO56pfTbBCS7cw,20070 +urllib3/connectionpool.py,sha256=qz-ICrW6g4TZVCbDQ8fRe68BMpXkskkR9vAVY9zUWtA,39013 +urllib3/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +urllib3/contrib/__pycache__/__init__.cpython-38.pyc,, 
+urllib3/contrib/__pycache__/_appengine_environ.cpython-38.pyc,, +urllib3/contrib/__pycache__/appengine.cpython-38.pyc,, +urllib3/contrib/__pycache__/ntlmpool.cpython-38.pyc,, +urllib3/contrib/__pycache__/pyopenssl.cpython-38.pyc,, +urllib3/contrib/__pycache__/securetransport.cpython-38.pyc,, +urllib3/contrib/__pycache__/socks.cpython-38.pyc,, +urllib3/contrib/_appengine_environ.py,sha256=bDbyOEhW2CKLJcQqAKAyrEHN-aklsyHFKq6vF8ZFsmk,957 +urllib3/contrib/_securetransport/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +urllib3/contrib/_securetransport/__pycache__/__init__.cpython-38.pyc,, +urllib3/contrib/_securetransport/__pycache__/bindings.cpython-38.pyc,, +urllib3/contrib/_securetransport/__pycache__/low_level.cpython-38.pyc,, +urllib3/contrib/_securetransport/bindings.py,sha256=4Xk64qIkPBt09A5q-RIFUuDhNc9mXilVapm7WnYnzRw,17632 +urllib3/contrib/_securetransport/low_level.py,sha256=B2JBB2_NRP02xK6DCa1Pa9IuxrPwxzDzZbixQkb7U9M,13922 +urllib3/contrib/appengine.py,sha256=jz515jZYBDFTnhR4zqfeaCo6JdDgAQqYbqzHK9sDkfw,11010 +urllib3/contrib/ntlmpool.py,sha256=ej9gGvfAb2Gt00lafFp45SIoRz-QwrQ4WChm6gQmAlM,4538 +urllib3/contrib/pyopenssl.py,sha256=YIMyTiXiLPV_QfFw3PjZ31mGqJmM5EzxIjhSLxZ7VUM,16874 +urllib3/contrib/securetransport.py,sha256=izdx43gFoUGFSgxasZlOCL42FaM4vSsAVTmhO0EH1vM,34417 +urllib3/contrib/socks.py,sha256=aRi9eWXo9ZEb95XUxef4Z21CFlnnjbEiAo9HOseoMt4,7097 +urllib3/exceptions.py,sha256=0Mnno3KHTNfXRfY7638NufOPkUb6mXOm-Lqj-4x2w8A,8217 +urllib3/fields.py,sha256=kvLDCg_JmH1lLjUUEY_FLS8UhY7hBvDPuVETbY8mdrM,8579 +urllib3/filepost.py,sha256=5b_qqgRHVlL7uLtdAYBzBh-GHmU5AfJVt_2N0XS3PeY,2440 +urllib3/packages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +urllib3/packages/__pycache__/__init__.cpython-38.pyc,, +urllib3/packages/__pycache__/six.cpython-38.pyc,, +urllib3/packages/backports/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +urllib3/packages/backports/__pycache__/__init__.cpython-38.pyc,, 
+urllib3/packages/backports/__pycache__/makefile.cpython-38.pyc,, +urllib3/packages/backports/makefile.py,sha256=nbzt3i0agPVP07jqqgjhaYjMmuAi_W5E0EywZivVO8E,1417 +urllib3/packages/six.py,sha256=1LVW7ljqRirFlfExjwl-v1B7vSAUNTmzGMs-qays2zg,34666 +urllib3/poolmanager.py,sha256=0KOOJECoeLYVjUHvv-0h4Oq3FFQQ2yb-Fnjkbj8gJO0,19786 +urllib3/request.py,sha256=ZFSIqX0C6WizixecChZ3_okyu7BEv0lZu1VT0s6h4SM,5985 +urllib3/response.py,sha256=9b5NrbzHDnC2l_QC9uuNQPv75is1qgLa7M3Ax4Zr9z8,28276 +urllib3/util/__init__.py,sha256=JEmSmmqqLyaw8P51gUImZh8Gwg9i1zSe-DoqAitn2nc,1155 +urllib3/util/__pycache__/__init__.cpython-38.pyc,, +urllib3/util/__pycache__/connection.cpython-38.pyc,, +urllib3/util/__pycache__/proxy.cpython-38.pyc,, +urllib3/util/__pycache__/queue.cpython-38.pyc,, +urllib3/util/__pycache__/request.cpython-38.pyc,, +urllib3/util/__pycache__/response.cpython-38.pyc,, +urllib3/util/__pycache__/retry.cpython-38.pyc,, +urllib3/util/__pycache__/ssl_.cpython-38.pyc,, +urllib3/util/__pycache__/ssl_match_hostname.cpython-38.pyc,, +urllib3/util/__pycache__/ssltransport.cpython-38.pyc,, +urllib3/util/__pycache__/timeout.cpython-38.pyc,, +urllib3/util/__pycache__/url.cpython-38.pyc,, +urllib3/util/__pycache__/wait.cpython-38.pyc,, +urllib3/util/connection.py,sha256=5Lx2B1PW29KxBn2T0xkN1CBgRBa3gGVJBKoQoRogEVk,4901 +urllib3/util/proxy.py,sha256=zUvPPCJrp6dOF0N4GAVbOcl6o-4uXKSrGiTkkr5vUS4,1605 +urllib3/util/queue.py,sha256=nRgX8_eX-_VkvxoX096QWoz8Ps0QHUAExILCY_7PncM,498 +urllib3/util/request.py,sha256=fWiAaa8pwdLLIqoTLBxCC2e4ed80muzKU3e3HWWTzFQ,4225 +urllib3/util/response.py,sha256=GJpg3Egi9qaJXRwBh5wv-MNuRWan5BIu40oReoxWP28,3510 +urllib3/util/retry.py,sha256=iESg2PvViNdXBRY4MpL4h0kqwOOkHkxmLn1kkhFHPU8,22001 +urllib3/util/ssl_.py,sha256=c0sYiSC6272r6uPkxQpo5rYPP9QC1eR6oI7004gYqZo,17165 +urllib3/util/ssl_match_hostname.py,sha256=Ir4cZVEjmAk8gUAIHWSi7wtOO83UCYABY2xFD1Ql_WA,5758 +urllib3/util/ssltransport.py,sha256=NA-u5rMTrDFDFC8QzRKUEKMG0561hOD4qBTr3Z4pv6E,6895 
+urllib3/util/timeout.py,sha256=QSbBUNOB9yh6AnDn61SrLQ0hg5oz0I9-uXEG91AJuIg,10003 +urllib3/util/url.py,sha256=au9jkUMnVr9Qp_9kg4HfZx9q9ur6yXQ4u5M17In-UKY,14030 +urllib3/util/wait.py,sha256=3MUKRSAUJDB2tgco7qRUskW0zXGAWYvRRE4Q1_6xlLs,5404 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3-1.26.9.dist-info/REQUESTED b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3-1.26.9.dist-info/REQUESTED new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3-1.26.9.dist-info/WHEEL b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3-1.26.9.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..0b18a281107a0448a9980396d9d324ea2aa7a7f8 --- /dev/null +++ b/my_container_sandbox/workspace/anaconda3/lib/python3.8/site-packages/urllib3-1.26.9.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.37.1) +Root-Is-Purelib: true +Tag: py2-none-any +Tag: py3-none-any + diff --git a/tmp_inputs/case00003.nii.gz b/tmp_inputs/case00003.nii.gz new file mode 100644 index 0000000000000000000000000000000000000000..9975ea27453594e44d54736b7bcc4118fae7bbbd --- /dev/null +++ b/tmp_inputs/case00003.nii.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36e855b666b3327eaf0123ede8aae9f40c25f4f567e42f629096b5f914c95a12 +size 38949190 diff --git a/tmp_inputs_32_28/case00003.nii.gz b/tmp_inputs_32_28/case00003.nii.gz new file mode 100644 index 0000000000000000000000000000000000000000..2602538d8cb3d1543aa694add6836947600efbb3 --- /dev/null +++ b/tmp_inputs_32_28/case00003.nii.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5959b3caaa3af4e190a015342f6ca673132ddf0424b1dc8208a66c674b8eb672 +size 38929166 diff --git a/tmp_inputs_32_31/case00007.nii.gz 
b/tmp_inputs_32_31/case00007.nii.gz new file mode 100644 index 0000000000000000000000000000000000000000..167cfd313c6c0ca3dbf562caa0759595bb50843e --- /dev/null +++ b/tmp_inputs_32_31/case00007.nii.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a7e2a26e4380f226b6322de656b987c8360dd71ded8a3e123b6c74c80b1f7e0 +size 37525324 diff --git a/tmp_inputs_32_5/case00001.nii.gz b/tmp_inputs_32_5/case00001.nii.gz new file mode 100644 index 0000000000000000000000000000000000000000..82ebb7fa0142ecd64f75249db79c91e5f68a1150 --- /dev/null +++ b/tmp_inputs_32_5/case00001.nii.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:736504da8f2ad06160335b27cce279aece181b2de831736390aa56259f3546bd +size 41096835