Add files using upload-large-folder tool
Browse files- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/INSTALLER +1 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/LICENSE.txt +28 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/METADATA +74 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/RECORD +38 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/WHEEL +4 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/__init__.py +69 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/_version.py +16 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/archive.py +73 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/asyn.py +1096 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/callbacks.py +324 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/compression.py +175 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/config.py +131 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/conftest.py +55 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/core.py +738 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/exceptions.py +18 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/fuse.py +324 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/generic.py +408 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/mapping.py +251 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/parquet.py +541 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/registry.py +305 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/transaction.py +90 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/utils.py +740 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/INSTALLER +1 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/METADATA +347 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/RECORD +336 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/REQUESTED +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/WHEEL +5 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/entry_points.txt +7 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/top_level.txt +1 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/debug.pxi +36 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/docloader.pxi +178 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/dtd.pxi +479 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/iterparse.pxi +438 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/lxml.etree_api.h +204 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/nsclasses.pxi +281 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/objectify.pyx +2149 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/parser.pxi +2071 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/parsertarget.pxi +180 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/readonlytree.pxi +565 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/saxparser.pxi +875 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/usedoctest.py +13 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/xmlid.pxi +179 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/xmlschema.pxi +215 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/xslt.pxi +957 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/INSTALLER +1 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/LICENSE +18 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/METADATA +49 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/RECORD +8 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/WHEEL +6 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/top_level.txt +1 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/INSTALLER
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pip
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/LICENSE.txt
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Copyright 2014 Pallets
|
| 2 |
+
|
| 3 |
+
Redistribution and use in source and binary forms, with or without
|
| 4 |
+
modification, are permitted provided that the following conditions are
|
| 5 |
+
met:
|
| 6 |
+
|
| 7 |
+
1. Redistributions of source code must retain the above copyright
|
| 8 |
+
notice, this list of conditions and the following disclaimer.
|
| 9 |
+
|
| 10 |
+
2. Redistributions in binary form must reproduce the above copyright
|
| 11 |
+
notice, this list of conditions and the following disclaimer in the
|
| 12 |
+
documentation and/or other materials provided with the distribution.
|
| 13 |
+
|
| 14 |
+
3. Neither the name of the copyright holder nor the names of its
|
| 15 |
+
contributors may be used to endorse or promote products derived from
|
| 16 |
+
this software without specific prior written permission.
|
| 17 |
+
|
| 18 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 19 |
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 20 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
| 21 |
+
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 22 |
+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| 23 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
| 24 |
+
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
| 25 |
+
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
| 26 |
+
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
| 27 |
+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
| 28 |
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/METADATA
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.3
|
| 2 |
+
Name: click
|
| 3 |
+
Version: 8.1.8
|
| 4 |
+
Summary: Composable command line interface toolkit
|
| 5 |
+
Maintainer-email: Pallets <contact@palletsprojects.com>
|
| 6 |
+
Requires-Python: >=3.7
|
| 7 |
+
Description-Content-Type: text/markdown
|
| 8 |
+
Classifier: Development Status :: 5 - Production/Stable
|
| 9 |
+
Classifier: Intended Audience :: Developers
|
| 10 |
+
Classifier: License :: OSI Approved :: BSD License
|
| 11 |
+
Classifier: Operating System :: OS Independent
|
| 12 |
+
Classifier: Programming Language :: Python
|
| 13 |
+
Classifier: Typing :: Typed
|
| 14 |
+
Requires-Dist: colorama; platform_system == 'Windows'
|
| 15 |
+
Requires-Dist: importlib-metadata; python_version < '3.8'
|
| 16 |
+
Project-URL: Changes, https://click.palletsprojects.com/changes/
|
| 17 |
+
Project-URL: Chat, https://discord.gg/pallets
|
| 18 |
+
Project-URL: Documentation, https://click.palletsprojects.com/
|
| 19 |
+
Project-URL: Donate, https://palletsprojects.com/donate
|
| 20 |
+
Project-URL: Source, https://github.com/pallets/click/
|
| 21 |
+
|
| 22 |
+
# $ click_
|
| 23 |
+
|
| 24 |
+
Click is a Python package for creating beautiful command line interfaces
|
| 25 |
+
in a composable way with as little code as necessary. It's the "Command
|
| 26 |
+
Line Interface Creation Kit". It's highly configurable but comes with
|
| 27 |
+
sensible defaults out of the box.
|
| 28 |
+
|
| 29 |
+
It aims to make the process of writing command line tools quick and fun
|
| 30 |
+
while also preventing any frustration caused by the inability to
|
| 31 |
+
implement an intended CLI API.
|
| 32 |
+
|
| 33 |
+
Click in three points:
|
| 34 |
+
|
| 35 |
+
- Arbitrary nesting of commands
|
| 36 |
+
- Automatic help page generation
|
| 37 |
+
- Supports lazy loading of subcommands at runtime
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
## A Simple Example
|
| 41 |
+
|
| 42 |
+
```python
|
| 43 |
+
import click
|
| 44 |
+
|
| 45 |
+
@click.command()
|
| 46 |
+
@click.option("--count", default=1, help="Number of greetings.")
|
| 47 |
+
@click.option("--name", prompt="Your name", help="The person to greet.")
|
| 48 |
+
def hello(count, name):
|
| 49 |
+
"""Simple program that greets NAME for a total of COUNT times."""
|
| 50 |
+
for _ in range(count):
|
| 51 |
+
click.echo(f"Hello, {name}!")
|
| 52 |
+
|
| 53 |
+
if __name__ == '__main__':
|
| 54 |
+
hello()
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
```
|
| 58 |
+
$ python hello.py --count=3
|
| 59 |
+
Your name: Click
|
| 60 |
+
Hello, Click!
|
| 61 |
+
Hello, Click!
|
| 62 |
+
Hello, Click!
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
## Donate
|
| 67 |
+
|
| 68 |
+
The Pallets organization develops and supports Click and other popular
|
| 69 |
+
packages. In order to grow the community of contributors and users, and
|
| 70 |
+
allow the maintainers to devote more time to the projects, [please
|
| 71 |
+
donate today][].
|
| 72 |
+
|
| 73 |
+
[please donate today]: https://palletsprojects.com/donate
|
| 74 |
+
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/RECORD
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
click-8.1.8.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 2 |
+
click-8.1.8.dist-info/LICENSE.txt,sha256=morRBqOU6FO_4h9C9OctWSgZoigF2ZG18ydQKSkrZY0,1475
|
| 3 |
+
click-8.1.8.dist-info/METADATA,sha256=WJtQ6uGS2ybLfvUE4vC0XIhIBr4yFGwjrMBR2fiCQ-Q,2263
|
| 4 |
+
click-8.1.8.dist-info/RECORD,,
|
| 5 |
+
click-8.1.8.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
|
| 6 |
+
click/__init__.py,sha256=j1DJeCbga4ribkv5uyvIAzI0oFN13fW9mevDKShFelo,3188
|
| 7 |
+
click/__pycache__/__init__.cpython-312.pyc,,
|
| 8 |
+
click/__pycache__/_compat.cpython-312.pyc,,
|
| 9 |
+
click/__pycache__/_termui_impl.cpython-312.pyc,,
|
| 10 |
+
click/__pycache__/_textwrap.cpython-312.pyc,,
|
| 11 |
+
click/__pycache__/_winconsole.cpython-312.pyc,,
|
| 12 |
+
click/__pycache__/core.cpython-312.pyc,,
|
| 13 |
+
click/__pycache__/decorators.cpython-312.pyc,,
|
| 14 |
+
click/__pycache__/exceptions.cpython-312.pyc,,
|
| 15 |
+
click/__pycache__/formatting.cpython-312.pyc,,
|
| 16 |
+
click/__pycache__/globals.cpython-312.pyc,,
|
| 17 |
+
click/__pycache__/parser.cpython-312.pyc,,
|
| 18 |
+
click/__pycache__/shell_completion.cpython-312.pyc,,
|
| 19 |
+
click/__pycache__/termui.cpython-312.pyc,,
|
| 20 |
+
click/__pycache__/testing.cpython-312.pyc,,
|
| 21 |
+
click/__pycache__/types.cpython-312.pyc,,
|
| 22 |
+
click/__pycache__/utils.cpython-312.pyc,,
|
| 23 |
+
click/_compat.py,sha256=IGKh_J5QdfKELitnRfTGHneejWxoCw_NX9tfMbdcg3w,18730
|
| 24 |
+
click/_termui_impl.py,sha256=a5z7I9gOFeMmu7Gb6_RPyQ8GPuVP1EeblixcWSPSQPk,24783
|
| 25 |
+
click/_textwrap.py,sha256=10fQ64OcBUMuK7mFvh8363_uoOxPlRItZBmKzRJDgoY,1353
|
| 26 |
+
click/_winconsole.py,sha256=5ju3jQkcZD0W27WEMGqmEP4y_crUVzPCqsX_FYb7BO0,7860
|
| 27 |
+
click/core.py,sha256=Q1nEVdctZwvIPOlt4vfHko0TYnHCeE40UEEul8Wpyvs,114748
|
| 28 |
+
click/decorators.py,sha256=7t6F-QWowtLh6F_6l-4YV4Y4yNTcqFQEu9i37zIz68s,18925
|
| 29 |
+
click/exceptions.py,sha256=V7zDT6emqJ8iNl0kF1P5kpFmLMWQ1T1L7aNNKM4YR0w,9600
|
| 30 |
+
click/formatting.py,sha256=Frf0-5W33-loyY_i9qrwXR8-STnW3m5gvyxLVUdyxyk,9706
|
| 31 |
+
click/globals.py,sha256=cuJ6Bbo073lgEEmhjr394PeM-QFmXM-Ci-wmfsd7H5g,1954
|
| 32 |
+
click/parser.py,sha256=h4sndcpF5OHrZQN8vD8IWb5OByvW7ABbhRToxovrqS8,19067
|
| 33 |
+
click/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 34 |
+
click/shell_completion.py,sha256=TR0dXEGcvWb9Eo3aaQEXGhnvNS3FF4H4QcuLnvAvYo4,18636
|
| 35 |
+
click/termui.py,sha256=dLxiS70UOvIYBda_nEEZaPAFOVDVmRs1sEPMuLDowQo,28310
|
| 36 |
+
click/testing.py,sha256=3RA8anCf7TZ8-5RAF5it2Te-aWXBAL5VLasQnMiC2ZQ,16282
|
| 37 |
+
click/types.py,sha256=BD5Qqq4h-8kawBmOIzJlmq4xzThAf4wCvaOLZSBDNx0,36422
|
| 38 |
+
click/utils.py,sha256=ce-IrO9ilII76LGkU354pOdHbepM8UftfNH7SfMU_28,20330
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/WHEEL
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Wheel-Version: 1.0
|
| 2 |
+
Generator: flit 3.10.1
|
| 3 |
+
Root-Is-Purelib: true
|
| 4 |
+
Tag: py3-none-any
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/__init__.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from importlib.metadata import entry_points
|
| 2 |
+
|
| 3 |
+
from . import caching
|
| 4 |
+
from ._version import __version__ # noqa: F401
|
| 5 |
+
from .callbacks import Callback
|
| 6 |
+
from .compression import available_compressions
|
| 7 |
+
from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs
|
| 8 |
+
from .exceptions import FSTimeoutError
|
| 9 |
+
from .mapping import FSMap, get_mapper
|
| 10 |
+
from .registry import (
|
| 11 |
+
available_protocols,
|
| 12 |
+
filesystem,
|
| 13 |
+
get_filesystem_class,
|
| 14 |
+
register_implementation,
|
| 15 |
+
registry,
|
| 16 |
+
)
|
| 17 |
+
from .spec import AbstractFileSystem
|
| 18 |
+
|
| 19 |
+
__all__ = [
|
| 20 |
+
"AbstractFileSystem",
|
| 21 |
+
"FSTimeoutError",
|
| 22 |
+
"FSMap",
|
| 23 |
+
"filesystem",
|
| 24 |
+
"register_implementation",
|
| 25 |
+
"get_filesystem_class",
|
| 26 |
+
"get_fs_token_paths",
|
| 27 |
+
"get_mapper",
|
| 28 |
+
"open",
|
| 29 |
+
"open_files",
|
| 30 |
+
"open_local",
|
| 31 |
+
"registry",
|
| 32 |
+
"caching",
|
| 33 |
+
"Callback",
|
| 34 |
+
"available_protocols",
|
| 35 |
+
"available_compressions",
|
| 36 |
+
"url_to_fs",
|
| 37 |
+
]
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def process_entries():
|
| 41 |
+
if entry_points is not None:
|
| 42 |
+
try:
|
| 43 |
+
eps = entry_points()
|
| 44 |
+
except TypeError:
|
| 45 |
+
pass # importlib-metadata < 0.8
|
| 46 |
+
else:
|
| 47 |
+
if hasattr(eps, "select"): # Python 3.10+ / importlib_metadata >= 3.9.0
|
| 48 |
+
specs = eps.select(group="fsspec.specs")
|
| 49 |
+
else:
|
| 50 |
+
specs = eps.get("fsspec.specs", [])
|
| 51 |
+
registered_names = {}
|
| 52 |
+
for spec in specs:
|
| 53 |
+
err_msg = f"Unable to load filesystem from {spec}"
|
| 54 |
+
name = spec.name
|
| 55 |
+
if name in registered_names:
|
| 56 |
+
continue
|
| 57 |
+
registered_names[name] = True
|
| 58 |
+
register_implementation(
|
| 59 |
+
name,
|
| 60 |
+
spec.value.replace(":", "."),
|
| 61 |
+
errtxt=err_msg,
|
| 62 |
+
# We take our implementations as the ones to overload with if
|
| 63 |
+
# for some reason we encounter some, may be the same, already
|
| 64 |
+
# registered
|
| 65 |
+
clobber=True,
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
process_entries()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/_version.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# file generated by setuptools_scm
|
| 2 |
+
# don't change, don't track in version control
|
| 3 |
+
TYPE_CHECKING = False
|
| 4 |
+
if TYPE_CHECKING:
|
| 5 |
+
from typing import Tuple, Union
|
| 6 |
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
| 7 |
+
else:
|
| 8 |
+
VERSION_TUPLE = object
|
| 9 |
+
|
| 10 |
+
version: str
|
| 11 |
+
__version__: str
|
| 12 |
+
__version_tuple__: VERSION_TUPLE
|
| 13 |
+
version_tuple: VERSION_TUPLE
|
| 14 |
+
|
| 15 |
+
__version__ = version = '2024.5.0'
|
| 16 |
+
__version_tuple__ = version_tuple = (2024, 5, 0)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/archive.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fsspec import AbstractFileSystem
|
| 2 |
+
from fsspec.utils import tokenize
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class AbstractArchiveFileSystem(AbstractFileSystem):
|
| 6 |
+
"""
|
| 7 |
+
A generic superclass for implementing Archive-based filesystems.
|
| 8 |
+
|
| 9 |
+
Currently, it is shared amongst
|
| 10 |
+
:class:`~fsspec.implementations.zip.ZipFileSystem`,
|
| 11 |
+
:class:`~fsspec.implementations.libarchive.LibArchiveFileSystem` and
|
| 12 |
+
:class:`~fsspec.implementations.tar.TarFileSystem`.
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
def __str__(self):
|
| 16 |
+
return f"<Archive-like object {type(self).__name__} at {id(self)}>"
|
| 17 |
+
|
| 18 |
+
__repr__ = __str__
|
| 19 |
+
|
| 20 |
+
def ukey(self, path):
|
| 21 |
+
return tokenize(path, self.fo, self.protocol)
|
| 22 |
+
|
| 23 |
+
def _all_dirnames(self, paths):
|
| 24 |
+
"""Returns *all* directory names for each path in paths, including intermediate
|
| 25 |
+
ones.
|
| 26 |
+
|
| 27 |
+
Parameters
|
| 28 |
+
----------
|
| 29 |
+
paths: Iterable of path strings
|
| 30 |
+
"""
|
| 31 |
+
if len(paths) == 0:
|
| 32 |
+
return set()
|
| 33 |
+
|
| 34 |
+
dirnames = {self._parent(path) for path in paths} - {self.root_marker}
|
| 35 |
+
return dirnames | self._all_dirnames(dirnames)
|
| 36 |
+
|
| 37 |
+
def info(self, path, **kwargs):
|
| 38 |
+
self._get_dirs()
|
| 39 |
+
path = self._strip_protocol(path)
|
| 40 |
+
if path in {"", "/"} and self.dir_cache:
|
| 41 |
+
return {"name": "", "type": "directory", "size": 0}
|
| 42 |
+
if path in self.dir_cache:
|
| 43 |
+
return self.dir_cache[path]
|
| 44 |
+
elif path + "/" in self.dir_cache:
|
| 45 |
+
return self.dir_cache[path + "/"]
|
| 46 |
+
else:
|
| 47 |
+
raise FileNotFoundError(path)
|
| 48 |
+
|
| 49 |
+
def ls(self, path, detail=True, **kwargs):
|
| 50 |
+
self._get_dirs()
|
| 51 |
+
paths = {}
|
| 52 |
+
for p, f in self.dir_cache.items():
|
| 53 |
+
p = p.rstrip("/")
|
| 54 |
+
if "/" in p:
|
| 55 |
+
root = p.rsplit("/", 1)[0]
|
| 56 |
+
else:
|
| 57 |
+
root = ""
|
| 58 |
+
if root == path.rstrip("/"):
|
| 59 |
+
paths[p] = f
|
| 60 |
+
elif all(
|
| 61 |
+
(a == b)
|
| 62 |
+
for a, b in zip(path.split("/"), [""] + p.strip("/").split("/"))
|
| 63 |
+
):
|
| 64 |
+
# root directory entry
|
| 65 |
+
ppath = p.rstrip("/").split("/", 1)[0]
|
| 66 |
+
if ppath not in paths:
|
| 67 |
+
out = {"name": ppath, "size": 0, "type": "directory"}
|
| 68 |
+
paths[ppath] = out
|
| 69 |
+
if detail:
|
| 70 |
+
out = sorted(paths.values(), key=lambda _: _["name"])
|
| 71 |
+
return out
|
| 72 |
+
else:
|
| 73 |
+
return sorted(paths)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/asyn.py
ADDED
|
@@ -0,0 +1,1096 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import asyncio.events
|
| 3 |
+
import functools
|
| 4 |
+
import inspect
|
| 5 |
+
import io
|
| 6 |
+
import numbers
|
| 7 |
+
import os
|
| 8 |
+
import re
|
| 9 |
+
import threading
|
| 10 |
+
from contextlib import contextmanager
|
| 11 |
+
from glob import has_magic
|
| 12 |
+
from typing import TYPE_CHECKING, Iterable
|
| 13 |
+
|
| 14 |
+
from .callbacks import DEFAULT_CALLBACK
|
| 15 |
+
from .exceptions import FSTimeoutError
|
| 16 |
+
from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
|
| 17 |
+
from .spec import AbstractBufferedFile, AbstractFileSystem
|
| 18 |
+
from .utils import glob_translate, is_exception, other_paths
|
| 19 |
+
|
| 20 |
+
private = re.compile("_[^_]")
|
| 21 |
+
iothread = [None] # dedicated fsspec IO thread
|
| 22 |
+
loop = [None] # global event loop for any non-async instance
|
| 23 |
+
_lock = None # global lock placeholder
|
| 24 |
+
get_running_loop = asyncio.get_running_loop
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def get_lock():
|
| 28 |
+
"""Allocate or return a threading lock.
|
| 29 |
+
|
| 30 |
+
The lock is allocated on first use to allow setting one lock per forked process.
|
| 31 |
+
"""
|
| 32 |
+
global _lock
|
| 33 |
+
if not _lock:
|
| 34 |
+
_lock = threading.Lock()
|
| 35 |
+
return _lock
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def reset_lock():
|
| 39 |
+
"""Reset the global lock.
|
| 40 |
+
|
| 41 |
+
This should be called only on the init of a forked process to reset the lock to
|
| 42 |
+
None, enabling the new forked process to get a new lock.
|
| 43 |
+
"""
|
| 44 |
+
global _lock
|
| 45 |
+
|
| 46 |
+
iothread[0] = None
|
| 47 |
+
loop[0] = None
|
| 48 |
+
_lock = None
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
async def _runner(event, coro, result, timeout=None):
|
| 52 |
+
timeout = timeout if timeout else None # convert 0 or 0.0 to None
|
| 53 |
+
if timeout is not None:
|
| 54 |
+
coro = asyncio.wait_for(coro, timeout=timeout)
|
| 55 |
+
try:
|
| 56 |
+
result[0] = await coro
|
| 57 |
+
except Exception as ex:
|
| 58 |
+
result[0] = ex
|
| 59 |
+
finally:
|
| 60 |
+
event.set()
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def sync(loop, func, *args, timeout=None, **kwargs):
|
| 64 |
+
"""
|
| 65 |
+
Make loop run coroutine until it returns. Runs in other thread
|
| 66 |
+
|
| 67 |
+
Examples
|
| 68 |
+
--------
|
| 69 |
+
>>> fsspec.asyn.sync(fsspec.asyn.get_loop(), func, *args,
|
| 70 |
+
timeout=timeout, **kwargs)
|
| 71 |
+
"""
|
| 72 |
+
timeout = timeout if timeout else None # convert 0 or 0.0 to None
|
| 73 |
+
# NB: if the loop is not running *yet*, it is OK to submit work
|
| 74 |
+
# and we will wait for it
|
| 75 |
+
if loop is None or loop.is_closed():
|
| 76 |
+
raise RuntimeError("Loop is not running")
|
| 77 |
+
try:
|
| 78 |
+
loop0 = asyncio.events.get_running_loop()
|
| 79 |
+
if loop0 is loop:
|
| 80 |
+
raise NotImplementedError("Calling sync() from within a running loop")
|
| 81 |
+
except NotImplementedError:
|
| 82 |
+
raise
|
| 83 |
+
except RuntimeError:
|
| 84 |
+
pass
|
| 85 |
+
coro = func(*args, **kwargs)
|
| 86 |
+
result = [None]
|
| 87 |
+
event = threading.Event()
|
| 88 |
+
asyncio.run_coroutine_threadsafe(_runner(event, coro, result, timeout), loop)
|
| 89 |
+
while True:
|
| 90 |
+
# this loops allows thread to get interrupted
|
| 91 |
+
if event.wait(1):
|
| 92 |
+
break
|
| 93 |
+
if timeout is not None:
|
| 94 |
+
timeout -= 1
|
| 95 |
+
if timeout < 0:
|
| 96 |
+
raise FSTimeoutError
|
| 97 |
+
|
| 98 |
+
return_result = result[0]
|
| 99 |
+
if isinstance(return_result, asyncio.TimeoutError):
|
| 100 |
+
# suppress asyncio.TimeoutError, raise FSTimeoutError
|
| 101 |
+
raise FSTimeoutError from return_result
|
| 102 |
+
elif isinstance(return_result, BaseException):
|
| 103 |
+
raise return_result
|
| 104 |
+
else:
|
| 105 |
+
return return_result
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def sync_wrapper(func, obj=None):
|
| 109 |
+
"""Given a function, make so can be called in blocking contexts
|
| 110 |
+
|
| 111 |
+
Leave obj=None if defining within a class. Pass the instance if attaching
|
| 112 |
+
as an attribute of the instance.
|
| 113 |
+
"""
|
| 114 |
+
|
| 115 |
+
@functools.wraps(func)
|
| 116 |
+
def wrapper(*args, **kwargs):
|
| 117 |
+
self = obj or args[0]
|
| 118 |
+
return sync(self.loop, func, *args, **kwargs)
|
| 119 |
+
|
| 120 |
+
return wrapper
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
@contextmanager
|
| 124 |
+
def _selector_policy():
|
| 125 |
+
original_policy = asyncio.get_event_loop_policy()
|
| 126 |
+
try:
|
| 127 |
+
if os.name == "nt" and hasattr(asyncio, "WindowsSelectorEventLoopPolicy"):
|
| 128 |
+
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
|
| 129 |
+
|
| 130 |
+
yield
|
| 131 |
+
finally:
|
| 132 |
+
asyncio.set_event_loop_policy(original_policy)
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def get_loop():
    """Create or return the default fsspec IO loop

    The loop is run with ``run_forever`` on a separate daemon thread named
    "fsspecIO"; the thread object is stored in ``iothread[0]``.
    """
    if loop[0] is not None:
        return loop[0]

    with get_lock():
        # check again now that the lock is held: another thread may have
        # filled the slot between the test above and acquiring the lock
        if loop[0] is None:
            with _selector_policy():
                loop[0] = asyncio.new_event_loop()
            io_thread = threading.Thread(target=loop[0].run_forever, name="fsspecIO")
            io_thread.daemon = True
            io_thread.start()
            iothread[0] = io_thread
    return loop[0]
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
if TYPE_CHECKING:
|
| 155 |
+
import resource
|
| 156 |
+
|
| 157 |
+
ResourceError = resource.error
|
| 158 |
+
else:
|
| 159 |
+
try:
|
| 160 |
+
import resource
|
| 161 |
+
except ImportError:
|
| 162 |
+
resource = None
|
| 163 |
+
ResourceError = OSError
|
| 164 |
+
else:
|
| 165 |
+
ResourceError = getattr(resource, "error", OSError)
|
| 166 |
+
|
| 167 |
+
_DEFAULT_BATCH_SIZE = 128
|
| 168 |
+
_NOFILES_DEFAULT_BATCH_SIZE = 1280
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def _get_batch_size(nofiles=False):
    """Pick the number of coroutines to run concurrently.

    Lookup order:

    1. the ``fsspec.config.conf`` key ``"nofiles_gather_batch_size"`` (when
       ``nofiles`` is true) or ``"gather_batch_size"`` (otherwise);
    2. for ``nofiles``: ``_NOFILES_DEFAULT_BATCH_SIZE``;
    3. otherwise one eighth of the soft ``RLIMIT_NOFILE`` limit, but never
       less than 1, or ``-1`` when the limit is ``RLIM_INFINITY``;
    4. ``_DEFAULT_BATCH_SIZE`` when the ``resource`` module is unavailable or
       the limit cannot be read.

    Parameters
    ----------
    nofiles: bool
        Selects the "nofiles" config key and default instead of the
        file-descriptor based estimate.

    Returns
    -------
    int
        The configured value as stored in ``conf``, a positive batch size,
        or ``-1`` for an unlimited soft limit.
    """
    from fsspec.config import conf

    conf_key = "nofiles_gather_batch_size" if nofiles else "gather_batch_size"
    if conf_key in conf:
        return conf[conf_key]
    if nofiles:
        return _NOFILES_DEFAULT_BATCH_SIZE
    if resource is None:
        return _DEFAULT_BATCH_SIZE

    try:
        soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
    except (ImportError, ValueError, ResourceError):
        return _DEFAULT_BATCH_SIZE

    if soft_limit == resource.RLIM_INFINITY:
        return -1
    # a soft limit below 8 would floor-divide to 0, which is not a usable
    # batch size; always allow at least one coroutine at a time
    return max(1, soft_limit // 8)
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def running_async() -> bool:
    """Report whether the caller is being executed by a running event loop."""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # get_running_loop raises when no loop is running in this thread
        return False
    return True
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
async def _run_coros_in_chunks(
    coros,
    batch_size=None,
    callback=DEFAULT_CALLBACK,
    timeout=None,
    return_exceptions=False,
    nofiles=False,
):
    """Run the given coroutines in chunks.

    At most ``batch_size`` coroutines are in flight at once; a new one is
    started whenever a running one completes.

    Parameters
    ----------
    coros: list of coroutines to run
    batch_size: int or None
        Number of coroutines to submit/wait on simultaneously.
        If -1, then there will not be any throttling. If
        None, it will be inferred from _get_batch_size()
    callback: fsspec.callbacks.Callback instance
        Gets a relative_update when each coroutine completes
    timeout: number or None
        If given, each coroutine times out after this time. Note that, since
        there are multiple batches, the total run time of this function will in
        general be longer
    return_exceptions: bool
        Same meaning as in asyncio.gather
    nofiles: bool
        Passed to _get_batch_size() when the batch size is being inferred.

    Returns
    -------
    list
        One entry per input coroutine, in input order. With
        ``return_exceptions=True`` an entry may be the exception instance
        raised by that coroutine.
    """
    n = len(coros)
    if n == 0:
        # nothing to run; also avoids batch_size == -1 resolving to 0 below
        return []

    if batch_size is None:
        batch_size = _get_batch_size(nofiles=nofiles)

    if batch_size == -1:
        batch_size = n

    assert batch_size > 0

    async def _run_coro(coro, i):
        # pair each result (or returned exception) with its input position
        try:
            return await asyncio.wait_for(coro, timeout=timeout), i
        except Exception as e:
            if not return_exceptions:
                raise
            return e, i
        finally:
            callback.relative_update(1)

    i = 0
    results = [None] * n
    pending = set()

    while pending or i < n:
        # top up the in-flight set to the batch size
        while len(pending) < batch_size and i < n:
            pending.add(asyncio.ensure_future(_run_coro(coros[i], i)))
            i += 1

        if not pending:
            break

        done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
        while done:
            # awaiting a finished task re-raises its exception, if any
            result, k = await done.pop()
            results[k] = result

    return results
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
# these methods should be implemented as async by any async-able backend
|
| 275 |
+
async_methods = [
    # single-path primitives
    "_ls",
    "_cat_file",
    "_get_file",
    "_put_file",
    "_rm_file",
    "_cp_file",
    "_pipe_file",
    # path expansion and metadata
    "_expand_path",
    "_info",
    "_isfile",
    "_isdir",
    "_exists",
    # tree traversal
    "_walk",
    "_glob",
    "_find",
    "_du",
    "_size",
    # directory creation
    "_mkdir",
    "_makedirs",
]
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
class AsyncFileSystem(AbstractFileSystem):
|
| 299 |
+
"""Async file operations, default implementations
|
| 300 |
+
|
| 301 |
+
Passes bulk operations to asyncio.gather for concurrent operation.
|
| 302 |
+
|
| 303 |
+
Implementations that have concurrent batch operations and/or async methods
|
| 304 |
+
should inherit from this class instead of AbstractFileSystem. Docstrings are
|
| 305 |
+
copied from the un-underscored method in AbstractFileSystem, if not given.
|
| 306 |
+
"""
|
| 307 |
+
|
| 308 |
+
# note that methods do not have docstring here; they will be copied
|
| 309 |
+
# for _* methods and inferred for overridden methods.
|
| 310 |
+
|
| 311 |
+
async_impl = True
|
| 312 |
+
mirror_sync_methods = True
|
| 313 |
+
disable_throttling = False
|
| 314 |
+
|
| 315 |
+
def __init__(self, *args, asynchronous=False, loop=None, batch_size=None, **kwargs):
    self.asynchronous = asynchronous
    # pid of the creating process; the ``loop`` property compares against it
    self._pid = os.getpid()
    # asynchronous instances store no loop; otherwise use the one passed in,
    # or the shared loop from get_loop() when none (or a falsy one) is given
    self._loop = None if asynchronous else (loop or get_loop())
    self.batch_size = batch_size
    super().__init__(*args, **kwargs)
|
| 324 |
+
|
| 325 |
+
@property
def loop(self):
    # the stored loop is only handed out in the process that created
    # this instance
    if os.getpid() == self._pid:
        return self._loop
    raise RuntimeError("This class is not fork-safe")
|
| 330 |
+
|
| 331 |
+
async def _rm_file(self, path, **kwargs):
|
| 332 |
+
raise NotImplementedError
|
| 333 |
+
|
| 334 |
+
async def _rm(self, path, recursive=False, batch_size=None, **kwargs):
    # TODO: implement on_error
    # a falsy batch_size falls back to the instance-level setting
    batch_size = batch_size or self.batch_size
    targets = await self._expand_path(path, recursive=recursive)
    # removals are issued in the reverse of the order _expand_path returned
    removals = [self._rm_file(target, **kwargs) for target in reversed(targets)]
    return await _run_coros_in_chunks(
        removals,
        batch_size=batch_size,
        nofiles=True,
    )
|
| 343 |
+
|
| 344 |
+
async def _cp_file(self, path1, path2, **kwargs):
|
| 345 |
+
raise NotImplementedError
|
| 346 |
+
|
| 347 |
+
async def _copy(
    self,
    path1,
    path2,
    recursive=False,
    on_error=None,
    maxdepth=None,
    batch_size=None,
    **kwargs,
):
    # default error policy: "ignore" for recursive copies, "raise" otherwise
    if on_error is None:
        on_error = "ignore" if recursive else "raise"

    if isinstance(path1, list) and isinstance(path2, list):
        # No need to expand paths when both source and destination
        # are provided as lists
        paths1, paths2 = path1, path2
    else:
        source_is_str = isinstance(path1, str)
        paths1 = await self._expand_path(
            path1, maxdepth=maxdepth, recursive=recursive
        )
        if source_is_str and (not recursive or maxdepth is not None):
            # Non-recursive glob does not copy directories
            paths1 = [
                p for p in paths1 if not (trailing_sep(p) or await self._isdir(p))
            ]
            if not paths1:
                return

        source_is_file = len(paths1) == 1
        dest_is_dir = isinstance(path2, str) and (
            trailing_sep(path2) or await self._isdir(path2)
        )

        exists = source_is_str and (
            (has_magic(path1) and source_is_file)
            or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
        )
        paths2 = other_paths(
            paths1,
            path2,
            exists=exists,
            flatten=not source_is_str,
        )

    batch_size = batch_size or self.batch_size
    copies = [self._cp_file(src, dst, **kwargs) for src, dst in zip(paths1, paths2)]
    outcomes = await _run_coros_in_chunks(
        copies, batch_size=batch_size, return_exceptions=True, nofiles=True
    )

    # re-raise the first collected exception, except that missing files are
    # skipped when on_error is "ignore"
    for outcome in outcomes:
        if not is_exception(outcome):
            continue
        if on_error == "ignore" and isinstance(outcome, FileNotFoundError):
            continue
        raise outcome
|
| 406 |
+
|
| 407 |
+
async def _pipe_file(self, path, value, **kwargs):
|
| 408 |
+
raise NotImplementedError
|
| 409 |
+
|
| 410 |
+
async def _pipe(self, path, value=None, batch_size=None, **kwargs):
    # a single string path is normalised to a one-entry {path: value} mapping
    contents = {path: value} if isinstance(path, str) else path
    batch_size = batch_size or self.batch_size
    writes = [self._pipe_file(key, data, **kwargs) for key, data in contents.items()]
    return await _run_coros_in_chunks(
        writes,
        batch_size=batch_size,
        nofiles=True,
    )
|
| 419 |
+
|
| 420 |
+
async def _process_limits(self, url, start, end):
|
| 421 |
+
"""Helper for "Range"-based _cat_file"""
|
| 422 |
+
size = None
|
| 423 |
+
suff = False
|
| 424 |
+
if start is not None and start < 0:
|
| 425 |
+
# if start is negative and end None, end is the "suffix length"
|
| 426 |
+
if end is None:
|
| 427 |
+
end = -start
|
| 428 |
+
start = ""
|
| 429 |
+
suff = True
|
| 430 |
+
else:
|
| 431 |
+
size = size or (await self._info(url))["size"]
|
| 432 |
+
start = size + start
|
| 433 |
+
elif start is None:
|
| 434 |
+
start = 0
|
| 435 |
+
if not suff:
|
| 436 |
+
if end is not None and end < 0:
|
| 437 |
+
if start is not None:
|
| 438 |
+
size = size or (await self._info(url))["size"]
|
| 439 |
+
end = size + end
|
| 440 |
+
elif end is None:
|
| 441 |
+
end = ""
|
| 442 |
+
if isinstance(end, numbers.Integral):
|
| 443 |
+
end -= 1 # bytes range is inclusive
|
| 444 |
+
return f"bytes={start}-{end}"
|
| 445 |
+
|
| 446 |
+
async def _cat_file(self, path, start=None, end=None, **kwargs):
|
| 447 |
+
raise NotImplementedError
|
| 448 |
+
|
| 449 |
+
async def _cat(
    self, path, recursive=False, on_error="raise", batch_size=None, **kwargs
):
    paths = await self._expand_path(path, recursive=recursive)
    reads = [self._cat_file(p, **kwargs) for p in paths]
    batch_size = batch_size or self.batch_size
    # exceptions are always collected first; on_error decides what to do next
    out = await _run_coros_in_chunks(
        reads, batch_size=batch_size, nofiles=True, return_exceptions=True
    )
    if on_error == "raise":
        first_error = next((item for item in out if is_exception(item)), False)
        if first_error:
            raise first_error

    # a bare value is returned only when a single non-list path expanded to
    # exactly itself; otherwise the result is a {path: contents} mapping
    is_single = (
        len(paths) <= 1
        and not isinstance(path, list)
        and paths[0] == self._strip_protocol(path)
    )
    if is_single:
        return out[0]
    return {
        name: data
        for name, data in zip(paths, out)
        if on_error != "omit" or not is_exception(data)
    }
|
| 474 |
+
|
| 475 |
+
async def _cat_ranges(
    self,
    paths,
    starts,
    ends,
    max_gap=None,
    batch_size=None,
    on_error="return",
    **kwargs,
):
    """Get the contents of byte ranges from one or more files

    Parameters
    ----------
    paths: list
        A list of filepaths on this filesystem
    starts, ends: int or list
        Bytes limits of the read. If using a single int, the same value will be
        used to read all the specified files.
    """
    # TODO: on_error
    if max_gap is not None:
        # use utils.merge_offset_ranges
        raise NotImplementedError
    if not isinstance(paths, list):
        raise TypeError

    n_paths = len(paths)
    # broadcast scalar limits to one value per path
    if not isinstance(starts, Iterable):
        starts = [starts] * n_paths
    if not isinstance(ends, Iterable):
        ends = [ends] * n_paths
    if len(starts) != n_paths or len(ends) != n_paths:
        raise ValueError

    reads = [
        self._cat_file(target, start=lower, end=upper, **kwargs)
        for target, lower, upper in zip(paths, starts, ends)
    ]
    batch_size = batch_size or self.batch_size
    return await _run_coros_in_chunks(
        reads, batch_size=batch_size, nofiles=True, return_exceptions=True
    )
|
| 515 |
+
|
| 516 |
+
async def _put_file(self, lpath, rpath, **kwargs):
|
| 517 |
+
raise NotImplementedError
|
| 518 |
+
|
| 519 |
+
async def _put(
    self,
    lpath,
    rpath,
    recursive=False,
    callback=DEFAULT_CALLBACK,
    batch_size=None,
    maxdepth=None,
    **kwargs,
):
    """Copy file(s) from local.

    Copies a specific file or tree of files (if recursive=True). If rpath
    ends with a "/", it will be assumed to be a directory, and target files
    will go within.

    The put_file method will be called concurrently on a batch of files. The
    batch_size option can configure the amount of futures that can be executed
    at the same time. If it is -1, then all the files will be uploaded concurrently.
    The default can be set for this instance by passing "batch_size" in the
    constructor, or for all instances by setting the "gather_batch_size" key
    in ``fsspec.config.conf``, falling back to 1/8th of the system limit .
    """
    if isinstance(lpath, list) and isinstance(rpath, list):
        # No need to expand paths when both source and destination
        # are provided as lists
        rpaths, lpaths = rpath, lpath
    else:
        source_is_str = isinstance(lpath, str)
        if source_is_str:
            lpath = make_path_posix(lpath)
        local_fs = LocalFileSystem()
        lpaths = local_fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
        if source_is_str and (not recursive or maxdepth is not None):
            # Non-recursive glob does not copy directories
            lpaths = [
                p for p in lpaths if not (trailing_sep(p) or local_fs.isdir(p))
            ]
            if not lpaths:
                return

        source_is_file = len(lpaths) == 1
        dest_is_dir = isinstance(rpath, str) and (
            trailing_sep(rpath) or await self._isdir(rpath)
        )

        rpath = self._strip_protocol(rpath)
        exists = source_is_str and (
            (has_magic(lpath) and source_is_file)
            or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
        )
        rpaths = other_paths(
            lpaths,
            rpath,
            exists=exists,
            flatten=not source_is_str,
        )

    # split the pairs: local directories are created remotely, everything
    # else is uploaded as a file
    is_dir = {local: os.path.isdir(local) for local in lpaths}
    rdirs = []
    file_pairs = []
    for local, remote in zip(lpaths, rpaths):
        if is_dir[local]:
            rdirs.append(remote)
        else:
            file_pairs.append((local, remote))

    await asyncio.gather(*[self._makedirs(d, exist_ok=True) for d in rdirs])
    batch_size = batch_size or self.batch_size

    callback.set_size(len(file_pairs))
    uploads = []
    for lfile, rfile in file_pairs:
        put_file = callback.branch_coro(self._put_file)
        uploads.append(put_file(lfile, rfile, **kwargs))

    return await _run_coros_in_chunks(
        uploads, batch_size=batch_size, callback=callback
    )
|
| 592 |
+
|
| 593 |
+
async def _get_file(self, rpath, lpath, **kwargs):
|
| 594 |
+
raise NotImplementedError
|
| 595 |
+
|
| 596 |
+
async def _get(
    self,
    rpath,
    lpath,
    recursive=False,
    callback=DEFAULT_CALLBACK,
    maxdepth=None,
    **kwargs,
):
    """Copy file(s) to local.

    Copies a specific file or tree of files (if recursive=True). If lpath
    ends with a "/", it will be assumed to be a directory, and target files
    will go within. Can submit a list of paths, which may be glob-patterns
    and will be expanded.

    The get_file method will be called concurrently on a batch of files. The
    batch_size option can configure the amount of futures that can be executed
    at the same time. If it is -1, then all the files will be downloaded
    concurrently. The default can be set for this instance by passing
    "batch_size" in the constructor, or for all instances by setting the
    "gather_batch_size" key in ``fsspec.config.conf``, falling back to 1/8th
    of the system limit.
    """
    if isinstance(lpath, list) and isinstance(rpath, list):
        # No need to expand paths when both source and destination
        # are provided as lists
        rpaths = rpath
        lpaths = lpath
    else:
        source_is_str = isinstance(rpath, str)
        # First check for rpath trailing slash as _strip_protocol removes it.
        source_not_trailing_sep = source_is_str and not trailing_sep(rpath)
        rpath = self._strip_protocol(rpath)
        rpaths = await self._expand_path(
            rpath, recursive=recursive, maxdepth=maxdepth
        )
        if source_is_str and (not recursive or maxdepth is not None):
            # Non-recursive glob does not copy directories
            rpaths = [
                p for p in rpaths if not (trailing_sep(p) or await self._isdir(p))
            ]
            if not rpaths:
                return

        lpath = make_path_posix(lpath)
        source_is_file = len(rpaths) == 1
        dest_is_dir = isinstance(lpath, str) and (
            trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
        )

        exists = source_is_str and (
            (has_magic(rpath) and source_is_file)
            or (not has_magic(rpath) and dest_is_dir and source_not_trailing_sep)
        )
        lpaths = other_paths(
            rpaths,
            lpath,
            exists=exists,
            flatten=not source_is_str,
        )

    # create each distinct parent directory once; a bare relative file name
    # has an empty dirname, which os.makedirs would reject
    for parent in dict.fromkeys(os.path.dirname(lp) for lp in lpaths):
        if parent:
            os.makedirs(parent, exist_ok=True)
    batch_size = kwargs.pop("batch_size", self.batch_size)

    downloads = []
    callback.set_size(len(lpaths))
    # distinct loop names so the lpath/rpath parameters are not clobbered
    for local_file, remote_file in zip(lpaths, rpaths):
        get_file = callback.branch_coro(self._get_file)
        downloads.append(get_file(remote_file, local_file, **kwargs))
    return await _run_coros_in_chunks(
        downloads, batch_size=batch_size, callback=callback
    )
|
| 668 |
+
|
| 669 |
+
async def _isfile(self, path):
|
| 670 |
+
try:
|
| 671 |
+
return (await self._info(path))["type"] == "file"
|
| 672 |
+
except: # noqa: E722
|
| 673 |
+
return False
|
| 674 |
+
|
| 675 |
+
async def _isdir(self, path):
|
| 676 |
+
try:
|
| 677 |
+
return (await self._info(path))["type"] == "directory"
|
| 678 |
+
except OSError:
|
| 679 |
+
return False
|
| 680 |
+
|
| 681 |
+
async def _size(self, path):
|
| 682 |
+
return (await self._info(path)).get("size", None)
|
| 683 |
+
|
| 684 |
+
async def _sizes(self, paths, batch_size=None):
    # one _size lookup per path, throttled by the batch size
    batch_size = batch_size or self.batch_size
    lookups = [self._size(p) for p in paths]
    return await _run_coros_in_chunks(lookups, batch_size=batch_size)
|
| 689 |
+
|
| 690 |
+
async def _exists(self, path, **kwargs):
|
| 691 |
+
try:
|
| 692 |
+
await self._info(path, **kwargs)
|
| 693 |
+
return True
|
| 694 |
+
except FileNotFoundError:
|
| 695 |
+
return False
|
| 696 |
+
|
| 697 |
+
async def _info(self, path, **kwargs):
|
| 698 |
+
raise NotImplementedError
|
| 699 |
+
|
| 700 |
+
async def _ls(self, path, detail=True, **kwargs):
|
| 701 |
+
raise NotImplementedError
|
| 702 |
+
|
| 703 |
+
async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs):
|
| 704 |
+
if maxdepth is not None and maxdepth < 1:
|
| 705 |
+
raise ValueError("maxdepth must be at least 1")
|
| 706 |
+
|
| 707 |
+
path = self._strip_protocol(path)
|
| 708 |
+
full_dirs = {}
|
| 709 |
+
dirs = {}
|
| 710 |
+
files = {}
|
| 711 |
+
|
| 712 |
+
detail = kwargs.pop("detail", False)
|
| 713 |
+
try:
|
| 714 |
+
listing = await self._ls(path, detail=True, **kwargs)
|
| 715 |
+
except (FileNotFoundError, OSError) as e:
|
| 716 |
+
if on_error == "raise":
|
| 717 |
+
raise
|
| 718 |
+
elif callable(on_error):
|
| 719 |
+
on_error(e)
|
| 720 |
+
if detail:
|
| 721 |
+
yield path, {}, {}
|
| 722 |
+
else:
|
| 723 |
+
yield path, [], []
|
| 724 |
+
return
|
| 725 |
+
|
| 726 |
+
for info in listing:
|
| 727 |
+
# each info name must be at least [path]/part , but here
|
| 728 |
+
# we check also for names like [path]/part/
|
| 729 |
+
pathname = info["name"].rstrip("/")
|
| 730 |
+
name = pathname.rsplit("/", 1)[-1]
|
| 731 |
+
if info["type"] == "directory" and pathname != path:
|
| 732 |
+
# do not include "self" path
|
| 733 |
+
full_dirs[name] = pathname
|
| 734 |
+
dirs[name] = info
|
| 735 |
+
elif pathname == path:
|
| 736 |
+
# file-like with same name as give path
|
| 737 |
+
files[""] = info
|
| 738 |
+
else:
|
| 739 |
+
files[name] = info
|
| 740 |
+
|
| 741 |
+
if detail:
|
| 742 |
+
yield path, dirs, files
|
| 743 |
+
else:
|
| 744 |
+
yield path, list(dirs), list(files)
|
| 745 |
+
|
| 746 |
+
if maxdepth is not None:
|
| 747 |
+
maxdepth -= 1
|
| 748 |
+
if maxdepth < 1:
|
| 749 |
+
return
|
| 750 |
+
|
| 751 |
+
for d in dirs:
|
| 752 |
+
async for _ in self._walk(
|
| 753 |
+
full_dirs[d], maxdepth=maxdepth, detail=detail, **kwargs
|
| 754 |
+
):
|
| 755 |
+
yield _
|
| 756 |
+
|
| 757 |
+
async def _glob(self, path, maxdepth=None, **kwargs):
    if maxdepth is not None and maxdepth < 1:
        raise ValueError("maxdepth must be at least 1")

    import re

    seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
    ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
    path = self._strip_protocol(path)
    append_slash_to_dirname = ends_with_sep or path.endswith(
        tuple(sep + "**" for sep in seps)
    )

    def first_index(token):
        # position of the first occurrence, or len(path) when absent
        position = path.find(token)
        return position if position >= 0 else len(path)

    # index of the earliest glob character in the path
    min_idx = min(first_index("*"), first_index("?"), first_index("["))

    detail = kwargs.pop("detail", False)

    if not has_magic(path):
        # plain path: the result is the path itself, if it exists
        if await self._exists(path, **kwargs):
            if detail:
                return {path: await self._info(path, **kwargs)}
            return [path]
        # glob of non-existent returns empty
        return {} if detail else []

    if "/" in path[:min_idx]:
        # search from the last directory that precedes any glob character
        min_idx = path[:min_idx].rindex("/")
        root = path[: min_idx + 1]
    else:
        root = ""
    depth = path[min_idx + 1 :].count("/") + 1

    if "**" in path:
        if maxdepth is None:
            depth = None
        else:
            idx_double_stars = path.find("**")
            depth_double_stars = path[idx_double_stars:].count("/") + 1
            depth = depth - depth_double_stars + maxdepth

    allpaths = await self._find(
        root, maxdepth=depth, withdirs=True, detail=True, **kwargs
    )

    matcher = re.compile(glob_translate(path + ("/" if ends_with_sep else "")))

    out = {}
    for candidate, info in sorted(allpaths.items()):
        # directories are matched with a trailing slash when requested
        if append_slash_to_dirname and info["type"] == "directory":
            subject = candidate + "/"
        else:
            subject = candidate
        if matcher.match(subject):
            out[candidate] = info

    return out if detail else list(out)
|
| 827 |
+
|
| 828 |
+
async def _du(self, path, total=True, maxdepth=None, **kwargs):
|
| 829 |
+
sizes = {}
|
| 830 |
+
# async for?
|
| 831 |
+
for f in await self._find(path, maxdepth=maxdepth, **kwargs):
|
| 832 |
+
info = await self._info(f)
|
| 833 |
+
sizes[info["name"]] = info["size"]
|
| 834 |
+
if total:
|
| 835 |
+
return sum(sizes.values())
|
| 836 |
+
else:
|
| 837 |
+
return sizes
|
| 838 |
+
|
| 839 |
+
async def _find(self, path, maxdepth=None, withdirs=False, **kwargs):
|
| 840 |
+
path = self._strip_protocol(path)
|
| 841 |
+
out = {}
|
| 842 |
+
detail = kwargs.pop("detail", False)
|
| 843 |
+
|
| 844 |
+
# Add the root directory if withdirs is requested
|
| 845 |
+
# This is needed for posix glob compliance
|
| 846 |
+
if withdirs and path != "" and await self._isdir(path):
|
| 847 |
+
out[path] = await self._info(path)
|
| 848 |
+
|
| 849 |
+
# async for?
|
| 850 |
+
async for _, dirs, files in self._walk(path, maxdepth, detail=True, **kwargs):
|
| 851 |
+
if withdirs:
|
| 852 |
+
files.update(dirs)
|
| 853 |
+
out.update({info["name"]: info for name, info in files.items()})
|
| 854 |
+
if not out and (await self._isfile(path)):
|
| 855 |
+
# walk works on directories, but find should also return [path]
|
| 856 |
+
# when path happens to be a file
|
| 857 |
+
out[path] = {}
|
| 858 |
+
names = sorted(out)
|
| 859 |
+
if not detail:
|
| 860 |
+
return names
|
| 861 |
+
else:
|
| 862 |
+
return {name: out[name] for name in names}
|
| 863 |
+
|
| 864 |
+
async def _expand_path(self, path, recursive=False, maxdepth=None):
    if maxdepth is not None and maxdepth < 1:
        raise ValueError("maxdepth must be at least 1")

    if isinstance(path, str):
        # a single string is handled as a one-element list
        out = await self._expand_path([path], recursive, maxdepth)
    else:
        out = set()
        path = [self._strip_protocol(p) for p in path]
        for p in path:  # can gather here
            if has_magic(p):
                globbed = set(await self._glob(p, maxdepth=maxdepth))
                out.update(globbed)
                if recursive:
                    # glob call above expanded one depth so if maxdepth is defined
                    # then decrement it in expand_path call below. If it is zero
                    # after decrementing then avoid expand_path call.
                    if maxdepth is not None and maxdepth <= 1:
                        continue
                    remaining = None if maxdepth is None else maxdepth - 1
                    out.update(
                        await self._expand_path(
                            list(globbed),
                            recursive=recursive,
                            maxdepth=remaining,
                        )
                    )
                continue

            if recursive:
                out.update(await self._find(p, maxdepth=maxdepth, withdirs=True))
            if p not in out and (recursive is False or (await self._exists(p))):
                # should only check once, for the root
                out.add(p)

    if not out:
        raise FileNotFoundError(path)
    return sorted(out)
|
| 900 |
+
|
| 901 |
+
async def _mkdir(self, path, create_parents=True, **kwargs):
|
| 902 |
+
pass # not necessary to implement, may not have directories
|
| 903 |
+
|
| 904 |
+
async def _makedirs(self, path, exist_ok=False):
|
| 905 |
+
pass # not necessary to implement, may not have directories
|
| 906 |
+
|
| 907 |
+
async def open_async(self, path, mode="rb", **kwargs):
    # only binary mode without compression is accepted; beyond that
    # argument check there is no default implementation
    wants_text = "b" not in mode
    if wants_text or kwargs.get("compression"):
        raise ValueError
    raise NotImplementedError
|
| 911 |
+
|
| 912 |
+
|
| 913 |
+
def mirror_sync_methods(obj):
    """Populate sync and async methods for obj

    For each method will create a sync version if the name refers to an async method
    (coroutine) and there is no override in the child class; will create an async
    method for the corresponding sync method if there is no implementation.

    Uses the methods specified in
    - async_methods: the set that an implementation is expected to provide
    - default_async_methods: that can be derived from their sync version in
      AbstractFileSystem
    - AsyncFileSystem: async-specific default coroutines
    """
    from fsspec import AbstractFileSystem

    for async_name in async_methods + dir(AsyncFileSystem):
        if not async_name.startswith("_"):
            continue
        # names rejected by the module-level ``private`` pattern are skipped
        if not private.match(async_name):
            continue

        sync_name = async_name[1:]
        is_coroutine = inspect.iscoroutinefunction(getattr(obj, async_name, None))
        # the function behind obj's current sync method, if it is a bound method
        current_sync = getattr(getattr(obj, sync_name, False), "__func__", None)
        is_default = current_sync is getattr(AbstractFileSystem, sync_name, "")
        if not (is_coroutine and is_default):
            continue

        # the sync method is still the AbstractFileSystem default: replace it
        # with a blocking wrapper around the coroutine
        mirrored = sync_wrapper(getattr(obj, async_name), obj=obj)
        setattr(obj, sync_name, mirrored)
        if not mirrored.__doc__:
            mirrored.__doc__ = getattr(
                getattr(AbstractFileSystem, sync_name, None), "__doc__", ""
            )
|
| 943 |
+
|
| 944 |
+
|
| 945 |
+
class FSSpecCoroutineCancel(Exception):
    """Default exception used by _dump_running_tasks when cancelling tasks."""
|
| 947 |
+
|
| 948 |
+
|
| 949 |
+
def _dump_running_tasks(
    printout=True, cancel=True, exc=FSSpecCoroutineCancel, with_task=False
):
    """Describe, and optionally cancel, the unfinished tasks of the IO loop.

    Parameters
    ----------
    printout: bool
        Call ``print_stack()`` on every unfinished task.
    cancel: bool
        Cancel every unfinished task and throw ``exc`` into its coroutine.
    exc: exception class
        Set on the task and thrown into the coroutine when cancelling.
    with_task: bool
        Include the task object itself under the "task" key.

    Returns
    -------
    list of dict
        One dict per unfinished task with the keys "locals", "file",
        "firstline", "linelo", "stack" and "task".
    """
    import traceback

    tasks = [t for t in asyncio.tasks.all_tasks(loop[0]) if not t.done()]
    if printout:
        for task in tasks:
            task.print_stack()
    out = [
        {
            "locals": task._coro.cr_frame.f_locals,
            "file": task._coro.cr_frame.f_code.co_filename,
            "firstline": task._coro.cr_frame.f_code.co_firstlineno,
            "linelo": task._coro.cr_frame.f_lineno,
            "stack": traceback.format_stack(task._coro.cr_frame),
            "task": task if with_task else None,
        }
        for task in tasks
    ]
    if cancel:
        for t in tasks:
            # grab the callbacks first; they are invoked by hand below
            cbs = t._callbacks
            t.cancel()
            asyncio.futures.Future.set_exception(t, exc)
            asyncio.futures.Future.cancel(t)
            # _callbacks may be None when the task has no callbacks
            for cb in cbs or ():
                cb[0](t)  # cancels any dependent concurrent.futures
            try:
                t._coro.throw(exc)  # exits coro, unless explicitly handled
            except exc:
                pass
    return out
|
| 980 |
+
|
| 981 |
+
|
| 982 |
+
class AbstractAsyncStreamedFile(AbstractBufferedFile):
    """Coroutine-based file object; subclasses supply the transfer hooks."""

    # no read buffering, and always auto-commit
    # TODO: readahead might still be useful here, but needs async version

    async def read(self, length=-1):
        """
        Fetch up to ``length`` bytes starting at the current position

        Parameters
        ----------
        length: int (-1)
            Number of bytes to read; if <0 (or None), all remaining bytes.
        """
        nbytes = -1 if length is None else int(length)
        if self.mode != "rb":
            raise ValueError("File not in read mode")
        if nbytes < 0:
            nbytes = self.size - self.loc
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if nbytes == 0:
            # nothing to ask for, skip the fetch entirely
            return b""
        start = self.loc
        data = await self._fetch_range(start, start + nbytes)
        self.loc += len(data)
        return data

    async def write(self, data):
        """
        Append data to the in-memory buffer.

        The buffer is flushed once it holds at least ``blocksize`` bytes.

        Parameters
        ----------
        data: bytes
            Set of bytes to be written.

        Returns
        -------
        Whatever the buffer's ``write`` returned for ``data``.
        """
        if self.mode not in {"wb", "ab"}:
            raise ValueError("File not in write mode")
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if self.forced:
            raise ValueError("This file has been force-flushed, can only close")
        written = self.buffer.write(data)
        self.loc += written
        if self.buffer.tell() >= self.blocksize:
            await self.flush()
        return written

    async def close(self):
        """Close file

        Force-flushes pending writes (write modes) or drops the cache
        (read mode), then marks the file closed.
        """
        if getattr(self, "_unclosable", False) or self.closed:
            return
        if self.mode != "rb":
            if not self.forced:
                await self.flush(force=True)

            if self.fs is not None:
                self.fs.invalidate_cache(self.path)
                self.fs.invalidate_cache(self.fs._parent(self.path))
        else:
            self.cache = None

        self.closed = True

    async def flush(self, force=False):
        """Upload the buffer if it is large enough, or if ``force`` is set.

        Parameters
        ----------
        force: bool
            Upload regardless of buffer size and pass ``final=True`` to
            ``_upload_chunk``. Allowed only once per file.
        """
        if self.closed:
            raise ValueError("Flush on closed file")
        if force:
            if self.forced:
                raise ValueError("Force flush cannot be called more than once")
            self.forced = True

        if self.mode not in {"wb", "ab"}:
            # flushing a read-mode file does nothing
            return

        if not (force or self.buffer.tell() >= self.blocksize):
            # too little data so far; wait for more
            return

        if self.offset is None:
            # first upload for this file: start the upload session
            self.offset = 0
            try:
                await self._initiate_upload()
            except BaseException:
                self.closed = True
                raise

        uploaded = await self._upload_chunk(final=force)
        if uploaded is not False:
            self.offset += self.buffer.seek(0, 2)
            self.buffer = io.BytesIO()

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    async def _fetch_range(self, start, end):
        """Read hook used by ``read``; must be provided by subclasses."""
        raise NotImplementedError

    async def _initiate_upload(self):
        """Hook awaited by ``flush`` before the first chunk; no-op here."""
        pass

    async def _upload_chunk(self, final=False):
        """Write hook used by ``flush``; must be provided by subclasses."""
        raise NotImplementedError
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/callbacks.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import wraps
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class Callback:
    """
    Base class and interface for callback mechanism

    Use it directly to monitor file transfers by passing
    ``callback=Callback(hooks=...)`` (see the ``hooks`` argument below), or
    subclass it for more specialised behaviour.

    Parameters
    ----------
    size: int (optional)
        Nominal quantity for the value that corresponds to a complete
        transfer, e.g., total number of tiles or total number of
        bytes
    value: int (0)
        Starting internal counter value
    hooks: dict or None
        A dict of named functions to be called on each update. The signature
        of these must be ``f(size, value, **kwargs)``
    """

    def __init__(self, size=None, value=0, hooks=None, **kwargs):
        self.size = size
        self.value = value
        self.hooks = hooks or {}
        self.kw = kwargs

    def __enter__(self):
        return self

    def __exit__(self, *exc_args):
        self.close()

    def close(self):
        """Close callback."""

    def branched(self, path_1, path_2, **kwargs):
        """
        Return callback for child transfers

        A callback operating at a higher level, e.g., put, may trigger
        transfers that can be monitored as well. This returns the callback to
        hand to the child method, e.g., put_file, as its `callback=` argument.

        The implementation delegates to `callback.branch` for compatibility.
        When implementing callbacks, it is recommended to override this
        function instead of `branch` and avoid calling
        `super().branched(...)`.

        Prefer using this function over `branch`.

        Parameters
        ----------
        path_1: str
            Child's source path
        path_2: str
            Child's destination path
        **kwargs:
            Arbitrary keyword arguments

        Returns
        -------
        callback: Callback
            A callback instance to be passed to the child method
        """
        self.branch(path_1, path_2, kwargs)
        # ``branch`` may have put a "callback" entry into kwargs; take it out
        # so the caller has to pass "callback=" explicitly
        return kwargs.pop("callback", DEFAULT_CALLBACK)

    def branch_coro(self, fn):
        """
        Wrap a coroutine function so each call gets a new child callback.
        """

        @wraps(fn)
        async def wrapper(path1, path2: str, **kwargs):
            with self.branched(path1, path2, **kwargs) as child:
                return await fn(path1, path2, callback=child, **kwargs)

        return wrapper

    def set_size(self, size):
        """
        Set the internal maximum size attribute

        Usually called if not initially set at instantiation. Note that this
        triggers a ``call()``.

        Parameters
        ----------
        size: int
        """
        self.size = size
        self.call()

    def absolute_update(self, value):
        """
        Set the internal value state

        Triggers ``call()``

        Parameters
        ----------
        value: int
        """
        self.value = value
        self.call()

    def relative_update(self, inc=1):
        """
        Delta increment the internal counter

        Triggers ``call()``

        Parameters
        ----------
        inc: int
        """
        self.value += inc
        self.call()

    def call(self, hook_name=None, **kwargs):
        """
        Execute hook(s) with current state

        Each function is passed the internal size and current value

        Parameters
        ----------
        hook_name: str or None
            If given, execute only this hook (nothing happens if no hook of
            that name exists) and return its result
        kwargs: passed on to (all) hook(s)
        """
        if not self.hooks:
            return
        merged = {**self.kw, **kwargs}
        if hook_name:
            if hook_name in self.hooks:
                return self.hooks[hook_name](self.size, self.value, **merged)
            return
        for hook in self.hooks.values():
            hook(self.size, self.value, **merged)

    def wrap(self, iterable):
        """
        Wrap an iterable to call ``relative_update`` on each iteration

        Parameters
        ----------
        iterable: Iterable
            The iterable that is being wrapped
        """
        for element in iterable:
            self.relative_update()
            yield element

    def branch(self, path_1, path_2, kwargs):
        """
        Set callbacks for child transfers

        A callback operating at a higher level, e.g., put, may trigger
        transfers that can be monitored as well. The passed kwargs are to be
        *mutated* to add ``callback=``, if this class supports branching to
        children.

        Parameters
        ----------
        path_1: str
            Child's source path
        path_2: str
            Child's destination path
        kwargs: dict
            arguments passed to child method, e.g., put_file.

        Returns
        -------
        None; the base implementation leaves ``kwargs`` untouched.
        """
        return None

    def no_op(self, *_, **__):
        pass

    def __getattr__(self, item):
        """
        Any attribute not otherwise defined resolves to ``no_op``, so calling
        an undefined method does nothing
        """
        return self.no_op

    @classmethod
    def as_callback(cls, maybe_callback=None):
        """Transform callback=... into Callback instance

        ``None`` maps to the module-level ``DEFAULT_CALLBACK``; any other
        value is returned unchanged. This is an alternative to including
        ``callback=DEFAULT_CALLBACK`` directly in a method signature.
        """
        return DEFAULT_CALLBACK if maybe_callback is None else maybe_callback
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
class NoOpCallback(Callback):
    """
    A Callback whose ``call`` does nothing at all
    """

    def call(self, *args, **kwargs):
        """Accept any arguments, run no hooks, return None."""
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
class DotPrinterCallback(Callback):
    """
    Simple example Callback implementation

    Almost identical to Callback with a hook that prints a char; here we
    demonstrate how the outer layer may print "#" and the inner layer "."

    Parameters
    ----------
    chr_to_print: str ("#")
        Character written on every ``call()``
    **kwargs:
        Forwarded to ``Callback.__init__``
    """

    def __init__(self, chr_to_print="#", **kwargs):
        self.chr = chr_to_print
        super().__init__(**kwargs)

    def branch(self, path_1, path_2, kwargs):
        """Mutate kwargs to add new instance with different print char"""
        kwargs["callback"] = DotPrinterCallback(".")

    def call(self, *args, **kwargs):
        """Just outputs a character

        Positional arguments are accepted (and ignored) so that a positional
        ``hook_name``, which ``Callback.call`` allows, does not raise
        TypeError here.
        """
        print(self.chr, end="")
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
class TqdmCallback(Callback):
    """
    A callback to display a progress bar using tqdm

    Parameters
    ----------
    tqdm_kwargs : dict, (optional)
        Any argument accepted by the tqdm constructor.
        See the `tqdm doc <https://tqdm.github.io/docs/tqdm/#__init__>`_.
        Will be forwarded to `tqdm_cls`.
    tqdm_cls: (optional)
        subclass of `tqdm.tqdm`. If not passed, it will default to `tqdm.tqdm`.

    Raises
    ------
    ImportError
        If tqdm is not installed.

    Examples
    --------
    >>> import fsspec
    >>> from fsspec.callbacks import TqdmCallback
    >>> fs = fsspec.filesystem("memory")
    >>> path2distant_data = "/your-path"
    >>> fs.upload(
    ...     ".",
    ...     path2distant_data,
    ...     recursive=True,
    ...     callback=TqdmCallback(),
    ... )

    You can forward args to tqdm using the ``tqdm_kwargs`` parameter.

    >>> fs.upload(
    ...     ".",
    ...     path2distant_data,
    ...     recursive=True,
    ...     callback=TqdmCallback(tqdm_kwargs={"desc": "Your tqdm description"}),
    ... )

    You can also customize the progress bar by passing a subclass of `tqdm`.

    .. code-block:: python

        class TqdmFormat(tqdm):
            '''Provides a `total_time` format parameter'''
            @property
            def format_dict(self):
                d = super().format_dict
                total_time = d["elapsed"] * (d["total"] or 0) / max(d["n"], 1)
                d.update(total_time=self.format_interval(total_time) + " in total")
                return d

    >>> with TqdmCallback(
    ...     tqdm_kwargs={
    ...         "desc": "desc",
    ...         "bar_format": "{total_time}: {percentage:.0f}%|{bar}{r_bar}",
    ...     },
    ...     tqdm_cls=TqdmFormat,
    ... ) as callback:
    ...     fs.upload(".", path2distant_data, recursive=True, callback=callback)
    """

    def __init__(self, tqdm_kwargs=None, *args, **kwargs):
        # Assign before anything below can raise. ``Callback.__getattr__``
        # answers every unknown attribute with ``no_op``, so if the import
        # failed with ``tqdm`` still unset, ``__del__`` -> ``close()`` would
        # see ``no_op`` instead of None and fail inside the finalizer.
        self.tqdm = None
        try:
            from tqdm import tqdm

        except ImportError as exce:
            raise ImportError(
                "Using TqdmCallback requires tqdm to be installed"
            ) from exce

        self._tqdm_cls = kwargs.pop("tqdm_cls", tqdm)
        self._tqdm_kwargs = tqdm_kwargs or {}
        super().__init__(*args, **kwargs)

    def call(self, *args, **kwargs):
        """Create the bar on first use, then sync it to ``size``/``value``."""
        if self.tqdm is None:
            self.tqdm = self._tqdm_cls(total=self.size, **self._tqdm_kwargs)
        self.tqdm.total = self.size
        # tqdm.update takes an increment, so pass the distance to ``value``
        self.tqdm.update(self.value - self.tqdm.n)

    def close(self):
        """Close the progress bar, if one was created, and forget it."""
        if self.tqdm is not None:
            self.tqdm.close()
            self.tqdm = None

    def __del__(self):
        return self.close()
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
DEFAULT_CALLBACK = _DEFAULT_CALLBACK = NoOpCallback()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/compression.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Helper functions for a standard streaming compression API"""
|
| 2 |
+
|
| 3 |
+
from zipfile import ZipFile
|
| 4 |
+
|
| 5 |
+
import fsspec.utils
|
| 6 |
+
from fsspec.spec import AbstractBufferedFile
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def noop_file(file, mode, **kwargs):
    """Identity "compression": hand back ``file`` untouched.

    ``mode`` and any keyword arguments are accepted and ignored.
    """
    return file
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# TODO: files should also be available as contexts
|
| 14 |
+
# should be functions of the form func(infile, mode=, **kwargs) -> file-like
|
| 15 |
+
compr = {None: noop_file}
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def register_compression(name, callback, extensions, force=False):
    """Register an "inferable" file compression type.

    Registers transparent file compression type for use with fsspec.open.
    Compression can be specified by name in open, or "infer"-ed for any files
    ending with the given extensions.

    Args:
        name: (str) The compression type name. Eg. "gzip".
        callback: A callable of form (infile, mode, **kwargs) -> file-like.
            Accepts an input file-like object, the target mode and kwargs.
            Returns a wrapped file-like object.
        extensions: (str, Iterable[str]) A file extension, or list of file
            extensions for which to infer this compression scheme. Eg. "gz".
        force: (bool) Force re-registration of compression type or extensions.

    Raises:
        ValueError: If name or extensions already registered, and not force.

    """
    if isinstance(extensions, str):
        extensions = [extensions]
    else:
        # The extensions are walked twice (validate, then register). Copy
        # them so a one-shot iterable such as a generator is not exhausted by
        # the validation pass, which would leave nothing registered.
        extensions = list(extensions)

    # Validate everything first so a failure leaves both tables untouched
    if name in compr and not force:
        raise ValueError(f"Duplicate compression registration: {name}")

    for ext in extensions:
        if ext in fsspec.utils.compressions and not force:
            raise ValueError(f"Duplicate compression file extension: {ext} ({name})")

    compr[name] = callback

    for ext in extensions:
        fsspec.utils.compressions[ext] = name
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def unzip(infile, mode="rb", filename=None, **kwargs):
    """Open a single member of a zip archive stored in ``infile``.

    Parameters
    ----------
    infile: file-like
        Object holding (or receiving) the zip archive.
    mode: str
        If it contains "r" the member is opened for reading; otherwise a new
        archive is written.
    filename: str or None
        Member name. When reading, defaults to the first name in the archive;
        when writing, defaults to "file".
    **kwargs:
        Passed to ``ZipFile.open`` when reading and to ``ZipFile`` when
        writing.

    Returns
    -------
    The file-like object for the member. In write mode its ``close`` also
    closes the archive.
    """
    if "r" in mode:
        archive = ZipFile(infile)
        member = archive.namelist()[0] if filename is None else filename
        return archive.open(member, mode="r", **kwargs)

    archive = ZipFile(infile, mode="w", **kwargs)
    handle = archive.open(filename or "file", mode="w")
    # closing the member must also finalise the archive
    handle.close = lambda closer=handle.close: closer() or archive.close()
    return handle
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# zip support comes from the standard library and is always registered
register_compression("zip", unzip, "zip")

# bz2: only when the interpreter provides the module
try:
    from bz2 import BZ2File

    register_compression("bz2", BZ2File, "bz2")
except ImportError:
    pass

# gzip: use isal's implementation when importable, else the stdlib one
try:  # pragma: no cover
    from isal import igzip
except ImportError:
    from gzip import GzipFile

    register_compression(
        "gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), "gz"
    )
else:

    def isal(infile, mode="rb", **kwargs):
        return igzip.IGzipFile(fileobj=infile, mode=mode, **kwargs)

    register_compression("gzip", isal, "gz")

# lzma / xz from the standard library, when available
try:
    from lzma import LZMAFile
except ImportError:
    pass
else:
    register_compression("lzma", LZMAFile, "lzma")
    register_compression("xz", LZMAFile, "xz")

# lzmaffi, if installed, replaces the registrations made just above
try:
    import lzmaffi
except ImportError:
    pass
else:
    register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True)
    register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
class SnappyFile(AbstractBufferedFile):
    """File-like wrapper applying snappy stream (de)compression to ``infile``."""

    def __init__(self, infile, mode, **kwargs):
        import snappy

        # always hand the base class a binary mode with a single trailing "b"
        binary_mode = mode.strip("b") + "b"
        super().__init__(
            fs=None, path="snappy", mode=binary_mode, size=999999999, **kwargs
        )
        self.infile = infile
        self.codec = (
            snappy.StreamDecompressor() if "r" in mode else snappy.StreamCompressor()
        )

    def _upload_chunk(self, final=False):
        """Compress the whole buffer and write the result to ``infile``."""
        self.buffer.seek(0)
        self.infile.write(self.codec.add_chunk(self.buffer.read()))
        return True

    def seek(self, loc, whence=0):
        raise NotImplementedError("SnappyFile is not seekable")

    def seekable(self):
        return False

    def _fetch_range(self, start, end):
        """Read ``end - start`` bytes from ``infile`` and decompress them"""
        raw = self.infile.read(end - start)
        return self.codec.decompress(raw)
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
try:
    import snappy

    # attribute probe: raises AttributeError if ``compress`` is missing
    snappy.compress
    # Snappy may use the .sz file extension, but this is not part of the
    # standard implementation.
    register_compression("snappy", SnappyFile, [])

except (ImportError, NameError, AttributeError):
    pass

try:
    import lz4.frame
except ImportError:
    pass
else:
    register_compression("lz4", lz4.frame.open, "lz4")

try:
    import zstandard as zstd
except ImportError:
    pass
else:

    def zstandard_file(infile, mode="rb"):
        """Wrap ``infile`` in a zstd stream reader ("r" modes) or writer."""
        if "r" in mode:
            return zstd.ZstdDecompressor().stream_reader(infile)
        return zstd.ZstdCompressor(level=10).stream_writer(infile)

    register_compression("zstd", zstandard_file, "zst")
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def available_compressions():
    """Return the names currently registered in ``compr`` as a new list."""
    return [*compr]
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/config.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import configparser
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
import warnings
|
| 7 |
+
from typing import Any
|
| 8 |
+
|
| 9 |
+
conf: dict[str, dict[str, Any]] = {}
|
| 10 |
+
default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/fsspec")
|
| 11 |
+
conf_dir = os.environ.get("FSSPEC_CONFIG_DIR", default_conf_dir)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def set_conf_env(conf_dict, envdict=os.environ):
    """Set config values from environment variables

    Two forms are recognised: ``FSSPEC_<protocol>`` and
    ``FSSPEC_<protocol>_<kwarg>``. The value of ``FSSPEC_<protocol>`` is
    parsed as a json dictionary and used to ``update`` the config of that
    protocol. The value of ``FSSPEC_<protocol>_<kwarg>`` is stored as the raw
    string, with the kwarg key lower-cased.

    The ``FSSPEC_<protocol>_<kwarg>`` variables are applied after the
    ``FSSPEC_<protocol>`` ones. Variables that cannot be used produce a
    warning and are skipped.

    Parameters
    ----------
    conf_dict : dict(str, dict)
        This dict will be mutated
    envdict : dict-like(str, str)
        Source for the values - usually the real environment
    """
    deferred = []
    for name in envdict:
        well_formed = name.startswith("FSSPEC_") and len(name) > 7 and name[7] != "_"
        if not well_formed:
            if name.startswith("FSSPEC"):
                warnings.warn(
                    f"Ignoring environment variable {name} due to having an unexpected name"
                )
            continue

        if name.count("_") > 1:
            # per-kwarg variable: handled in the second pass below
            deferred.append(name)
            continue

        try:
            parsed = json.loads(envdict[name])
        except json.decoder.JSONDecodeError as ex:
            warnings.warn(
                f"Ignoring environment variable {name} due to a parse failure: {ex}"
            )
            continue

        if not isinstance(parsed, dict):
            warnings.warn(
                f"Ignoring environment variable {name} due to not being a dict:"
                f" {type(parsed)}"
            )
            continue

        protocol = name.split("_", 1)[1]
        conf_dict.setdefault(protocol.lower(), {}).update(parsed)

    for name in deferred:
        _, protocol, option = name.split("_", 2)
        conf_dict.setdefault(protocol.lower(), {})[option.lower()] = envdict[name]
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def set_conf_files(cdir, conf_dict):
    """Set config values from files

    Scans for INI and JSON files in the given directory, and uses their
    contents to set the config. Files are processed in sorted name order; in
    case of repeated values, later values win.

    In the case of INI files, all values are strings, and these will not
    be converted.

    Parameters
    ----------
    cdir : str
        Directory to search; nothing happens if it is not a directory
    conf_dict : dict(str, dict)
        This dict will be mutated
    """
    if not os.path.isdir(cdir):
        return
    for fname in sorted(os.listdir(cdir)):
        path = os.path.join(cdir, fname)
        if fname.endswith(".ini"):
            parser = configparser.ConfigParser()
            parser.read(path)
            # sections() leaves out the special DEFAULT section
            for section in parser.sections():
                conf_dict.setdefault(section, {}).update(dict(parser[section]))
        if fname.endswith(".json"):
            with open(path) as fh:
                loaded = json.load(fh)
            for section in loaded:
                conf_dict.setdefault(section, {}).update(dict(loaded[section]))
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def apply_config(cls, kwargs, conf_dict=None):
    """Supply default values for kwargs when instantiating class

    Looks up the entries of the config dict that match the class's
    ``.protocol`` attribute (one or more str) and uses them as defaults
    beneath the passed kwargs.

    Parameters
    ----------
    cls : file system implementation
    kwargs : dict
    conf_dict : dict of dict
        Typically this is the global configuration, which is also what is
        used when this is None

    Returns
    -------
    dict : a new dict of config defaults overlaid with ``kwargs``
    """
    source = conf if conf_dict is None else conf_dict
    protocol = cls.protocol
    names = protocol if isinstance(protocol, (tuple, list)) else [protocol]
    merged = {}
    for name in names:
        # later protocols override earlier ones
        if name in source:
            merged.update(source[name])
    # explicit kwargs always win
    merged.update(**kwargs)
    return merged
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
set_conf_files(conf_dir, conf)
|
| 131 |
+
set_conf_env(conf)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/conftest.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
import subprocess
|
| 4 |
+
import sys
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
import pytest
|
| 8 |
+
|
| 9 |
+
import fsspec
|
| 10 |
+
from fsspec.implementations.cached import CachingFileSystem
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@pytest.fixture()
def m():
    """
    Fixture providing a memory filesystem, emptied before and after use.
    """
    memfs = fsspec.filesystem("memory")

    def reset():
        # wipe stored files and restore the single root pseudo-directory
        memfs.store.clear()
        memfs.pseudo_dirs.clear()
        memfs.pseudo_dirs.append("")

    reset()
    try:
        yield memfs
    finally:
        reset()
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
@pytest.fixture
def ftp_writable(tmpdir):
    """
    Fixture providing a writable FTP filesystem.

    Starts ``pyftpdlib`` in a subprocess serving ``tmpdir`` (pre-populated
    with a file named "out") and yields ``(host, port, user, password)``.
    Skipped when pyftpdlib is not importable.
    """
    pytest.importorskip("pyftpdlib")
    from fsspec.implementations.ftp import FTPFileSystem

    FTPFileSystem.clear_instance_cache()  # remove lingering connections
    CachingFileSystem.clear_instance_cache()

    root = str(tmpdir)
    with open(os.path.join(root, "out"), "wb") as fh:
        fh.write(b"hello" * 10000)

    command = [
        sys.executable,
        "-m",
        "pyftpdlib",
        "-d",
        root,
        "-u",
        "user",
        "-P",
        "pass",
        "-w",
    ]
    server = subprocess.Popen(command)
    try:
        # fixed pause before handing the address to the test
        time.sleep(1)
        yield "localhost", 2121, "user", "pass"
    finally:
        server.terminate()
        server.wait()
        try:
            # best-effort cleanup; failures here must not fail the test
            shutil.rmtree(tmpdir)
        except Exception:
            pass
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/core.py
ADDED
|
@@ -0,0 +1,738 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import io
|
| 4 |
+
import logging
|
| 5 |
+
import os
|
| 6 |
+
import re
|
| 7 |
+
from glob import has_magic
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
# for backwards compat, we export cache things from here too
|
| 11 |
+
from fsspec.caching import ( # noqa: F401
|
| 12 |
+
BaseCache,
|
| 13 |
+
BlockCache,
|
| 14 |
+
BytesCache,
|
| 15 |
+
MMapCache,
|
| 16 |
+
ReadAheadCache,
|
| 17 |
+
caches,
|
| 18 |
+
)
|
| 19 |
+
from fsspec.compression import compr
|
| 20 |
+
from fsspec.config import conf
|
| 21 |
+
from fsspec.registry import filesystem, get_filesystem_class
|
| 22 |
+
from fsspec.utils import (
|
| 23 |
+
_unstrip_protocol,
|
| 24 |
+
build_name_function,
|
| 25 |
+
infer_compression,
|
| 26 |
+
stringify_path,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
logger = logging.getLogger("fsspec")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class OpenFile:
    """
    File-like object to be used in a context

    Can layer (buffered) text-mode and compression over any file-system, which
    are typically binary-only.

    These instances are safe to serialize, as the low-level file object
    is not created until invoked using ``with``.

    Parameters
    ----------
    fs: FileSystem
        The file system to use for opening the file. Should be a subclass or duck-type
        with ``fsspec.spec.AbstractFileSystem``
    path: str
        Location to open
    mode: str like 'rb', optional
        Mode of the opened file
    compression: str or None, optional
        Compression to apply
    encoding: str or None, optional
        The encoding to use if opened in text mode.
    errors: str or None, optional
        How to handle encoding errors if opened in text mode.
    newline: None or str
        Passed to TextIOWrapper in text mode, how to handle line endings.
    """

    def __init__(
        self,
        fs,
        path,
        mode="rb",
        compression=None,
        encoding=None,
        errors=None,
        newline=None,
    ):
        self.fs = fs
        self.path = path
        self.mode = mode
        # resolves "infer" and validates the codec name
        self.compression = get_compression(path, compression)
        self.encoding = encoding
        self.errors = errors
        self.newline = newline
        # stack of file-likes created by __enter__, innermost first
        self.fobjects = []

    def __reduce__(self):
        # Only the constructor arguments are serialized; any open file objects
        # are not part of the pickled state.
        return (
            OpenFile,
            (
                self.fs,
                self.path,
                self.mode,
                self.compression,
                self.encoding,
                self.errors,
                self.newline,
            ),
        )

    def __repr__(self):
        return f"<OpenFile '{self.path}'>"

    def __enter__(self):
        """Open the file and return the outermost file-like layer.

        The underlying filesystem is always asked for a binary file; the
        compression and text layers are added on top when requested.

        Raises
        ------
        FileNotFoundError
            If the filesystem cannot find the path. When the path contains
            glob characters, the error carries a hint about ``expand=True``.
        """
        # the backend is always opened in binary mode
        mode = self.mode.replace("t", "").replace("b", "") + "b"

        try:
            f = self.fs.open(self.path, mode=mode)
        except FileNotFoundError as e:
            if has_magic(self.path):
                # Build the message explicitly: passing the path as a second
                # exception argument would leave "%s" uninterpolated.
                raise FileNotFoundError(
                    f"{self.path} not found. The URL contains glob characters: you maybe needed\n"
                    "to pass expand=True in fsspec.open() or the storage_options of \n"
                    "your library. You can also set the config value 'open_expand'\n"
                    "before import, or fsspec.core.DEFAULT_EXPAND at runtime, to True."
                ) from e
            raise

        self.fobjects = [f]

        if self.compression is not None:
            compress = compr[self.compression]
            # the codec factory receives only the first mode character
            f = compress(f, mode=mode[0])
            self.fobjects.append(f)

        if "b" not in self.mode:
            # assume, for example, that 'r' is equivalent to 'rt' as in builtin
            f = PickleableTextIOWrapper(
                f, encoding=self.encoding, errors=self.errors, newline=self.newline
            )
            self.fobjects.append(f)

        return self.fobjects[-1]

    def __exit__(self, *args):
        self.close()

    @property
    def full_name(self):
        """The path with the filesystem's protocol re-attached."""
        return _unstrip_protocol(self.path, self.fs)

    def open(self):
        """Materialise this as a real open file without context

        The OpenFile object should be explicitly closed to avoid enclosed file
        instances persisting. You must, therefore, keep a reference to the OpenFile
        during the life of the file-like it generates.
        """
        return self.__enter__()

    def close(self):
        """Close all encapsulated file objects"""
        # outermost layer first, so wrappers close before the raw file
        for f in reversed(self.fobjects):
            if "r" not in self.mode and not f.closed:
                f.flush()
            f.close()
        self.fobjects.clear()
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
class OpenFiles(list):
    """List of OpenFile instances

    Entering the list as a context opens every contained file and leaving it
    closes them all. Indexing behaves like a normal list, except that a slice
    yields another ``OpenFiles`` sharing the same mode and filesystem.

    If the filesystem, or any filesystem it wraps through an ``fs``
    attribute, provides ``open_many``, opening is delegated to it; when not in
    read mode, ``commit_many`` is called on that same layer at exit.
    """

    def __init__(self, *args, mode="rb", fs=None):
        self.mode = mode
        self.fs = fs
        self.files = []
        super().__init__(*args)

    def __enter__(self):
        if self.fs is None:
            raise ValueError("Context has already been used")

        # walk down through wrapped filesystems looking for bulk-open support
        layer = self.fs
        while layer is not None:
            if hasattr(layer, "open_many"):
                # check for concurrent cache download; or set up for upload
                self.files = layer.open_many(self)
                return self.files
            layer = getattr(layer, "fs", None)

        return [member.__enter__() for member in self]

    def __exit__(self, *args):
        for member in self:
            member.__exit__(*args)

        if "r" in self.mode:
            return

        layer = self.fs
        while layer is not None:
            if hasattr(layer, "open_many"):
                # check for concurrent cache upload
                layer.commit_many(self.files)
                return
            layer = getattr(layer, "fs", None)

    def __getitem__(self, item):
        picked = super().__getitem__(item)
        if not isinstance(item, slice):
            return picked
        return OpenFiles(picked, mode=self.mode, fs=self.fs)

    def __repr__(self):
        return f"<List of {len(self)} OpenFile instances>"
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def open_files(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    name_function=None,
    num=1,
    protocol=None,
    newline=None,
    auto_mkdir=True,
    expand=True,
    **kwargs,
):
    """Given a path or paths, return a list of ``OpenFile`` objects.

    For writing, a str path must contain the "*" character, which will be filled
    in by increasing numbers, e.g., "part*" -> "part1", "part2" if num=2.

    For either reading or writing, can instead provide explicit list of paths.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
        to read from alternative filesystems. To read from multiple files you
        can pass a globstring or a list of paths, with the caveat that they
        must all have the same protocol.
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    name_function: function or None
        if opening a set of files for writing, those files do not yet exist,
        so we need to generate their names by formatting the urlpath for
        each sequence number
    num: int [1]
        if writing mode, number of files we expect to create (passed to
        name_function)
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    auto_mkdir: bool (True)
        When the filesystem's protocol is "file", the value is assigned to
        ``fs.auto_mkdir``. Otherwise, when not in read mode and this is True,
        ``fs.makedirs(parent, exist_ok=True)`` is called for the parent of
        every target path; a ``PermissionError`` from that call is ignored.
    expand: bool
        Forwarded unchanged to ``get_fs_token_paths``.
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> files = open_files('2015-*-*.csv')  # doctest: +SKIP
    >>> files = open_files(
    ...     's3://bucket/2015-*-*.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP

    Returns
    -------
    An ``OpenFiles`` instance, which is a list of ``OpenFile`` objects that can
    be used as a single context

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    fs, _, paths = get_fs_token_paths(
        urlpath,
        mode,
        num=num,
        name_function=name_function,
        storage_options=kwargs,
        protocol=protocol,
        expand=expand,
    )

    if fs.protocol == "file":
        fs.auto_mkdir = auto_mkdir
    elif "r" not in mode and auto_mkdir:
        # each distinct parent directory is created at most once
        for parent in {fs._parent(target) for target in paths}:
            try:
                fs.makedirs(parent, exist_ok=True)
            except PermissionError:
                pass

    members = [
        OpenFile(
            fs,
            target,
            mode=mode,
            compression=compression,
            encoding=encoding,
            errors=errors,
            newline=newline,
        )
        for target in paths
    ]
    return OpenFiles(members, mode=mode, fs=fs)
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
def _un_chain(path, kwargs):
    """Split a possibly "::"-chained URL into one entry per filesystem layer.

    Parameters
    ----------
    path: str
        The URL. When it contains "::", each "::"-separated segment is one
        link of the chain; a segment that is a single lower-case word is
        treated as a bare protocol name and gets "://" appended.
    kwargs: dict
        Options for the filesystems. A truthy ``"protocol"`` entry overrides
        the protocol of the last segment. An entry keyed by a protocol name
        supplies that segment's options. All entries still unclaimed once the
        other segments have been handled are given to the first segment.
        Neither this dict nor the dicts nested in it are modified.

    Returns
    -------
    list of (url, protocol, kwargs) tuples, in the same order as the segments
    appear in ``path``.
    """
    x = re.compile(".*[^a-z]+.*")  # test for non protocol-like single word
    bits = (
        [p if "://" in p or x.match(p) else p + "://" for p in path.split("::")]
        if "::" in path
        else [path]
    )
    # [[url, protocol, kwargs], ...]
    out = []
    previous_bit = None
    kwargs = kwargs.copy()
    # iterate from the innermost (last) segment outwards
    for bit in reversed(bits):
        # "protocol" is popped, so the override only applies on the first pass
        protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file"
        cls = get_filesystem_class(protocol)
        extra_kwargs = cls._get_kwargs_from_urls(bit)
        # Copy the per-protocol options: ``kwargs.copy()`` above is shallow, so
        # updating the popped dict in place would alter the caller's own dict.
        kws = dict(kwargs.pop(protocol, {}))
        if bit is bits[0]:
            kws.update(kwargs)
        kw = dict(**extra_kwargs, **kws)
        bit = cls._strip_protocol(bit)
        if (
            protocol in {"blockcache", "filecache", "simplecache"}
            and "target_protocol" not in kw
        ):
            # a caching layer without an explicit target reuses the path of
            # the segment that follows it in the chain
            bit = previous_bit
        out.append((bit, protocol, kw))
        previous_bit = bit
    out.reverse()
    return out
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
def url_to_fs(url, **kwargs):
    """
    Turn fully-qualified and potentially chained URL into filesystem instance

    Keyword arguments that belong to ``fsspec.open()`` rather than to a
    filesystem (compression, encoding, errors, expand, mode, name_function,
    newline, num) are dropped before the filesystem is created.

    Parameters
    ----------
    url : str
        The fsspec-compatible URL
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Returns
    -------
    filesystem : FileSystem
        The new filesystem discovered from ``url`` and created with
        ``**kwargs``.
    urlpath : str
        The file-systems-specific URL for ``url``.
    """
    url = stringify_path(url)
    # non-FS arguments that appear in fsspec.open()
    # inspect could keep this in sync with open()'s signature
    non_fs_args = {
        "compression",
        "encoding",
        "errors",
        "expand",
        "mode",
        "name_function",
        "newline",
        "num",
    }
    fs_kwargs = {}
    for key, value in kwargs.items():
        if key not in non_fs_args:
            fs_kwargs[key] = value

    chain = _un_chain(url, fs_kwargs)

    # Fold the chain from the innermost link outwards into nested target_*
    # options; the first link is handled separately below.
    inkwargs = {}
    for target_url, target_protocol, target_kw in reversed(chain[1:]):
        inkwargs["target_options"] = dict(**target_kw, **inkwargs)
        inkwargs["target_protocol"] = target_protocol
        inkwargs["fo"] = target_url

    urlpath, protocol, head_kw = chain[0]
    inkwargs = dict(**head_kw, **inkwargs)
    return filesystem(protocol, **inkwargs), urlpath
|
| 410 |
+
|
| 411 |
+
|
| 412 |
+
# Fallback for the ``expand`` argument of ``open()`` when the caller passes
# None. Taken from the "open_expand" config value when this module is executed;
# False if that value is not set.
DEFAULT_EXPAND = conf.get("open_expand", False)
|
| 413 |
+
|
| 414 |
+
|
| 415 |
+
def open(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    protocol=None,
    newline=None,
    expand=None,
    **kwargs,
):
    """Given a path or paths, return one ``OpenFile`` object.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath. Prefix with a protocol like ``s3://``
        to read from alternative filesystems. Should not include glob
        character(s).
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    expand: bool or None
        Whether to regard file paths containing special glob characters as needing
        expansion (finding the first match) or absolute. Setting False allows using
        paths which do embed such characters. If None (default), this argument
        takes its value from the DEFAULT_EXPAND module variable, which takes
        its initial value from the "open_expand" config value at startup, which will
        be False if not set.
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> openfile = open('2015-01-01.csv')  # doctest: +SKIP
    >>> openfile = open(
    ...     's3://bucket/2015-01-01.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP
    >>> with openfile as f:
    ...     df = pd.read_csv(f)  # doctest: +SKIP
    ...

    Returns
    -------
    ``OpenFile`` object.

    Raises
    ------
    FileNotFoundError
        If no path results from ``urlpath``.

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    if expand is None:
        expand = DEFAULT_EXPAND

    # the single path is wrapped in a list and the first result is returned
    candidates = open_files(
        urlpath=[urlpath],
        mode=mode,
        compression=compression,
        encoding=encoding,
        errors=errors,
        protocol=protocol,
        newline=newline,
        expand=expand,
        **kwargs,
    )
    if candidates:
        return candidates[0]
    raise FileNotFoundError(urlpath)
|
| 498 |
+
|
| 499 |
+
|
| 500 |
+
def open_local(
|
| 501 |
+
url: str | list[str] | Path | list[Path],
|
| 502 |
+
mode: str = "rb",
|
| 503 |
+
**storage_options: dict,
|
| 504 |
+
) -> str | list[str]:
|
| 505 |
+
"""Open file(s) which can be resolved to local
|
| 506 |
+
|
| 507 |
+
For files which either are local, or get downloaded upon open
|
| 508 |
+
(e.g., by file caching)
|
| 509 |
+
|
| 510 |
+
Parameters
|
| 511 |
+
----------
|
| 512 |
+
url: str or list(str)
|
| 513 |
+
mode: str
|
| 514 |
+
Must be read mode
|
| 515 |
+
storage_options:
|
| 516 |
+
passed on to FS for or used by open_files (e.g., compression)
|
| 517 |
+
"""
|
| 518 |
+
if "r" not in mode:
|
| 519 |
+
raise ValueError("Can only ensure local files when reading")
|
| 520 |
+
of = open_files(url, mode=mode, **storage_options)
|
| 521 |
+
if not getattr(of[0].fs, "local_file", False):
|
| 522 |
+
raise ValueError(
|
| 523 |
+
"open_local can only be used on a filesystem which"
|
| 524 |
+
" has attribute local_file=True"
|
| 525 |
+
)
|
| 526 |
+
with of as files:
|
| 527 |
+
paths = [f.name for f in files]
|
| 528 |
+
if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
|
| 529 |
+
return paths[0]
|
| 530 |
+
return paths
|
| 531 |
+
|
| 532 |
+
|
| 533 |
+
def get_compression(urlpath, compression):
    """Resolve and validate a compression name.

    ``"infer"`` is replaced by whatever ``infer_compression(urlpath)`` returns.
    The result is returned if it is None or a key of ``compr``.

    Raises
    ------
    ValueError
        If the resolved name is neither None nor a key of ``compr``.
    """
    resolved = infer_compression(urlpath) if compression == "infer" else compression
    if resolved is None or resolved in compr:
        return resolved
    raise ValueError(f"Compression type {resolved} not supported")
|
| 539 |
+
|
| 540 |
+
|
| 541 |
+
def split_protocol(urlpath):
    """Return protocol, path pair

    The protocol is None when the URL has no "://" separator, or when the text
    in front of the separator is a single character, unless the URL starts
    with "data:".
    """
    urlpath = stringify_path(urlpath)
    protocol, separator, path = urlpath.partition("://")
    # a one-character "protocol" is rejected: excludes Windows paths
    if separator and len(protocol) > 1:
        return protocol, path
    if urlpath.startswith("data:"):
        return urlpath.split(":", 1)
    return None, urlpath
|
| 552 |
+
|
| 553 |
+
|
| 554 |
+
def strip_protocol(urlpath):
    """Return only path part of full URL, according to appropriate backend"""
    protocol = split_protocol(urlpath)[0]
    backend = get_filesystem_class(protocol)
    return backend._strip_protocol(urlpath)
|
| 559 |
+
|
| 560 |
+
|
| 561 |
+
def expand_paths_if_needed(paths, mode, num, fs, name_function):
    """Expand paths if they have a ``*`` in them (write mode) or any of ``*?[]``
    in them (read mode).

    :param paths: list of paths
    mode: str
        Mode in which to open files.
    num: int
        If opening in writing mode, number of files we expect to create.
    fs: filesystem object
    name_function: callable
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.
    :return: list of paths
    """
    paths = list(paths)

    if "w" not in mode:
        # read mode: glob patterns are resolved through the filesystem
        found = []
        for candidate in paths:
            if has_magic(candidate):
                found.extend(fs.glob(candidate))
            else:
                found.append(candidate)
        return found

    # write mode: at most one entry may be a "*" mask
    if sum("*" in candidate for candidate in paths) > 1:
        raise ValueError(
            "When writing data, only one filename mask can be specified."
        )
    num = max(num, len(paths))

    generated = []
    for candidate in paths:
        if "*" in candidate:
            # expand using name_function
            generated.extend(_expand_paths(candidate, name_function, num))
        else:
            generated.append(candidate)

    # if we generated more paths than asked for, trim the list
    return generated[:num]
|
| 606 |
+
|
| 607 |
+
|
| 608 |
+
def get_fs_token_paths(
    urlpath,
    mode="rb",
    num=1,
    name_function=None,
    storage_options=None,
    protocol=None,
    expand=True,
):
    """Filesystem, deterministic token, and paths from a urlpath and options.

    Parameters
    ----------
    urlpath: string or iterable
        Absolute or relative filepath, URL (may include protocols like
        ``s3://``), or globstring pointing to data.
    mode: str, optional
        Mode in which to open files.
    num: int, optional
        If opening in writing mode, number of files we expect to create.
    name_function: callable, optional
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.
    storage_options: dict, optional
        Additional keywords to pass to the filesystem class. The dict passed
        in is not modified.
    protocol: str or None
        To override the protocol specifier in the URL
    expand: bool
        Expand string paths for writing, assuming the path is a directory

    Returns
    -------
    (fs, token, paths): the filesystem instance, its ``_fs_token`` and the
    list of paths.

    Raises
    ------
    ValueError
        If ``urlpath`` is an empty list/tuple/set, or its members resolve to
        more than one protocol.
    """
    if isinstance(urlpath, (list, tuple, set)):
        if not urlpath:
            raise ValueError("empty urlpath sequence")
        # the filesystem is built from the first entry only
        urlpath0 = stringify_path(list(urlpath)[0])
    else:
        urlpath0 = stringify_path(urlpath)
    # Work on a shallow copy so that adding "protocol" below never leaks into
    # the dict owned by the caller.
    storage_options = dict(storage_options or {})
    if protocol:
        storage_options["protocol"] = protocol
    chain = _un_chain(urlpath0, storage_options)
    inkwargs = {}
    # Reverse iterate the chain, creating a nested target_* structure
    for i, ch in enumerate(reversed(chain)):
        urls, nested_protocol, kw = ch
        if i == len(chain) - 1:
            inkwargs = dict(**kw, **inkwargs)
            continue
        inkwargs["target_options"] = dict(**kw, **inkwargs)
        inkwargs["target_protocol"] = nested_protocol
        inkwargs["fo"] = urls
    paths, protocol, _ = chain[0]
    fs = filesystem(protocol, **inkwargs)
    if isinstance(urlpath, (list, tuple, set)):
        pchains = [_un_chain(stringify_path(u), storage_options)[0] for u in urlpath]
        if len({pc[1] for pc in pchains}) > 1:
            # format the message here; exceptions do not interpolate "%s"
            raise ValueError(f"Protocol mismatch getting fs from {urlpath}")
        paths = [pc[0] for pc in pchains]
    else:
        paths = fs._strip_protocol(paths)
    if isinstance(paths, (list, tuple, set)):
        if expand:
            paths = expand_paths_if_needed(paths, mode, num, fs, name_function)
        elif not isinstance(paths, list):
            paths = list(paths)
    else:
        if "w" in mode and expand:
            paths = _expand_paths(paths, name_function, num)
        elif "x" in mode and expand:
            paths = _expand_paths(paths, name_function, num)
        elif "*" in paths:
            # a glob in any other case: keep the matches that are not directories
            paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
        else:
            paths = [paths]

    return fs, fs._fs_token, paths
|
| 686 |
+
|
| 687 |
+
|
| 688 |
+
def _expand_paths(path, name_function, num):
|
| 689 |
+
if isinstance(path, str):
|
| 690 |
+
if path.count("*") > 1:
|
| 691 |
+
raise ValueError("Output path spec must contain exactly one '*'.")
|
| 692 |
+
elif "*" not in path:
|
| 693 |
+
path = os.path.join(path, "*.part")
|
| 694 |
+
|
| 695 |
+
if name_function is None:
|
| 696 |
+
name_function = build_name_function(num - 1)
|
| 697 |
+
|
| 698 |
+
paths = [path.replace("*", name_function(i)) for i in range(num)]
|
| 699 |
+
if paths != sorted(paths):
|
| 700 |
+
logger.warning(
|
| 701 |
+
"In order to preserve order between partitions"
|
| 702 |
+
" paths created with ``name_function`` should "
|
| 703 |
+
"sort to partition order"
|
| 704 |
+
)
|
| 705 |
+
elif isinstance(path, (tuple, list)):
|
| 706 |
+
assert len(path) == num
|
| 707 |
+
paths = list(path)
|
| 708 |
+
else:
|
| 709 |
+
raise ValueError(
|
| 710 |
+
"Path should be either\n"
|
| 711 |
+
"1. A list of paths: ['foo.json', 'bar.json', ...]\n"
|
| 712 |
+
"2. A directory: 'foo/\n"
|
| 713 |
+
"3. A path with a '*' in it: 'foo.*.json'"
|
| 714 |
+
)
|
| 715 |
+
return paths
|
| 716 |
+
|
| 717 |
+
|
| 718 |
+
class PickleableTextIOWrapper(io.TextIOWrapper):
    """A ``TextIOWrapper`` that can be pickled.

    The constructor arguments are remembered on the instance and handed back
    by ``__reduce__``, so unpickling simply builds a new wrapper from them.
    This only works when ``buffer`` itself is pickleable.
    """

    def __init__(
        self,
        buffer,
        encoding=None,
        errors=None,
        newline=None,
        line_buffering=False,
        write_through=False,
    ):
        # Stored before the base initialiser runs; __reduce__ replays them.
        self.args = (
            buffer,
            encoding,
            errors,
            newline,
            line_buffering,
            write_through,
        )
        super().__init__(
            buffer, encoding, errors, newline, line_buffering, write_through
        )

    def __reduce__(self):
        """Return ``(callable, args)`` used by pickle to rebuild the wrapper."""
        return (PickleableTextIOWrapper, self.args)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/exceptions.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
fsspec user-defined exception classes
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import asyncio
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class BlocksizeMismatchError(ValueError):
    """Raised when a cached file is opened with a blocksize that differs
    from the one it was written with.
    """
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class FSTimeoutError(asyncio.TimeoutError):
    """Raised when an fsspec function times out."""
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/fuse.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import logging
|
| 3 |
+
import os
|
| 4 |
+
import stat
|
| 5 |
+
import threading
|
| 6 |
+
import time
|
| 7 |
+
from errno import EIO, ENOENT
|
| 8 |
+
|
| 9 |
+
from fuse import FUSE, FuseOSError, LoggingMixIn, Operations
|
| 10 |
+
|
| 11 |
+
from fsspec import __version__
|
| 12 |
+
from fsspec.core import url_to_fs
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger("fsspec.fuse")
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class FUSEr(Operations):
    """fusepy ``Operations`` implementation backed by an fsspec filesystem.

    Paths received from FUSE are resolved relative to ``self.root`` on
    ``self.fs``. Open files live in ``self.cache`` keyed by an integer handle
    allocated from ``self.counter``.
    """

    def __init__(self, fs, path, ready_file=False):
        """
        Parameters
        ----------
        fs: file-system instance
            Filesystem that serves every operation.
        path: str
            Location on ``fs`` used as the root of the mount.
        ready_file: bool
            If True, a virtual ``.fuse_ready`` file containing ``b"ready"``
            is reported by ``getattr`` and ``read``.
        """
        self.fs = fs
        self.cache = {}
        self.root = path.rstrip("/") + "/"
        self.counter = 0
        logger.info("Starting FUSE at %s", path)
        self._ready_file = ready_file

    def _full_path(self, path):
        """Map a FUSE path onto the corresponding path below ``self.root``."""
        return "".join([self.root, path.lstrip("/")])

    def _is_ready_file(self, path):
        """Tell whether ``path`` names the virtual status file (if enabled)."""
        return self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]

    def _register(self, f):
        """Store an open file in the cache and return its new handle."""
        fh = self.counter
        self.cache[fh] = f
        self.counter += 1
        return fh

    def getattr(self, path, fh=None):
        """Return a stat-like dict for ``path``.

        Raises ``FuseOSError(ENOENT)`` when the filesystem reports the path
        as missing.
        """
        logger.debug("getattr %s", path)
        if self._is_ready_file(path):
            return {"type": "file", "st_size": 5}

        path = self._full_path(path).rstrip("/")
        try:
            info = self.fs.info(path)
        except FileNotFoundError:
            raise FuseOSError(ENOENT)

        data = {"st_uid": info.get("uid", 1000), "st_gid": info.get("gid", 1000)}
        perm = info.get("mode", 0o777)

        if info["type"] != "file":
            data["st_mode"] = stat.S_IFDIR | perm
            data["st_size"] = 0
            data["st_blksize"] = 0
        else:
            data["st_mode"] = stat.S_IFREG | perm
            data["st_size"] = info["size"]
            data["st_blksize"] = 5 * 2**20
            data["st_nlink"] = 1
        # Timestamps missing from the backend fall back to the current time.
        now = time.time()
        data["st_atime"] = info.get("atime", now)
        data["st_ctime"] = info.get("ctime", now)
        data["st_mtime"] = info.get("mtime", now)
        return data

    def readdir(self, path, fh):
        """List the entry names of a directory, including ``.`` and ``..``."""
        logger.debug("readdir %s", path)
        files = self.fs.ls(self._full_path(path), False)
        files = [os.path.basename(f.rstrip("/")) for f in files]
        return [".", ".."] + files

    def mkdir(self, path, mode):
        """Create a directory; ``mode`` is not forwarded to the filesystem."""
        self.fs.mkdir(self._full_path(path))
        return 0

    def rmdir(self, path):
        """Remove a directory."""
        self.fs.rmdir(self._full_path(path))
        return 0

    def read(self, path, size, offset, fh):
        """Read ``size`` bytes at ``offset`` from the cached file ``fh``."""
        logger.debug("read %s", (path, size, offset))
        if self._is_ready_file(path):
            # status indicator
            return b"ready"

        f = self.cache[fh]
        f.seek(offset)
        return f.read(size)

    def write(self, path, data, offset, fh):
        """Write ``data`` at ``offset`` into the cached file ``fh``."""
        logger.debug("write %s", (path, offset))
        f = self.cache[fh]
        f.seek(offset)
        f.write(data)
        return len(data)

    def create(self, path, flags, fi=None):
        """Create ``path``, open it for binary writing and return a handle."""
        logger.debug("create %s", (path, flags))
        fn = self._full_path(path)
        self.fs.touch(fn)  # OS will want to get attributes immediately
        return self._register(self.fs.open(fn, "wb"))

    def open(self, path, flags):
        """Open ``path`` and return a handle.

        Even ``flags`` values open for reading (``"rb"``), odd values for
        writing (``"wb"``).
        """
        logger.debug("open %s", (path, flags))
        fn = self._full_path(path)
        if flags % 2 == 0:
            # read
            mode = "rb"
        else:
            # write/create
            mode = "wb"
        return self._register(self.fs.open(fn, mode))

    def truncate(self, path, length, fh=None):
        """Truncate ``path`` to zero length; other lengths are unsupported."""
        fn = self._full_path(path)
        if length != 0:
            raise NotImplementedError
        # maybe should be no-op since open with write sets size to zero anyway
        self.fs.touch(fn)

    def unlink(self, path):
        """Delete a file, mapping any ``OSError`` to ``FuseOSError(EIO)``."""
        fn = self._full_path(path)
        try:
            self.fs.rm(fn, False)
        except OSError:  # FileNotFoundError is a subclass of OSError
            raise FuseOSError(EIO)

    def release(self, path, fh):
        """Close the file behind ``fh`` and forget the handle.

        The handle is removed from the cache before closing so that a failing
        ``close()`` cannot leave a stale entry behind. Errors are logged and
        otherwise ignored (best effort); the method always returns 0.
        """
        f = self.cache.pop(fh, None)
        if f is not None:
            try:
                f.close()
            except Exception:
                logger.exception("Failed to close handle %s for %s", fh, path)
        return 0

    def chmod(self, path, mode):
        """Forward ``chmod`` when the filesystem provides it."""
        if hasattr(self.fs, "chmod"):
            return self.fs.chmod(self._full_path(path), mode)
        raise NotImplementedError
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def run(
    fs,
    path,
    mount_point,
    foreground=True,
    threads=False,
    ready_file=False,
    ops_class=FUSEr,
):
    """Mount a path of an fsspec filesystem in a local directory.

    This uses fusepy to make it appear as if a given path on an fsspec
    instance is in fact resident within the local file-system.

    This requires that fusepy be installed, and that FUSE be available on
    the system (typically requiring a package to be installed with
    apt, yum, brew, etc.).

    Parameters
    ----------
    fs: file-system instance
        From one of the compatible implementations
    path: str
        Location on that file-system to regard as the root directory to
        mount. Note that you typically should include the terminating "/"
        character.
    mount_point: str
        An empty directory on the local file-system where the contents of
        the remote path will appear.
    foreground: bool
        Whether or not calling this function will block. Operation will
        typically be more stable if True.
    threads: bool
        Whether or not to create threads when responding to file operations
        within the mounted directory. Operation will typically be more
        stable if False.
    ready_file: bool
        Whether the FUSE process is ready. The ``.fuse_ready`` file will
        exist in the ``mount_point`` directory if True. Debugging purpose.
    ops_class: FUSEr or Subclass of FUSEr
        To override the default behavior of FUSEr. For Example, logging
        to file.

    Returns
    -------
    The started daemon ``threading.Thread`` when ``foreground`` is False,
    otherwise None.
    """

    def _mount():
        # Building the FUSE object performs the mount.
        return FUSE(
            ops_class(fs, path, ready_file=ready_file),
            mount_point,
            nothreads=not threads,
            foreground=foreground,
        )

    if foreground:  # pragma: no cover
        try:
            _mount()
        except KeyboardInterrupt:
            pass
        return None

    worker = threading.Thread(target=_mount)
    worker.daemon = True
    worker.start()
    return worker
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
def main(args):
    """Mount filesystem from chained URL to MOUNT_POINT.

    Examples:

    python3 -m fsspec.fuse memory /usr/share /tmp/mem

    python3 -m fsspec.fuse local /tmp/source /tmp/local \\
            -l /tmp/fsspecfuse.log

    You can also mount chained-URLs and use special settings:

    python3 -m fsspec.fuse 'filecache::zip::file://data.zip' \\
            / /tmp/zip \\
            -o 'filecache-cache_storage=/tmp/simplecache'

    You can specify the type of the setting by using `[int]` or `[bool]`,
    (`true`, `yes`, `1` represents the Boolean value `True`):

    python3 -m fsspec.fuse 'simplecache::ftp://ftp1.at.proftpd.org' \\
            /historic/packages/RPMS /tmp/ftp \\
            -o 'simplecache-cache_storage=/tmp/simplecache' \\
            -o 'simplecache-check_files=false[bool]' \\
            -o 'ftp-listings_expiry_time=60[int]' \\
            -o 'ftp-username=anonymous' \\
            -o 'ftp-password=xieyanbo'
    """
    # NOTE: the docstring above doubles as the parser description shown by
    # ``--help``, so it is runtime text and must stay user-oriented.

    class RawDescriptionArgumentParser(argparse.ArgumentParser):
        def format_help(self):
            # Swap argparse's re-wrapped description (second paragraph of the
            # help output) for the raw docstring so the examples keep their
            # line breaks.
            usage = super().format_help()
            parts = usage.split("\n\n")
            parts[1] = self.description.rstrip()
            return "\n\n".join(parts)

    parser = RawDescriptionArgumentParser(prog="fsspec.fuse", description=main.__doc__)
    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument("url", type=str, help="fs url")
    parser.add_argument("source_path", type=str, help="source directory in fs")
    parser.add_argument("mount_point", type=str, help="local directory")
    parser.add_argument(
        "-o",
        "--option",
        action="append",
        help="Any options of protocol included in the chained URL",
    )
    parser.add_argument(
        "-l", "--log-file", type=str, help="Logging FUSE debug info (Default: '')"
    )
    # The three switches below use ``store_false``: each feature is ON unless
    # the flag is given. The help texts describe that actual behaviour.
    parser.add_argument(
        "-f",
        "--foreground",
        action="store_false",
        help="Do not run in the foreground (Default: runs in the foreground)",
    )
    parser.add_argument(
        "-t",
        "--threads",
        action="store_false",
        help="Disable threads support (Default: threads support is enabled)",
    )
    parser.add_argument(
        "-r",
        "--ready-file",
        action="store_false",
        help="Disable the `.fuse_ready` file that exists after FUSE is ready. "
        "(Debugging purpose, Default: the file is enabled)",
    )
    args = parser.parse_args(args)

    # Turn the repeated ``-o key=value`` options into keyword arguments.
    # ``<fs>-<setting>=value`` entries are grouped per filesystem name.
    kwargs = {}
    for item in args.option or []:
        key, sep, value = item.partition("=")
        if not sep:
            parser.error(message=f"Wrong option: {item!r}")
        val = value.lower()
        if val.endswith("[int]"):
            value = int(value[: -len("[int]")])
        elif val.endswith("[bool]"):
            value = val[: -len("[bool]")] in ["1", "yes", "true"]

        if "-" in key:
            fs_name, setting_name = key.split("-", 1)
            kwargs.setdefault(fs_name, {})[setting_name] = value
        else:
            kwargs[key] = value

    if args.log_file:
        logging.basicConfig(
            level=logging.DEBUG,
            filename=args.log_file,
            format="%(asctime)s %(message)s",
        )

        class LoggingFUSEr(FUSEr, LoggingMixIn):
            pass

        fuser = LoggingFUSEr
    else:
        fuser = FUSEr

    fs, url_path = url_to_fs(args.url, **kwargs)
    logger.debug("Mounting %s to %s", url_path, str(args.mount_point))
    run(
        fs,
        args.source_path,
        args.mount_point,
        foreground=args.foreground,
        threads=args.threads,
        ready_file=args.ready_file,
        ops_class=fuser,
    )
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
if __name__ == "__main__":
    # Script entry point: pass the command-line arguments, without the
    # program name, on to main().
    import sys

    cli_args = sys.argv[1:]
    main(cli_args)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/generic.py
ADDED
|
@@ -0,0 +1,408 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import inspect
|
| 4 |
+
import logging
|
| 5 |
+
import os
|
| 6 |
+
import shutil
|
| 7 |
+
import uuid
|
| 8 |
+
from typing import Optional
|
| 9 |
+
|
| 10 |
+
from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper
|
| 11 |
+
from .callbacks import DEFAULT_CALLBACK
|
| 12 |
+
from .core import filesystem, get_filesystem_class, split_protocol, url_to_fs
|
| 13 |
+
|
| 14 |
+
_generic_fs = {}
|
| 15 |
+
logger = logging.getLogger("fsspec.generic")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def set_generic_fs(protocol, **storage_options):
    """Create a filesystem for ``protocol`` and store it in ``_generic_fs``.

    The stored instance is the one returned by ``_resolve_fs`` for this
    protocol when the resolution method is ``"generic"``.
    """
    instance = filesystem(protocol, **storage_options)
    _generic_fs[protocol] = instance
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
default_method = "default"
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def _resolve_fs(url, method=None, protocol=None, storage_options=None):
|
| 26 |
+
"""Pick instance of backend FS"""
|
| 27 |
+
method = method or default_method
|
| 28 |
+
protocol = protocol or split_protocol(url)[0]
|
| 29 |
+
storage_options = storage_options or {}
|
| 30 |
+
if method == "default":
|
| 31 |
+
return filesystem(protocol)
|
| 32 |
+
if method == "generic":
|
| 33 |
+
return _generic_fs[protocol]
|
| 34 |
+
if method == "current":
|
| 35 |
+
cls = get_filesystem_class(protocol)
|
| 36 |
+
return cls.current()
|
| 37 |
+
if method == "options":
|
| 38 |
+
fs, _ = url_to_fs(url, **storage_options.get(protocol, {}))
|
| 39 |
+
return fs
|
| 40 |
+
raise ValueError(f"Unknown FS resolution method: {method}")
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def rsync(
    source,
    destination,
    delete_missing=False,
    source_field="size",
    dest_field="size",
    update_cond="different",
    inst_kwargs=None,
    fs=None,
    **kwargs,
):
    """Sync files between two directory trees

    (experimental)

    Parameters
    ----------
    source: str
        Root of the directory tree to take files from. This must be a directory, but
        do not include any terminating "/" character
    destination: str
        Root path to copy into. The contents of this location should be
        identical to the contents of ``source`` when done. This will be made a
        directory, and the terminal "/" should not be included.
    delete_missing: bool
        If there are paths in the destination that don't exist in the
        source and this is True, delete them. Otherwise, leave them alone.
    source_field: str | callable
        If ``update_cond`` is "different", this is the key in the info
        of source files to consider for difference. May be a function of the
        info dict.
    dest_field: str | callable
        If ``update_cond`` is "different", this is the key in the info
        of destination files to consider for difference. May be a function of
        the info dict.
    update_cond: "different"|"always"|"never"
        If "always", every file is copied, regardless of whether it exists in
        the destination. If "never", files that exist in the destination are
        not copied again. If "different" (default), only copy if the info
        fields given by ``source_field`` and ``dest_field`` (usually "size")
        are different. Other comparisons may be added in the future.
    inst_kwargs: dict|None
        If ``fs`` is None, use this set of keyword arguments to make a
        GenericFileSystem instance
    fs: GenericFileSystem|None
        Instance to use if explicitly given. The instance defines how to
        make downstream file system instances from paths.
    **kwargs
        Forwarded to ``fs.cp``.

    Returns
    -------
    dict of the copy operations that were performed, {source: destination}

    Raises
    ------
    ValueError
        If ``source`` is not a directory.
    """
    fs = fs or GenericFileSystem(**(inst_kwargs or {}))
    source = fs._strip_protocol(source)
    destination = fs._strip_protocol(destination)

    def to_dest(path):
        # Swap only the leading root: an unbounded str.replace would also
        # rewrite later occurrences of the root text inside the path.
        return path.replace(source, destination, 1)

    def to_source(path):
        return path.replace(destination, source, 1)

    allfiles = fs.find(source, withdirs=True, detail=True)
    if not fs.isdir(source):
        raise ValueError("Can only rsync on a directory")
    otherfiles = fs.find(destination, withdirs=True, detail=True)
    dirs = [
        a
        for a, v in allfiles.items()
        if v["type"] == "directory" and to_dest(a) not in otherfiles
    ]
    logger.debug("%s directories to create", len(dirs))
    if dirs:
        fs.make_many_dirs([to_dest(dirn) for dirn in dirs], exist_ok=True)
    allfiles = {a: v for a, v in allfiles.items() if v["type"] == "file"}
    logger.debug("%s files to consider for copy", len(allfiles))
    to_delete = [
        o
        for o, v in otherfiles.items()
        if to_source(o) not in allfiles and v["type"] == "file"
    ]
    # From here on ``allfiles`` is turned into the {source: destination} copy
    # plan: entries are either rewritten to their target path or dropped.
    for k, v in allfiles.copy().items():
        otherfile = to_dest(k)
        if otherfile in otherfiles:
            if update_cond == "always":
                allfiles[k] = otherfile
            elif update_cond == "never":
                # Already present in the destination: must not be copied.
                # Without this branch the info dict stayed in the plan and
                # was handed to ``fs.cp`` as a target path.
                allfiles.pop(k)
            elif update_cond == "different":
                inf1 = source_field(v) if callable(source_field) else v[source_field]
                v2 = otherfiles[otherfile]
                inf2 = dest_field(v2) if callable(dest_field) else v2[dest_field]
                if inf1 != inf2:
                    # details mismatch, make copy
                    allfiles[k] = otherfile
                else:
                    # details match, don't copy
                    allfiles.pop(k)
        else:
            # file not in target yet
            allfiles[k] = otherfile
    logger.debug("%s files to copy", len(allfiles))
    if allfiles:
        source_files, target_files = zip(*allfiles.items())
        fs.cp(source_files, target_files, **kwargs)
    logger.debug("%s files to delete", len(to_delete))
    if delete_missing and to_delete:
        fs.rm(to_delete)
    return allfiles
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
class GenericFileSystem(AsyncFileSystem):
    """Wrapper over all other FS types

    <experimental!>

    This implementation is a single unified interface to be able to run FS operations
    over generic URLs, and dispatch to the specific implementations using the URL
    protocol prefix.

    Note: instances of this FS are always async, even if you never use it with any async
    backend.
    """

    protocol = "generic"  # there is no real reason to ever use a protocol with this FS

    def __init__(self, default_method="default", **kwargs):
        """

        Parameters
        ----------
        default_method: str (optional)
            Defines how to configure backend FS instances. Options are:
            - "default": instantiate like FSClass(), with no
              extra arguments; this is the default instance of that FS, and can be
              configured via the config system
            - "generic": takes instances from the `_generic_fs` dict in this module,
              which you must populate before use. Keys are by protocol
            - "current": takes the most recently instantiated version of each FS
        """
        self.method = default_method
        super().__init__(**kwargs)

    def _parent(self, path):
        """Return the parent of ``path`` as a full URL of the backend FS."""
        fs = _resolve_fs(path, self.method)
        return fs.unstrip_protocol(fs._parent(path))

    def _strip_protocol(self, path):
        """Normalise ``path``; the protocol prefix is kept in the result."""
        # normalization only
        fs = _resolve_fs(path, self.method)
        return fs.unstrip_protocol(fs._strip_protocol(path))

    async def _find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
        """Find entries below ``path``; returned names are full URLs."""
        fs = _resolve_fs(path, self.method)
        # Details are always requested from the backend so that the "name"
        # of every entry can be rewritten to a full URL.
        if fs.async_impl:
            out = await fs._find(
                path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
            )
        else:
            out = fs.find(
                path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
            )
        result = {}
        for k, v in out.items():
            name = fs.unstrip_protocol(k)
            v["name"] = name
            result[name] = v
        if detail:
            return result
        return list(result)

    async def _info(self, url, **kwargs):
        """Return the backend's info dict for ``url`` with a full-URL name."""
        fs = _resolve_fs(url, self.method)
        if fs.async_impl:
            out = await fs._info(url, **kwargs)
        else:
            out = fs.info(url, **kwargs)
        out["name"] = fs.unstrip_protocol(out["name"])
        return out

    async def _ls(
        self,
        url,
        detail=True,
        **kwargs,
    ):
        """List ``url``; returned names are full URLs."""
        fs = _resolve_fs(url, self.method)
        if fs.async_impl:
            out = await fs._ls(url, detail=True, **kwargs)
        else:
            out = fs.ls(url, detail=True, **kwargs)
        for o in out:
            o["name"] = fs.unstrip_protocol(o["name"])
        if detail:
            return out
        else:
            return [o["name"] for o in out]

    async def _cat_file(
        self,
        url,
        **kwargs,
    ):
        """Return the contents of ``url`` from its backend FS."""
        fs = _resolve_fs(url, self.method)
        if fs.async_impl:
            return await fs._cat_file(url, **kwargs)
        else:
            return fs.cat_file(url, **kwargs)

    async def _pipe_file(
        self,
        path,
        value,
        **kwargs,
    ):
        """Write ``value`` to ``path`` on its backend FS."""
        fs = _resolve_fs(path, self.method)
        if fs.async_impl:
            return await fs._pipe_file(path, value, **kwargs)
        else:
            return fs.pipe_file(path, value, **kwargs)

    async def _rm(self, url, **kwargs):
        """Remove one URL or a list of URLs.

        The backend is resolved from the first URL only.
        """
        urls = url
        if isinstance(urls, str):
            urls = [urls]
        fs = _resolve_fs(urls[0], self.method)
        if fs.async_impl:
            await fs._rm(urls, **kwargs)
        else:
            fs.rm(urls, **kwargs)

    async def _makedirs(self, path, exist_ok=False):
        """Create ``path`` and any missing parents on its backend FS."""
        logger.debug("Make dir %s", path)
        fs = _resolve_fs(path, self.method)
        if fs.async_impl:
            await fs._makedirs(path, exist_ok=exist_ok)
        else:
            fs.makedirs(path, exist_ok=exist_ok)

    def rsync(self, source, destination, **kwargs):
        """Sync files between two directory trees

        See `func:rsync` for more details.

        Returns the dict of copy operations produced by the module-level
        ``rsync`` function.
        """
        return rsync(source, destination, fs=self, **kwargs)

    async def _cp_file(
        self,
        url,
        url2,
        blocksize=2**20,
        callback=DEFAULT_CALLBACK,
        **kwargs,
    ):
        """Copy a single file, streaming it when the two backends differ.

        When both URLs resolve to the same FS instance the copy is delegated
        to that backend. Otherwise the source is read in ``blocksize`` chunks
        and written to the destination, reporting progress to ``callback``.
        """
        fs = _resolve_fs(url, self.method)
        fs2 = _resolve_fs(url2, self.method)
        if fs is fs2:
            # pure remote
            if fs.async_impl:
                return await fs._cp_file(url, url2, **kwargs)
            else:
                return fs.cp_file(url, url2, **kwargs)
        kw = {"blocksize": 0, "cache_type": "none"}
        # Pre-bind both handles so the cleanup can tell which files were
        # actually opened instead of relying on NameError.
        f1 = f2 = None
        try:
            f1 = (
                await fs.open_async(url, "rb")
                if hasattr(fs, "open_async")
                else fs.open(url, "rb", **kw)
            )
            callback.set_size(await maybe_await(f1.size))
            f2 = (
                await fs2.open_async(url2, "wb")
                if hasattr(fs2, "open_async")
                else fs2.open(url2, "wb", **kw)
            )
            while f1.size is None or f2.tell() < f1.size:
                data = await maybe_await(f1.read(blocksize))
                if not data:
                    # End of data. Also stops the loop when the source turns
                    # out shorter than its reported size, which would
                    # otherwise never terminate.
                    break
                await maybe_await(f2.write(data))
                callback.absolute_update(f2.tell())
        finally:
            # Close the destination first, then the source; the source is
            # closed even if closing the destination fails.
            try:
                if f2 is not None:
                    await maybe_await(f2.close())
            finally:
                if f1 is not None:
                    await maybe_await(f1.close())

    async def _make_many_dirs(self, urls, exist_ok=True):
        """Create several directories; the backend is taken from the first URL."""
        if not urls:
            return
        fs = _resolve_fs(urls[0], self.method)
        if fs.async_impl:
            coros = [fs._makedirs(u, exist_ok=exist_ok) for u in urls]
            await _run_coros_in_chunks(coros)
        else:
            for u in urls:
                fs.makedirs(u, exist_ok=exist_ok)

    make_many_dirs = sync_wrapper(_make_many_dirs)

    async def _copy(
        self,
        path1: list[str],
        path2: list[str],
        recursive: bool = False,
        on_error: str = "ignore",
        maxdepth: Optional[int] = None,
        batch_size: Optional[int] = None,
        tempdir: Optional[str] = None,
        **kwargs,
    ):
        """Copy the files in ``path1`` to the matching entries of ``path2``.

        Both backends are resolved from the first element of each list.
        Recursive copies are not implemented.
        """
        if recursive:
            raise NotImplementedError
        fs = _resolve_fs(path1[0], self.method)
        fs2 = _resolve_fs(path2[0], self.method)
        # not expanding paths atm., assume call is from rsync()
        if fs is fs2:
            # pure remote
            if fs.async_impl:
                return await fs._copy(path1, path2, **kwargs)
            else:
                return fs.copy(path1, path2, **kwargs)
        await copy_file_op(
            fs, path1, fs2, path2, tempdir, batch_size, on_error=on_error
        )
|
| 361 |
+
|
| 362 |
+
|
| 363 |
+
async def copy_file_op(
    fs1, url1, fs2, url2, tempdir=None, batch_size=20, on_error="ignore"
):
    """Copy files between two filesystems via local temporary files.

    ``url1`` and ``url2`` are paired element-wise; each pair is staged through
    a uniquely named file inside ``tempdir`` (a fresh ``tempfile.mkdtemp()``
    directory when none is given). The staging directory is removed when the
    copies finish or fail, including when it was supplied by the caller.
    """
    import tempfile

    staging_dir = tempdir or tempfile.mkdtemp()
    try:
        pending = []
        for src, dst in zip(url1, url2):
            scratch = os.path.join(staging_dir, uuid.uuid4().hex)
            pending.append(
                _copy_file_op(fs1, src, fs2, dst, scratch, on_error=on_error)
            )
        await _run_coros_in_chunks(pending, batch_size=batch_size)
    finally:
        shutil.rmtree(staging_dir)
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
async def _copy_file_op(fs1, url1, fs2, url2, local, on_error="ignore"):
    """Copy one file from ``fs1`` to ``fs2`` through the local path ``local``.

    The file is downloaded to ``local``, uploaded to ``url2`` and then the
    local copy is unlinked. Sync or async backend methods are chosen from
    each filesystem's ``async_impl`` flag.

    With ``on_error="raise"`` exceptions propagate; with any other value an
    ``Exception`` raised during the transfer is logged at debug level and
    swallowed.
    """
    if on_error == "raise":
        # An empty tuple in ``except`` matches nothing, so errors escape.
        swallowed = ()
    else:
        swallowed = Exception

    logger.debug("Copy %s -> %s", url1, url2)
    try:
        # Download to the staging file.
        if not fs1.async_impl:
            fs1.get_file(url1, local)
        else:
            await fs1._get_file(url1, local)

        # Upload from the staging file.
        if not fs2.async_impl:
            fs2.put_file(local, url2)
        else:
            await fs2._put_file(local, url2)

        os.unlink(local)
        logger.debug("Copy %s -> %s; done", url1, url2)
    except swallowed as err:
        logger.debug("ignoring cp exception for %s: %s", url1, err)
|
| 402 |
+
|
| 403 |
+
|
| 404 |
+
async def maybe_await(cor):
    """Return ``cor``, awaiting it first if it is a coroutine object.

    Lets callers treat results from sync and async file objects uniformly.
    Only coroutine objects (per ``inspect.iscoroutine``) are awaited; any
    other value is handed back unchanged.
    """
    return (await cor) if inspect.iscoroutine(cor) else cor
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/mapping.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import array
|
| 2 |
+
import logging
|
| 3 |
+
import posixpath
|
| 4 |
+
import warnings
|
| 5 |
+
from collections.abc import MutableMapping
|
| 6 |
+
from functools import cached_property
|
| 7 |
+
|
| 8 |
+
from fsspec.core import url_to_fs
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger("fsspec.mapping")
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class FSMap(MutableMapping):
    """Wrap a FileSystem instance as a mutable mapping.

    The keys of the mapping become files under the given root, and the
    values (which must be bytes) the contents of those files.

    Parameters
    ----------
    root: string
        prefix for all the files
    fs: FileSystem instance
    check: bool (=False)
        if true, require that ``root`` exists, then touch and remove a file
        at the location to check for write access.
    create: bool (=False)
        if true, make the ``root`` directory when it does not exist yet.
    missing_exceptions: None or tuple
        exception types that are treated as "missing key" and converted to
        ``KeyError`` when reading. Defaults to
        (FileNotFoundError, IsADirectoryError, NotADirectoryError).

    Examples
    --------
    >>> fs = FileSystem(**parameters)  # doctest: +SKIP
    >>> d = FSMap('my-data/path/', fs)  # doctest: +SKIP
    or, more likely
    >>> d = fs.get_mapper('my-data/path/')

    >>> d['loc1'] = b'Hello World'  # doctest: +SKIP
    >>> list(d.keys())  # doctest: +SKIP
    ['loc1']
    >>> d['loc1']  # doctest: +SKIP
    b'Hello World'
    """

    def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
        self.fs = fs
        self.root = fs._strip_protocol(root)
        # Prefix that is prepended to a key to obtain its full path: strip
        # the protocol from "<root>/x" and drop the trailing "x".
        self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
        if missing_exceptions is None:
            missing_exceptions = (
                FileNotFoundError,
                IsADirectoryError,
                NotADirectoryError,
            )
        self.missing_exceptions = missing_exceptions
        self.check = check
        self.create = create
        if create:
            if not self.fs.exists(root):
                self.fs.mkdir(root)
        if check:
            if not self.fs.exists(root):
                raise ValueError(
                    f"Path {root} does not exist. Create "
                    f" with the ``create=True`` keyword"
                )
            # Write probe: create and immediately delete a scratch file.
            self.fs.touch(root + "/a")
            self.fs.rm(root + "/a")

    @cached_property
    def dirfs(self):
        """dirfs instance that can be used with the same keys as the mapper"""
        from .implementations.dirfs import DirFileSystem

        return DirFileSystem(path=self._root_key_to_str, fs=self.fs)

    def clear(self):
        """Remove all keys below root - empties out mapping

        This is best effort: a failure of the backend to remove or recreate
        the root is logged at debug level and otherwise ignored.
        """
        logger.info("Clear mapping at %s", self.root)
        try:
            self.fs.rm(self.root, True)
            self.fs.mkdir(self.root)
        except Exception:
            # Deliberately swallowed, but KeyboardInterrupt/SystemExit are
            # no longer caught, unlike with the former bare ``except``.
            logger.debug("Clearing mapping at %s failed", self.root, exc_info=True)

    def getitems(self, keys, on_error="raise"):
        """Fetch multiple items from the store

        If the backend is async-able, this might proceed concurrently

        Parameters
        ----------
        keys: list(str)
            The keys to be fetched
        on_error : "raise", "omit", "return"
            If raise, an underlying exception will be raised (converted to KeyError
            if the type is in self.missing_exceptions); if omit, keys with exception
            will simply not be included in the output; if "return", all keys are
            included in the output, but the value will be bytes or an exception
            instance.

        Returns
        -------
        dict(key, bytes|exception)
        """
        keys2 = [self._key_to_str(k) for k in keys]
        # "omit" is implemented here by asking the backend to return the
        # exceptions and filtering them out below.
        oe = on_error if on_error == "raise" else "return"
        try:
            out = self.fs.cat(keys2, on_error=oe)
            if isinstance(out, bytes):
                # The backend returned bare bytes instead of a dict; wrap
                # them under the first requested path.
                out = {keys2[0]: out}
        except self.missing_exceptions as e:
            raise KeyError from e
        out = {
            k: (KeyError() if isinstance(v, self.missing_exceptions) else v)
            for k, v in out.items()
        }
        return {
            key: out[k2]
            for key, k2 in zip(keys, keys2)
            if on_error == "return" or not isinstance(out[k2], BaseException)
        }

    def setitems(self, values_dict):
        """Set the values of multiple items in the store

        Parameters
        ----------
        values_dict: dict(str, bytes)
        """
        values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()}
        self.fs.pipe(values)

    def delitems(self, keys):
        """Remove multiple keys from the store"""
        self.fs.rm([self._key_to_str(k) for k in keys])

    def _key_to_str(self, key):
        """Generate full path for the key"""
        if not isinstance(key, str):
            # Non-str keys are still accepted for now, with a deprecation
            # warning, and are converted with ``str``.
            warnings.warn(
                "from fsspec 2023.5 onward FSMap non-str keys will raise TypeError",
                DeprecationWarning,
            )
            if isinstance(key, list):
                key = tuple(key)
            key = str(key)
        return f"{self._root_key_to_str}{key}".rstrip("/")

    def _str_to_key(self, s):
        """Strip the root prefix from a path to leave the key name"""
        return s[len(self.root) :].lstrip("/")

    def __getitem__(self, key, default=None):
        """Retrieve data

        If the backend raises one of ``missing_exceptions``, ``default`` is
        returned when it is not None; otherwise ``KeyError(key)`` is raised.
        """
        k = self._key_to_str(key)
        try:
            result = self.fs.cat(k)
        except self.missing_exceptions:
            if default is not None:
                return default
            raise KeyError(key)
        return result

    def pop(self, key, default=None):
        """Pop data

        Returns the stored value (or ``default`` when the key is missing and
        ``default`` is not None) and removes the key if present.
        """
        result = self.__getitem__(key, default)
        try:
            del self[key]
        except KeyError:
            pass
        return result

    def __setitem__(self, key, value):
        """Store value in key"""
        key = self._key_to_str(key)
        self.fs.mkdirs(self.fs._parent(key), exist_ok=True)
        self.fs.pipe_file(key, maybe_convert(value))

    def __iter__(self):
        return (self._str_to_key(x) for x in self.fs.find(self.root))

    def __len__(self):
        return len(self.fs.find(self.root))

    def __delitem__(self, key):
        """Remove key

        Raises
        ------
        KeyError
            If the backend fails to remove the key; the original exception
            is attached as the cause.
        """
        try:
            self.fs.rm(self._key_to_str(key))
        except Exception as exc:
            # Report which key failed and keep the backend error chained.
            raise KeyError(key) from exc

    def __contains__(self, key):
        """Does key exist in mapping?"""
        path = self._key_to_str(key)
        return self.fs.isfile(path)

    def __reduce__(self):
        # Rebuild without re-running the ``check``/``create`` side effects.
        return FSMap, (self.root, self.fs, False, False, self.missing_exceptions)
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
def maybe_convert(value):
    """Turn array-like values into ``bytes``; pass anything else through.

    A value counts as array-like when it is an ``array.array`` or exposes
    ``__array__``. Such values are converted via ``memoryview``; all other
    values are returned unchanged.
    """
    array_like = isinstance(value, array.array) or hasattr(value, "__array__")
    if not array_like:
        return value
    if hasattr(value, "dtype") and value.dtype.kind in "Mm":
        # The buffer interface doesn't support datetime64/timedelta64 numpy
        # arrays, so reinterpret them as int64 first.
        value = value.view("int64")
    return bytes(memoryview(value))
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
def get_mapper(
    url="",
    check=False,
    create=False,
    missing_exceptions=None,
    alternate_root=None,
    **kwargs,
):
    """Create key-value interface for given URL and options

    The URL will be of the form "protocol://location" and point to the root
    of the mapper required. All keys will be file-names below this location,
    and their values the contents of each key.

    Also accepts compound URLs like zip::s3://bucket/file.zip , see ``fsspec.open``.

    Parameters
    ----------
    url: str
        Root URL of mapping
    check: bool
        Passed to ``FSMap``: whether to verify that the root exists and is
        writable before the mapper is returned
    create: bool
        Passed to ``FSMap``: whether to make the directory corresponding to
        the root before instantiating
    missing_exceptions: None or tuple
        If given, these exception types will be regarded as missing keys and
        return KeyError when trying to read data. By default, you get
        (FileNotFoundError, IsADirectoryError, NotADirectoryError)
    alternate_root: None or str
        In cases of complex URLs, the parser may fail to pick the correct part
        for the mapper root, so this arg can override
    **kwargs:
        Forwarded to ``url_to_fs`` when resolving the filesystem

    Returns
    -------
    ``FSMap`` instance, the dict-like key-value store.
    """
    # The protocol is removed here; it could be deferred to each open() on
    # the backend instead.
    filesystem, parsed_path = url_to_fs(url, **kwargs)
    if alternate_root is None:
        mapper_root = parsed_path
    else:
        mapper_root = alternate_root
    return FSMap(
        mapper_root,
        filesystem,
        check,
        create,
        missing_exceptions=missing_exceptions,
    )
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/parquet.py
ADDED
|
@@ -0,0 +1,541 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import json
|
| 3 |
+
import warnings
|
| 4 |
+
|
| 5 |
+
from .core import url_to_fs
|
| 6 |
+
from .utils import merge_offset_ranges
|
| 7 |
+
|
| 8 |
+
# Parquet-Specific Utilities for fsspec
|
| 9 |
+
#
|
| 10 |
+
# Most of the functions defined in this module are NOT
|
| 11 |
+
# intended for public consumption. The only exception
|
| 12 |
+
# to this is `open_parquet_file`, which should be used in
|
| 13 |
+
# place of `fs.open()` to open parquet-formatted files
|
| 14 |
+
# on remote file systems.
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def open_parquet_file(
    path,
    mode="rb",
    fs=None,
    metadata=None,
    columns=None,
    row_groups=None,
    storage_options=None,
    strict=False,
    engine="auto",
    max_gap=64_000,
    max_block=256_000_000,
    footer_sample_size=1_000_000,
    **kwargs,
):
    """
    Open a single Parquet file with its required byte ranges pre-fetched.

    The chosen parquet ``engine`` parses the footer metadata to work out
    which byte ranges are needed for the requested columns/row-groups. The
    file is then opened with the "parts" (`KnownPartsOfAFile`) caching
    strategy, seeded with those ranges.

    This is aimed at remote file systems and is unlikely to speed up
    parquet reads on local file systems.

    Parameters
    ----------
    path: str
        Target file path.
    mode: str, optional
        Mode option to be passed through to `fs.open`. Default is "rb".
    fs: AbstractFileSystem, optional
        Filesystem object to use for opening the file. If nothing is
        specified, one is inferred from ``path`` and ``storage_options``.
    metadata: Any, optional
        Parquet metadata object. Object type must be supported
        by the backend parquet engine. For now, only the "fastparquet"
        engine supports an explicit `ParquetFile` metadata object.
        If a metadata object is supplied, the remote footer metadata
        will not need to be transferred into local memory.
    columns: list, optional
        List of all column names that may be read from the file. An empty
        list is not supported by the byte-range logic; in that case the
        file is opened with a plain ``fs.open(path, mode=mode)``.
    row_groups : list, optional
        List of all row-groups that may be read from the file. This
        may be a list of row-group indices (integers), or it may be
        a list of `RowGroup` metadata objects (if the "fastparquet"
        engine is used).
    storage_options : dict, optional
        Used to generate an `AbstractFileSystem` object if `fs` was
        not specified.
    strict : bool, optional
        Whether the resulting `KnownPartsOfAFile` cache should
        fetch reads that go beyond a known byte-range boundary.
        If `False` (the default), any read that ends outside a
        known part will be zero padded. Note that using
        `strict=True` may be useful for debugging.
    engine : str, default "auto"
        Parquet engine to use for metadata parsing. Allowed options
        include "fastparquet", "pyarrow", and "auto". The specified
        engine must be installed in the current environment. If
        "auto" is specified, and both engines are installed,
        "fastparquet" will take precedence over "pyarrow".
    max_gap : int, optional
        Neighboring byte ranges will only be merged when their
        inter-range gap is <= `max_gap`. Default is 64KB.
    max_block : int, optional
        Neighboring byte ranges will only be merged when the size of
        the aggregated range is <= `max_block`. Default is 256MB.
    footer_sample_size : int, optional
        Number of bytes to read from the end of the path to look
        for the footer metadata. If the sampled bytes do not contain
        the footer, a second read request will be required, and
        performance will suffer. Default is 1MB.
    **kwargs :
        Optional key-word arguments to pass to `fs.open`. A
        ``cache_options`` entry is merged with the "data" and "strict"
        cache options set by this function.
    """

    # Resolve a filesystem when the caller did not hand one in.
    if fs is None:
        fs, _ = url_to_fs(path, **(storage_options or {}))

    # `columns == []` is not supported yet; use a default open instead.
    if columns is not None and len(columns) == 0:
        return fs.open(path, mode=mode)

    engine = _set_engine(engine)

    # Known byte ranges needed to read `columns` and/or `row_groups`.
    byte_ranges = _get_parquet_byte_ranges(
        [path],
        fs,
        metadata=metadata,
        columns=columns,
        row_groups=row_groups,
        engine=engine,
        max_gap=max_gap,
        max_block=max_block,
        footer_sample_size=footer_sample_size,
    )

    # The file name is the first key of the byte-range mapping, when any.
    if byte_ranges:
        target = next(iter(byte_ranges))
    else:
        target = path

    # Open with "parts" caching, layering our options over the caller's.
    user_cache_options = kwargs.pop("cache_options", {}).copy()
    return fs.open(
        target,
        mode=mode,
        cache_type="parts",
        cache_options={
            **user_cache_options,
            "data": byte_ranges.get(target, {}),
            "strict": strict,
        },
        **kwargs,
    )
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def _get_parquet_byte_ranges(
    paths,
    fs,
    metadata=None,
    columns=None,
    row_groups=None,
    max_gap=64_000,
    max_block=256_000_000,
    footer_sample_size=1_000_000,
    engine="auto",
):
    """Collect the byte ranges needed to read a column/row-group selection.

    Returns a dictionary keyed by path. Each value maps ``(start, stop)``
    offsets to the bytes fetched for that range, and is intended for use as
    the `data` argument of the `KnownPartsOfAFile` caching strategy of a
    single path.

    When ``metadata`` is given, the work is delegated to
    ``_get_parquet_byte_ranges_from_metadata``. When neither ``columns``
    nor ``row_groups`` is given, whole files are transferred in
    ``max_block``-sized pieces. Otherwise footers are sampled and the
    engine determines the ranges.
    """

    # Accept either an engine name or an already constructed engine.
    if isinstance(engine, str):
        engine = _set_engine(engine)

    if metadata is not None:
        # A parquet metadata object was provided, so the footer metadata
        # does not have to be transferred or parsed.
        return _get_parquet_byte_ranges_from_metadata(
            metadata,
            fs,
            engine,
            columns=columns,
            row_groups=row_groups,
            max_gap=max_gap,
            max_block=max_block,
        )

    file_sizes = fs.sizes(paths)

    # Accumulators shared by both branches below.
    result = {}
    data_paths, data_starts, data_ends = [], [], []
    add_header_magic = True

    if columns is None and row_groups is None:
        # No column/row-group selection: skip footer sampling and queue
        # every file in full, split into blocks of at most `max_block`.
        for idx, path in enumerate(paths):
            result[path] = {}
            for offset in range(0, file_sizes[idx], max_block):
                data_paths.append(path)
                data_starts.append(offset)
                data_ends.append(min(offset + max_block, file_sizes[idx]))
        add_header_magic = False  # "Magic" should already be included
    else:
        # A selection was made: sample the last `footer_sample_size` bytes
        # of each file (or the whole file when it is smaller than that).
        footer_ends = [file_sizes[idx] for idx, _ in enumerate(paths)]
        footer_starts = [
            max(0, file_sizes[idx] - footer_sample_size)
            for idx, _ in enumerate(paths)
        ]
        footer_samples = fs.cat_ranges(paths, footer_starts, footer_ends)

        # Check whether each sample holds the complete footer; the footer
        # length is read from the four bytes before the last four bytes.
        missing_footer_starts = footer_starts.copy()
        large_footer = 0
        for idx, _ in enumerate(paths):
            footer_size = int.from_bytes(footer_samples[idx][-8:-4], "little")
            real_footer_start = file_sizes[idx] - (footer_size + 8)
            if real_footer_start < footer_starts[idx]:
                missing_footer_starts[idx] = real_footer_start
                large_footer = max(large_footer, (footer_size + 8))
        if large_footer:
            warnings.warn(
                f"Not enough data was used to sample the parquet footer. "
                f"Try setting footer_sample_size >= {large_footer}."
            )
            # Fetch the missing leading part of each footer and prepend it.
            leading_blocks = fs.cat_ranges(
                paths,
                missing_footer_starts,
                footer_starts,
            )
            for idx, block in enumerate(leading_blocks):
                footer_samples[idx] = block + footer_samples[idx]
                footer_starts[idx] = missing_footer_starts[idx]

        # Work out the data ranges required for each path.
        for idx, path in enumerate(paths):
            if file_sizes[idx] < max_block:
                # Small file: take everything before the footer sample in
                # one range, unless the sample already is the whole file.
                if footer_starts[idx] > 0:
                    data_paths.append(path)
                    data_starts.append(0)
                    data_ends.append(footer_starts[idx])
                continue

            # Larger file: ask the engine for the column-chunk ranges.
            path_data_starts, path_data_ends = engine._parquet_byte_ranges(
                columns,
                row_groups=row_groups,
                footer=footer_samples[idx],
                footer_start=footer_starts[idx],
            )

            data_paths += [path] * len(path_data_starts)
            data_starts += path_data_starts
            data_ends += path_data_ends

        # Merge adjacent offset ranges.
        data_paths, data_starts, data_ends = merge_offset_ranges(
            data_paths,
            data_starts,
            data_ends,
            max_gap=max_gap,
            max_block=max_block,
            sort=False,  # Should already be sorted
        )

        # Seed `result` with the footer samples fetched above.
        for idx, path in enumerate(paths):
            result[path] = {
                (footer_starts[idx], footer_ends[idx]): footer_samples[idx]
            }

    # Transfer the queued data ranges into local memory.
    _transfer_ranges(fs, result, data_paths, data_starts, data_ends)

    # Add b"PAR1" to the header if necessary.
    if add_header_magic:
        _add_header_magic(result)

    return result
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
def _get_parquet_byte_ranges_from_metadata(
    metadata,
    fs,
    engine,
    columns=None,
    row_groups=None,
    max_gap=64_000,
    max_block=256_000_000,
):
    """Byte-range lookup for when a parquet ``metadata`` object is supplied.

    Simplified counterpart of `_get_parquet_byte_ranges`: because an
    engine-specific metadata object is available, no remote footer has to
    be transferred before the required byte ranges are calculated.
    Returns the same ``{path: {(start, stop): bytes}}`` structure.
    """

    # The engine derives the raw ranges straight from the metadata.
    raw_paths, raw_starts, raw_ends = engine._parquet_byte_ranges(
        columns,
        row_groups=row_groups,
        metadata=metadata,
    )

    # Merge adjacent offset ranges.
    merged_paths, merged_starts, merged_ends = merge_offset_ranges(
        raw_paths,
        raw_starts,
        raw_ends,
        max_gap=max_gap,
        max_block=max_block,
        sort=False,  # Should be sorted
    )

    # One empty range-map per distinct path, then fill them with data.
    result = {fn: {} for fn in set(merged_paths)}
    _transfer_ranges(fs, result, merged_paths, merged_starts, merged_ends)

    # Add b"PAR1" to the header.
    _add_header_magic(result)

    return result
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
def _transfer_ranges(fs, blocks, paths, starts, ends):
|
| 329 |
+
# Use cat_ranges to gather the data byte_ranges
|
| 330 |
+
ranges = (paths, starts, ends)
|
| 331 |
+
for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges)):
|
| 332 |
+
blocks[path][(start, stop)] = data
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
def _add_header_magic(data):
|
| 336 |
+
# Add b"PAR1" to file headers
|
| 337 |
+
for path in list(data.keys()):
|
| 338 |
+
add_magic = True
|
| 339 |
+
for k in data[path].keys():
|
| 340 |
+
if k[0] == 0 and k[1] >= 4:
|
| 341 |
+
add_magic = False
|
| 342 |
+
break
|
| 343 |
+
if add_magic:
|
| 344 |
+
data[path][(0, 4)] = b"PAR1"
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
def _set_engine(engine_str):
|
| 348 |
+
# Define a list of parquet engines to try
|
| 349 |
+
if engine_str == "auto":
|
| 350 |
+
try_engines = ("fastparquet", "pyarrow")
|
| 351 |
+
elif not isinstance(engine_str, str):
|
| 352 |
+
raise ValueError(
|
| 353 |
+
"Failed to set parquet engine! "
|
| 354 |
+
"Please pass 'fastparquet', 'pyarrow', or 'auto'"
|
| 355 |
+
)
|
| 356 |
+
elif engine_str not in ("fastparquet", "pyarrow"):
|
| 357 |
+
raise ValueError(f"{engine_str} engine not supported by `fsspec.parquet`")
|
| 358 |
+
else:
|
| 359 |
+
try_engines = [engine_str]
|
| 360 |
+
|
| 361 |
+
# Try importing the engines in `try_engines`,
|
| 362 |
+
# and choose the first one that succeeds
|
| 363 |
+
for engine in try_engines:
|
| 364 |
+
try:
|
| 365 |
+
if engine == "fastparquet":
|
| 366 |
+
return FastparquetEngine()
|
| 367 |
+
elif engine == "pyarrow":
|
| 368 |
+
return PyarrowEngine()
|
| 369 |
+
except ImportError:
|
| 370 |
+
pass
|
| 371 |
+
|
| 372 |
+
# Raise an error if a supported parquet engine
|
| 373 |
+
# was not found
|
| 374 |
+
raise ImportError(
|
| 375 |
+
f"The following parquet engines are not installed "
|
| 376 |
+
f"in your python environment: {try_engines}."
|
| 377 |
+
f"Please install 'fastparquert' or 'pyarrow' to "
|
| 378 |
+
f"utilize the `fsspec.parquet` module."
|
| 379 |
+
)
|
| 380 |
+
|
| 381 |
+
|
| 382 |
+
class FastparquetEngine:
    """Byte-range helper backed by fastparquet.

    Instantiating the class imports fastparquet (so construction fails with
    ``ImportError`` when it is missing), and ``_parquet_byte_ranges`` turns
    parquet metadata into lists of byte offsets. Further fastparquet-specific
    logic may be added here in the future.
    """

    def __init__(self):
        import fastparquet as fp

        self.fp = fp

    def _row_group_filename(self, row_group, pf):
        """Return the file path that ``pf`` reports for ``row_group``."""
        return pf.row_group_filename(row_group)

    def _parquet_byte_ranges(
        self,
        columns,
        row_groups=None,
        metadata=None,
        footer=None,
        footer_start=None,
    ):
        """Collect the byte ranges of the selected column chunks.

        Parameters
        ----------
        columns: iterable of str or None
            Column names to keep; ``None`` keeps every column. Index columns
            listed in the pandas metadata are added automatically.
        row_groups: list or None
            Either row-group indices, or row-group metadata objects. A falsy
            value selects every row group in the file metadata.
        metadata: object or None
            Already-parsed file metadata; when ``None`` it is parsed from
            ``footer`` with ``fastparquet.ParquetFile``.
        footer: bytes or None
            Raw footer bytes, only read when ``metadata`` is ``None``.
        footer_start: int or None
            Offset at which the footer begins; chunks starting at or after it
            are skipped and chunk ends are clipped to it.

        Returns
        -------
        ``(data_paths, data_starts, data_ends)`` when ``metadata`` is truthy,
        otherwise ``(data_starts, data_ends)``.
        """
        data_paths, data_starts, data_ends = [], [], []

        # Use the supplied metadata, or parse it from the footer bytes
        parquet_file = metadata
        if parquet_file is None:
            parquet_file = self.fp.ParquetFile(io.BytesIO(footer))

        # Build the set of wanted columns, adding any index columns named in
        # the pandas metadata (dict entries describe a RangeIndex: ignored)
        wanted = None
        if columns is not None:
            wanted = set(columns)
            if hasattr(parquet_file, "pandas_metadata"):
                index_names = [
                    entry
                    for entry in parquet_file.pandas_metadata.get("index_columns", [])
                    if not isinstance(entry, dict)
                ]
                wanted |= set(index_names)

        # `row_groups` is either a list of row-group metadata objects
        # or a list of integer indices into the file's row groups
        if row_groups and not isinstance(row_groups[0], int):
            selected = None
        else:
            selected = row_groups
            row_groups = parquet_file.row_groups

        for position, row_group in enumerate(row_groups):
            if selected is not None and position not in selected:
                # Not one of the targeted row groups
                continue

            # Find the target parquet-file path for `row_group`
            path = self._row_group_filename(row_group, parquet_file)

            for chunk in row_group.columns:
                name = chunk.meta_data.path_in_schema[0]
                if wanted is not None and name not in wanted:
                    # Not one of the targeted columns
                    continue

                # A chunk starts at its dictionary page if it has one
                start = chunk.meta_data.dictionary_page_offset
                if start is None:
                    start = chunk.meta_data.data_page_offset
                size = chunk.meta_data.total_compressed_size

                if footer_start is not None and not start < footer_start:
                    continue

                stop = start + size
                data_paths.append(path)
                data_starts.append(start)
                data_ends.append(min(stop, footer_start or stop))

        if metadata:
            # The metadata in this call may map to multiple
            # file paths. Need to include `data_paths`
            return data_paths, data_starts, data_ends
        return data_starts, data_ends
|
| 465 |
+
|
| 466 |
+
|
| 467 |
+
class PyarrowEngine:
    """Byte-range helper backed by pyarrow.

    Instantiating the class imports ``pyarrow.parquet`` (so construction
    fails with ``ImportError`` when it is missing), and
    ``_parquet_byte_ranges`` turns a parquet footer into lists of byte
    offsets. Further pyarrow-specific logic may be added here in the future.
    """

    def __init__(self):
        import pyarrow.parquet as pq

        self.pq = pq

    def _row_group_filename(self, row_group, metadata):
        """Not available for this engine; always raises NotImplementedError."""
        raise NotImplementedError

    def _parquet_byte_ranges(
        self,
        columns,
        row_groups=None,
        metadata=None,
        footer=None,
        footer_start=None,
    ):
        """Collect the byte ranges of the selected column chunks.

        Parameters
        ----------
        columns: iterable of str or None
            Column names to keep; ``None`` keeps every column. A chunk is
            kept when its full dotted path, or the part before the first
            ``"."``, is in the set. Index columns listed in the pandas
            metadata are added automatically.
        row_groups: container of int or None
            Row-group indices to keep; ``None`` keeps every row group.
        metadata: None
            Must be ``None``; this engine only works from ``footer``.
        footer: bytes
            Raw footer bytes, parsed with ``pyarrow.parquet.ParquetFile``.
        footer_start: int or None
            Offset at which the footer begins; chunks starting at or after it
            are skipped and chunk ends are clipped to it. ``None`` applies no
            bound, matching ``FastparquetEngine``.

        Returns
        -------
        ``(data_starts, data_ends)``: two parallel lists of offsets.

        Raises
        ------
        ValueError
            If ``metadata`` is not ``None``.
        """
        if metadata is not None:
            raise ValueError("metadata input not supported for PyarrowEngine")

        data_starts, data_ends = [], []
        md = self.pq.ParquetFile(io.BytesIO(footer)).metadata

        # Convert columns to a set and add any index columns
        # specified in the pandas metadata (just in case)
        column_set = None if columns is None else set(columns)
        if column_set is not None:
            schema = md.schema.to_arrow_schema()
            has_pandas_metadata = (
                schema.metadata is not None and b"pandas" in schema.metadata
            )
            if has_pandas_metadata:
                md_index = [
                    ind
                    for ind in json.loads(
                        schema.metadata[b"pandas"].decode("utf8")
                    ).get("index_columns", [])
                    # Ignore RangeIndex information
                    if not isinstance(ind, dict)
                ]
                column_set |= set(md_index)

        # Loop through column chunks to add required byte ranges
        for r in range(md.num_row_groups):
            # Skip this row-group if we are targeting
            # specific row-groups
            if row_groups is not None and r not in row_groups:
                continue
            row_group = md.row_group(r)
            for c in range(row_group.num_columns):
                column = row_group.column(c)
                name = column.path_in_schema
                split_name = name.split(".")[0]
                # Skip this column if we are targeting
                # specific columns
                if (
                    column_set is not None
                    and name not in column_set
                    and split_name not in column_set
                ):
                    continue
                # A chunk starts at its dictionary page if it has one
                file_offset0 = column.dictionary_page_offset
                if file_offset0 is None:
                    file_offset0 = column.data_page_offset
                chunk_end = file_offset0 + column.total_compressed_size
                if footer_start is None:
                    # No footer boundary known: keep the chunk unclipped
                    # instead of failing on an int/None comparison
                    data_starts.append(file_offset0)
                    data_ends.append(chunk_end)
                elif file_offset0 < footer_start:
                    data_starts.append(file_offset0)
                    data_ends.append(min(chunk_end, footer_start))
        return data_starts, data_ends
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/registry.py
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import importlib
|
| 4 |
+
import types
|
| 5 |
+
import warnings
|
| 6 |
+
|
| 7 |
+
__all__ = ["registry", "get_filesystem_class", "default"]
|
| 8 |
+
|
| 9 |
+
# internal, mutable
|
| 10 |
+
_registry: dict[str, type] = {}
|
| 11 |
+
|
| 12 |
+
# external, immutable
|
| 13 |
+
registry = types.MappingProxyType(_registry)
|
| 14 |
+
default = "file"
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def register_implementation(name, cls, clobber=False, errtxt=None):
    """Associate a protocol name with a filesystem implementation.

    Parameters
    ----------
    name: str
        Protocol name to associate with the class
    cls: class or str
        A class is stored directly in the registry. A string is taken as
        the dotted path of a class (like package.module.class) and is stored
        in ``known_implementations``, so nothing is imported here.
    clobber: bool (optional)
        Whether an existing entry under the same name may be replaced. When
        False, re-registering the identical class/path is a silent no-op and
        anything else raises ``ValueError``.
    errtxt: str (optional)
        Stored as the "err" text of a string registration; a generic message
        is generated when omitted. Unused for class registrations.
    """
    may_overwrite = clobber is not False

    if isinstance(cls, str):
        # Deferred registration: only the import path is remembered
        if may_overwrite or name not in known_implementations:
            known_implementations[name] = {
                "class": cls,
                "err": errtxt or f"{cls} import failed for protocol {name}",
            }
        elif cls != known_implementations[name]["class"]:
            raise ValueError(
                f"Name ({name}) already in the known_implementations and clobber "
                f"is False"
            )
        return

    # Direct registration of a class object
    if may_overwrite or name not in registry:
        _registry[name] = cls
    elif _registry[name] is not cls:
        raise ValueError(
            f"Name ({name}) already in the registry and clobber is False"
        )
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# protocols mapped to the class which implements them. This dict can be
# updated with register_implementation. Each value holds the dotted import
# path under "class" and, optionally, the message to show when that import
# fails under "err".
known_implementations = {
    "abfs": {
        "class": "adlfs.AzureBlobFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
    },
    "adl": {
        "class": "adlfs.AzureDatalakeFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen1",
    },
    "arrow_hdfs": {
        "class": "fsspec.implementations.arrow.HadoopFileSystem",
        "err": "pyarrow and local java libraries required for HDFS",
    },
    "asynclocal": {
        "class": "morefs.asyn_local.AsyncLocalFileSystem",
        "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
    },
    "az": {
        "class": "adlfs.AzureBlobFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
    },
    "blockcache": {"class": "fsspec.implementations.cached.CachingFileSystem"},
    "box": {
        "class": "boxfs.BoxFileSystem",
        "err": "Please install boxfs to access BoxFileSystem",
    },
    "cached": {"class": "fsspec.implementations.cached.CachingFileSystem"},
    "dask": {
        "class": "fsspec.implementations.dask.DaskWorkerFileSystem",
        "err": "Install dask distributed to access worker file system",
    },
    "data": {"class": "fsspec.implementations.data.DataFileSystem"},
    "dbfs": {
        "class": "fsspec.implementations.dbfs.DatabricksFileSystem",
        "err": "Install the requests package to use the DatabricksFileSystem",
    },
    "dir": {"class": "fsspec.implementations.dirfs.DirFileSystem"},
    "dropbox": {
        "class": "dropboxdrivefs.DropboxDriveFileSystem",
        "err": (
            'DropboxFileSystem requires "dropboxdrivefs", "requests" and '
            '"dropbox" to be installed'
        ),
    },
    "dvc": {
        "class": "dvc.api.DVCFileSystem",
        "err": "Install dvc to access DVCFileSystem",
    },
    "file": {"class": "fsspec.implementations.local.LocalFileSystem"},
    "filecache": {"class": "fsspec.implementations.cached.WholeFileCacheFileSystem"},
    "ftp": {"class": "fsspec.implementations.ftp.FTPFileSystem"},
    "gcs": {
        "class": "gcsfs.GCSFileSystem",
        "err": "Please install gcsfs to access Google Storage",
    },
    "gdrive": {
        "class": "gdrivefs.GoogleDriveFileSystem",
        "err": "Please install gdrivefs for access to Google Drive",
    },
    "generic": {"class": "fsspec.generic.GenericFileSystem"},
    "git": {
        "class": "fsspec.implementations.git.GitFileSystem",
        "err": "Install pygit2 to browse local git repos",
    },
    "github": {
        "class": "fsspec.implementations.github.GithubFileSystem",
        "err": "Install the requests package to use the github FS",
    },
    "gs": {
        "class": "gcsfs.GCSFileSystem",
        "err": "Please install gcsfs to access Google Storage",
    },
    "hdfs": {
        "class": "fsspec.implementations.arrow.HadoopFileSystem",
        "err": "pyarrow and local java libraries required for HDFS",
    },
    "hf": {
        "class": "huggingface_hub.HfFileSystem",
        "err": "Install huggingface_hub to access HfFileSystem",
    },
    "http": {
        "class": "fsspec.implementations.http.HTTPFileSystem",
        "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
    },
    "https": {
        "class": "fsspec.implementations.http.HTTPFileSystem",
        "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
    },
    "jlab": {
        "class": "fsspec.implementations.jupyter.JupyterFileSystem",
        "err": "Jupyter FS requires requests to be installed",
    },
    "jupyter": {
        "class": "fsspec.implementations.jupyter.JupyterFileSystem",
        "err": "Jupyter FS requires requests to be installed",
    },
    "lakefs": {
        "class": "lakefs_spec.LakeFSFileSystem",
        "err": "Please install lakefs-spec to access LakeFSFileSystem",
    },
    "libarchive": {
        "class": "fsspec.implementations.libarchive.LibArchiveFileSystem",
        "err": "LibArchive requires to be installed",
    },
    "local": {"class": "fsspec.implementations.local.LocalFileSystem"},
    "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"},
    "oci": {
        "class": "ocifs.OCIFileSystem",
        "err": "Install ocifs to access OCI Object Storage",
    },
    "ocilake": {
        "class": "ocifs.OCIFileSystem",
        "err": "Install ocifs to access OCI Data Lake",
    },
    "oss": {
        "class": "ossfs.OSSFileSystem",
        "err": "Install ossfs to access Alibaba Object Storage System",
    },
    "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"},
    "root": {
        "class": "fsspec_xrootd.XRootDFileSystem",
        "err": (
            "Install fsspec-xrootd to access xrootd storage system. "
            "Note: 'root' is the protocol name for xrootd storage systems, "
            "not referring to root directories"
        ),
    },
    "s3": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
    "s3a": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
    "sftp": {
        "class": "fsspec.implementations.sftp.SFTPFileSystem",
        "err": 'SFTPFileSystem requires "paramiko" to be installed',
    },
    "simplecache": {"class": "fsspec.implementations.cached.SimpleCacheFileSystem"},
    "smb": {
        "class": "fsspec.implementations.smb.SMBFileSystem",
        "err": 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed',
    },
    "ssh": {
        "class": "fsspec.implementations.sftp.SFTPFileSystem",
        "err": 'SFTPFileSystem requires "paramiko" to be installed',
    },
    "tar": {"class": "fsspec.implementations.tar.TarFileSystem"},
    "wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"},
    "webdav": {
        "class": "webdav4.fsspec.WebdavFileSystem",
        "err": "Install webdav4 to access WebDAV",
    },
    "webhdfs": {
        "class": "fsspec.implementations.webhdfs.WebHDFS",
        "err": 'webHDFS access requires "requests" to be installed',
    },
    "zip": {"class": "fsspec.implementations.zip.ZipFileSystem"},
}

# Import-time sanity check: the literal above must list its keys in
# alphabetical order
assert list(known_implementations) == sorted(
    known_implementations
), "Not in alphabetical order"
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
def get_filesystem_class(protocol):
    """Fetch named protocol implementation from the registry

    The dict ``known_implementations`` maps protocol names to the locations
    of classes implementing the corresponding file-system. When used for the
    first time, appropriate imports will happen and the class will be placed in
    the registry. All subsequent calls will fetch directly from the registry.

    Some protocol implementations require additional dependencies, and so the
    import may fail. In this case, the string in the "err" field of the
    ``known_implementations`` will be given as the error message; entries
    without an "err" field get a generic message instead.

    Parameters
    ----------
    protocol: str or None
        Protocol name; a falsy value selects the module-level ``default``.

    Returns
    -------
    The registered implementation class. If its ``protocol`` attribute is
    missing, ``None`` or ``"abstract"``, it is set to ``protocol``.

    Raises
    ------
    ValueError
        If the protocol is neither registered nor in ``known_implementations``.
    ImportError
        If importing the implementation class fails.
    """
    if not protocol:
        protocol = default

    if protocol not in registry:
        if protocol not in known_implementations:
            raise ValueError(f"Protocol not known: {protocol}")
        bit = known_implementations[protocol]
        try:
            register_implementation(protocol, _import_class(bit["class"]))
        except ImportError as e:
            # Not every entry carries an "err" text; indexing it directly
            # would replace the ImportError with a KeyError
            message = (
                bit.get("err")
                or f"{bit['class']} import failed for protocol {protocol}"
            )
            raise ImportError(message) from e
    cls = registry[protocol]
    if getattr(cls, "protocol", None) in ("abstract", None):
        cls.protocol = protocol

    return cls
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
s3_msg = """Your installed version of s3fs is very old and known to cause
|
| 253 |
+
severe performance issues, see also https://github.com/dask/dask/issues/10276
|
| 254 |
+
|
| 255 |
+
To fix, you should specify a lower version bound on s3fs, or
|
| 256 |
+
update the current installation.
|
| 257 |
+
"""
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def _import_class(cls, minv=None):
|
| 261 |
+
"""Take a string FQP and return the imported class or identifier
|
| 262 |
+
|
| 263 |
+
cls is of the form "package.module.klass" or "package.module:subobject.klass"
|
| 264 |
+
"""
|
| 265 |
+
if ":" in cls:
|
| 266 |
+
mod, name = cls.rsplit(":", 1)
|
| 267 |
+
s3 = mod == "s3fs"
|
| 268 |
+
mod = importlib.import_module(mod)
|
| 269 |
+
if s3 and mod.__version__.split(".") < ["0", "5"]:
|
| 270 |
+
warnings.warn(s3_msg)
|
| 271 |
+
for part in name.split("."):
|
| 272 |
+
mod = getattr(mod, part)
|
| 273 |
+
return mod
|
| 274 |
+
else:
|
| 275 |
+
mod, name = cls.rsplit(".", 1)
|
| 276 |
+
s3 = mod == "s3fs"
|
| 277 |
+
mod = importlib.import_module(mod)
|
| 278 |
+
if s3 and mod.__version__.split(".") < ["0", "5"]:
|
| 279 |
+
warnings.warn(s3_msg)
|
| 280 |
+
return getattr(mod, name)
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
def filesystem(protocol, **storage_options):
    """Create a filesystem instance for ``protocol``.

    The implementation class is looked up with ``get_filesystem_class`` and
    called with ``storage_options`` as keyword arguments; their meaning is
    specific to the chosen protocol. Passing ``"arrow_hdfs"`` additionally
    emits a ``DeprecationWarning`` pointing to ``"hdfs"``.
    """
    deprecated = protocol == "arrow_hdfs"
    if deprecated:
        warnings.warn(
            "The 'arrow_hdfs' protocol has been deprecated and will be "
            "removed in the future. Specify it as 'hdfs'.",
            DeprecationWarning,
        )

    return get_filesystem_class(protocol)(**storage_options)
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
def available_protocols():
    """List the protocol names present in ``known_implementations``.

    A name being listed does not mean it is usable: any given protocol may
    require extra packages to be importable.
    """
    protocol_names = list(known_implementations)
    return protocol_names
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/transaction.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import deque
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class Transaction:
    """Filesystem transaction write context

    Collects files whose commit or discard is deferred, so that several
    write operations can be finalized semi-atomically. The instance is meant
    to live on the ``.transaction`` attribute of the filesystem it wraps.
    """

    def __init__(self, fs, **kwargs):
        """
        Parameters
        ----------
        fs: FileSystem instance
        kwargs: accepted and ignored
        """
        self.fs = fs
        self.files = deque()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the context: commit on a clean exit, discard on exception"""
        clean_exit = exc_type is None
        self.complete(commit=clean_exit)
        # `complete` normally detaches the filesystem already; this only
        # runs when `self.fs` is still set (and truthy) afterwards
        if self.fs:
            self.fs._intrans = False
            self.fs._transaction = None
            self.fs = None

    def start(self):
        """Start a transaction on this FileSystem"""
        # fresh queue: drops anything left over from a failed completion
        self.files = deque()
        self.fs._intrans = True

    def complete(self, commit=True):
        """Finish transaction: commit or discard all deferred files"""
        finish = "commit" if commit else "discard"
        # Files are finalized in the order they were queued
        while self.files:
            deferred = self.files.popleft()
            getattr(deferred, finish)()
        # Mark the filesystem as out of the transaction and detach from it
        self.fs._intrans = False
        self.fs._transaction = None
        self.fs = None
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class FileActor:
    """Holds a list of deferred files and finalizes them all at once."""

    def __init__(self):
        self.files = []

    def commit(self):
        """Call ``commit()`` on every held file, then empty the list."""
        held = self.files
        for item in held:
            item.commit()
        held.clear()

    def discard(self):
        """Call ``discard()`` on every held file, then empty the list."""
        held = self.files
        for item in held:
            item.discard()
        held.clear()

    def append(self, f):
        """Add ``f`` to the held files."""
        self.files.append(f)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
class DaskTransaction(Transaction):
    """Transaction whose deferred files are held by a ``FileActor``
    submitted as an actor to the default ``distributed`` client.
    """

    def __init__(self, fs):
        """
        Parameters
        ----------
        fs: FileSystem instance
        """
        import distributed

        super().__init__(fs)
        client = distributed.default_client()
        # Replace the local deque with the remote actor holding the files
        self.files = client.submit(FileActor, actor=True).result()

    def complete(self, commit=True):
        """Finish transaction: commit or discard all deferred files"""
        if commit:
            self.files.commit().result()
        else:
            self.files.discard().result()
        self.fs._intrans = False
        # Also drop the filesystem's reference to this finished transaction,
        # as `Transaction.complete` does; otherwise the filesystem keeps an
        # instance whose `fs` is None
        self.fs._transaction = None
        self.fs = None
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/utils.py
ADDED
|
@@ -0,0 +1,740 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import contextlib
|
| 4 |
+
import logging
|
| 5 |
+
import math
|
| 6 |
+
import os
|
| 7 |
+
import pathlib
|
| 8 |
+
import re
|
| 9 |
+
import sys
|
| 10 |
+
import tempfile
|
| 11 |
+
from functools import partial
|
| 12 |
+
from hashlib import md5
|
| 13 |
+
from importlib.metadata import version
|
| 14 |
+
from typing import (
|
| 15 |
+
IO,
|
| 16 |
+
TYPE_CHECKING,
|
| 17 |
+
Any,
|
| 18 |
+
Callable,
|
| 19 |
+
Iterable,
|
| 20 |
+
Iterator,
|
| 21 |
+
Sequence,
|
| 22 |
+
TypeVar,
|
| 23 |
+
)
|
| 24 |
+
from urllib.parse import urlsplit
|
| 25 |
+
|
| 26 |
+
if TYPE_CHECKING:
|
| 27 |
+
from typing_extensions import TypeGuard
|
| 28 |
+
|
| 29 |
+
from fsspec.spec import AbstractFileSystem
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
DEFAULT_BLOCK_SIZE = 5 * 2**20
|
| 33 |
+
|
| 34 |
+
T = TypeVar("T")
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def infer_storage_options(
    urlpath: str, inherit_storage_options: dict[str, Any] | None = None
) -> dict[str, Any]:
    """Infer storage options from URL path and merge it with existing storage
    options.

    Parameters
    ----------
    urlpath: str or unicode
        Either local absolute file path or URL (hdfs://namenode:8020/file.csv)
    inherit_storage_options: dict (optional)
        Its contents will get merged with the inferred information from the
        given path. Note that plain local paths and http(s) URLs return
        early, before this merge takes place.

    Returns
    -------
    Storage options dict. It always has "protocol" and "path"; "host",
    "port", "username", "password", "url_query" and "url_fragment" are added
    when the URL provides them.

    Raises
    ------
    KeyError
        If an inherited option conflicts with an inferred one.

    Examples
    --------
    >>> infer_storage_options('/mnt/datasets/test.csv')  # doctest: +SKIP
    {"protocol": "file", "path": "/mnt/datasets/test.csv"}
    >>> infer_storage_options(
    ...     'hdfs://username:pwd@node:123/mnt/datasets/test.csv?q=1',
    ...     inherit_storage_options={'extra': 'value'},
    ... )  # doctest: +SKIP
    {"protocol": "hdfs", "username": "username", "password": "pwd",
    "host": "node", "port": 123, "path": "/mnt/datasets/test.csv",
    "url_query": "q=1", "extra": "value"}
    """
    # Handle Windows paths including disk name in this special case, and
    # anything that does not start with "<scheme>://": both are local paths
    if (
        re.match(r"^[a-zA-Z]:[\\/]", urlpath)
        or re.match(r"^[a-zA-Z0-9]+://", urlpath) is None
    ):
        return {"protocol": "file", "path": urlpath}

    parsed_path = urlsplit(urlpath)
    protocol = parsed_path.scheme or "file"
    if parsed_path.fragment:
        # Keep the fragment as part of the path
        path = "#".join([parsed_path.path, parsed_path.fragment])
    else:
        path = parsed_path.path
    if protocol == "file":
        # Special case parsing file protocol URL on Windows according to:
        # https://msdn.microsoft.com/en-us/library/jj710207.aspx
        windows_path = re.match(r"^/([a-zA-Z])[:|]([\\/].*)$", path)
        if windows_path:
            drive, remainder = windows_path.groups()
            path = f"{drive}:{remainder}"

    if protocol in ["http", "https"]:
        # for HTTP, we don't want to parse, as requests will anyway
        return {"protocol": protocol, "path": urlpath}

    options: dict[str, Any] = {"protocol": protocol, "path": path}

    if parsed_path.netloc:
        # Parse `hostname` from netloc manually because `parsed_path.hostname`
        # lowercases the hostname which is not always desirable (e.g. in S3):
        # https://github.com/dask/dask/issues/1417
        options["host"] = parsed_path.netloc.rsplit("@", 1)[-1].rsplit(":", 1)[0]

        if protocol in ("s3", "s3a", "gcs", "gs"):
            # For these protocols the host is prepended to the path
            options["path"] = options["host"] + options["path"]
        if parsed_path.port:
            options["port"] = parsed_path.port
        if parsed_path.username:
            options["username"] = parsed_path.username
        if parsed_path.password:
            options["password"] = parsed_path.password

    if parsed_path.query:
        options["url_query"] = parsed_path.query
    if parsed_path.fragment:
        options["url_fragment"] = parsed_path.fragment

    if inherit_storage_options:
        update_storage_options(options, inherit_storage_options)

    return options
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def update_storage_options(
    options: dict[str, Any], inherited: dict[str, Any] | None = None
) -> None:
    """Merge ``inherited`` storage options into ``options`` in place.

    Parameters
    ----------
    options: dict
        Options inferred so far; updated in place.
    inherited: dict or None
        Explicitly specified options. ``None`` or an empty dict is a no-op.

    Raises
    ------
    KeyError
        If a key is present in both mappings with different values.
    """
    extra = inherited if inherited else {}
    # A shared key is only acceptable when both sides agree on its value.
    for key in set(options) & set(extra):
        if options.get(key) != extra.get(key):
            raise KeyError(
                f"Collision between inferred and specified storage "
                f"option:\n{key}"
            )
    options.update(extra)
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
# Compression extensions registered via fsspec.compression.register_compression
compressions: dict[str, str] = {}


def infer_compression(filename: str) -> str | None:
    """Look up the compression name registered for a filename's extension.

    The final extension of ``filename`` is taken, stripped of dots and
    lower-cased, then looked up in the module-level ``compressions``
    registry. See fsspec.compression.register_compression.

    Returns the registered compression name, or None when the extension
    is not registered.
    """
    _, suffix = os.path.splitext(filename)
    return compressions.get(suffix.strip(".").lower())
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def build_name_function(max_int: float) -> Callable[[int], str]:
    """Returns a function that receives a single integer
    and returns it as a string padded by enough zero characters
    to align with maximum possible integer

    The pad width is the number of decimal digits of ``int(max_int)``.
    Counting digits (rather than taking ``log10`` of a nudged float) stays
    exact for large values and for exact powers of ten.

    Raises
    ------
    ValueError
        If ``max_int`` is negative.

    >>> name_f = build_name_function(57)

    >>> name_f(7)
    '07'
    >>> name_f(31)
    '31'
    >>> build_name_function(1000)(42)
    '0042'
    >>> build_name_function(999)(42)
    '042'
    >>> build_name_function(0)(0)
    '0'
    """
    if max_int < 0:
        raise ValueError(f"max_int must be non-negative, got {max_int!r}")

    # Digit count of the largest value to be formatted.
    pad_length = len(str(int(max_int)))

    def name_function(i: int) -> str:
        return str(i).zfill(pad_length)

    return name_function
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def seek_delimiter(file: IO[bytes], delimiter: bytes, blocksize: int) -> bool:
    r"""Advance ``file`` to just past the next occurrence of ``delimiter``.

    Chunks are bounded by file start, a delimiting sequence, and file end.
    File start counts as a valid boundary, so nothing is searched when the
    current offset is 0. Use file.tell() to see the location afterwards.

    Parameters
    ----------
    file: a file
    delimiter: bytes
        a delimiter like ``b'\n'`` or message sentinel, matching file .read() type
    blocksize: int
        Number of bytes to read from the file at once.

    Returns
    -------
    Returns True if a delimiter was found, False if at file start or end.
    """
    if file.tell() == 0:
        # Already at a boundary (start of file): leave the position alone.
        return False

    # ``tail`` carries the end of the previous window so that a delimiter
    # straddling two reads is still found. It starts as None and takes the
    # type of whatever file.read() returns.
    tail: bytes | None = None
    while True:
        chunk = file.read(blocksize)
        if not chunk:
            # Hit end of file without seeing the delimiter.
            return False
        window = tail + chunk if tail else chunk
        try:
            hit = window.find(delimiter)
            if hit >= 0:
                # Rewind from the current position to the byte just after
                # the delimiter.
                file.seek(file.tell() - (len(window) - hit) + len(delimiter))
                return True
            if len(chunk) < blocksize:
                # Short read: end of file without delimiter.
                return False
        except (OSError, ValueError):
            pass
        tail = window[-len(delimiter) :]
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
def read_block(
    f: IO[bytes],
    offset: int,
    length: int | None,
    delimiter: bytes | None = None,
    split_before: bool = False,
) -> bytes:
    """Read a block of bytes from a file

    Parameters
    ----------
    f: File
        Open file
    offset: int
        Byte offset to start read
    length: int
        Number of bytes to read, read through end of file if None
    delimiter: bytes (optional)
        Ensure reading starts and stops at delimiter bytestring
    split_before: bool (optional)
        Start/stop read *before* delimiter bytestring.


    If using the ``delimiter=`` keyword argument we ensure that the read
    starts and stops at delimiter boundaries that follow the locations
    ``offset`` and ``offset + length``.  If ``offset`` is zero then we
    start at zero, regardless of delimiter.  The bytestring returned WILL
    include the terminating delimiter string.

    Examples
    --------

    >>> from io import BytesIO  # doctest: +SKIP
    >>> f = BytesIO(b'Alice, 100\\nBob, 200\\nCharlie, 300')  # doctest: +SKIP
    >>> read_block(f, 0, 13)  # doctest: +SKIP
    b'Alice, 100\\nBo'

    >>> read_block(f, 0, 13, delimiter=b'\\n')  # doctest: +SKIP
    b'Alice, 100\\nBob, 200\\n'

    >>> read_block(f, 10, 10, delimiter=b'\\n')  # doctest: +SKIP
    b'Bob, 200\\nCharlie, 300'
    """
    if delimiter:
        f.seek(offset)
        found_start_delim = seek_delimiter(f, delimiter, 2**16)
        if length is None:
            return f.read()
        start = f.tell()
        # The start moved forward to a delimiter boundary; shrink the
        # requested length so the nominal end stays at offset + length.
        length -= start - offset

        f.seek(start + length)
        found_end_delim = seek_delimiter(f, delimiter, 2**16)
        end = f.tell()

        # Adjust split location to before delimiter if seek found the
        # delimiter sequence, not start or end of file.
        if found_start_delim and split_before:
            start -= len(delimiter)

        if found_end_delim and split_before:
            end -= len(delimiter)

        offset = start
        length = end - start

    f.seek(offset)

    if length is None:
        # Honour the documented contract: ``length=None`` reads through
        # end of file (previously this tripped an assertion).
        return f.read()
    return f.read(length)
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def tokenize(*args: Any, **kwargs: Any) -> str:
    """Deterministic token

    (modified from dask.base)

    The token is the MD5 hex digest of ``str()`` applied to the positional
    arguments tuple, with the keyword-argument dict appended as a final
    element when any keywords are given.

    >>> tokenize([1, 2, '3'])
    '9d71491b50023b06fc76928e6eddb952'

    >>> tokenize('Hello') == tokenize('Hello')
    True
    """
    items = (args + (kwargs,)) if kwargs else args
    payload = str(items).encode()
    try:
        digest = md5(payload)
    except ValueError:
        # FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380
        digest = md5(payload, usedforsecurity=False)
    return digest.hexdigest()
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
def stringify_path(filepath: str | os.PathLike[str] | pathlib.Path) -> str:
    """Attempt to convert a path-like object to a string.

    Parameters
    ----------
    filepath: object to be converted

    Returns
    -------
    filepath_str: maybe a string version of the object

    Notes
    -----
    The checks are applied in this order:

    1. ``str`` instances are returned as they are.
    2. Objects with an ``__fspath__`` method return the result of calling it.
    3. Objects with a ``path`` attribute return that attribute.
    4. Anything else (bytes, buffers, None, ...) is passed through unchanged.
    """
    if isinstance(filepath, str):
        return filepath
    if hasattr(filepath, "__fspath__"):
        return filepath.__fspath__()
    if hasattr(filepath, "path"):
        return filepath.path
    return filepath  # type: ignore[return-value]
|
| 357 |
+
|
| 358 |
+
|
| 359 |
+
def make_instance(
    cls: Callable[..., T], args: Sequence[Any], kwargs: dict[str, Any]
) -> T:
    """Build ``cls(*args, **kwargs)`` and call ``_determine_worker()`` on it.

    The created object must provide a ``_determine_worker`` method; it is
    invoked once, with no arguments, before the object is returned.
    """
    instance = cls(*args, **kwargs)
    instance._determine_worker()  # type: ignore[attr-defined]
    return instance
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
def common_prefix(paths: Iterable[str]) -> str:
    """For a list of paths, find the longest prefix common to all

    The comparison is made on whole "/"-separated components, so
    ``"abc/d"`` and ``"abd/d"`` share no prefix.

    Parameters
    ----------
    paths: iterable of str
        Must contain at least one path.

    Returns
    -------
    The shared leading components joined with "/" (possibly empty).

    Raises
    ------
    ValueError
        If ``paths`` is empty.
    """
    parts = [p.split("/") for p in paths]
    if not parts:
        raise ValueError("common_prefix() requires at least one path")
    first = parts[0]
    shared = 0
    # zip() stops at the shortest path, which bounds the possible prefix.
    for components in zip(*parts):
        if any(c != first[shared] for c in components):
            break
        shared += 1
    return "/".join(first[:shared])
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
def other_paths(
    paths: list[str],
    path2: str | list[str],
    exists: bool = False,
    flatten: bool = False,
) -> list[str]:
    """In bulk file operations, construct a new file tree from a list of files

    Parameters
    ----------
    paths: list of str
        The input file tree
    path2: str or list of str
        Root to construct the new list in. If this is already a list of str, we just
        assert it has the right number of elements.
    exists: bool (optional)
        For a str destination, if it already exists (and is a dir), files should
        end up inside.
    flatten: bool (optional)
        Whether to flatten the input directory tree structure so that the output files
        are in the same directory.

    Returns
    -------
    list of str
    """
    if not isinstance(path2, str):
        # Destination list supplied by the caller: only check its length.
        assert len(paths) == len(path2)
        return path2

    root = path2.rstrip("/")

    if flatten:
        # Keep only each file's final component, directly under the root.
        return ["/".join((root, p.split("/")[-1])) for p in paths]

    cp = common_prefix(paths)
    if exists:
        # Drop the last shared component so it is recreated inside the
        # existing destination directory.
        cp = cp.rsplit("/", 1)[0]
    if not cp and not any(s.startswith("/") for s in paths):
        # Nothing shared and all paths relative: nest them under the root.
        return ["/".join([root, p]) for p in paths]
    # Otherwise swap the first occurrence of the shared prefix for the root.
    return [p.replace(cp, root, 1) for p in paths]
|
| 423 |
+
|
| 424 |
+
|
| 425 |
+
def is_exception(obj: Any) -> bool:
    """Return True when ``obj`` is an instance of ``BaseException``.

    Exception *classes* (as opposed to instances) give False.
    """
    return isinstance(obj, BaseException)
|
| 427 |
+
|
| 428 |
+
|
| 429 |
+
def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
    """Return True when ``f`` has ``read``, ``close`` and ``tell`` attributes.

    This is a duck-typing check only; the attributes are not called.
    """
    return all(hasattr(f, attr) for attr in ("read", "close", "tell"))
|
| 434 |
+
|
| 435 |
+
|
| 436 |
+
def get_protocol(url: str) -> str:
    """Return the protocol part of ``url``, or "file" when there is none.

    The URL is first passed through ``stringify_path`` and then split on
    the first ``::`` or ``://``. Whatever precedes that separator is
    returned; if neither separator is present the result is "file".
    """
    url = stringify_path(url)
    # maxsplit is passed by keyword: positional use is deprecated since
    # Python 3.13.
    parts = re.split(r"(\:\:|\://)", url, maxsplit=1)
    if len(parts) > 1:
        return parts[0]
    return "file"
|
| 442 |
+
|
| 443 |
+
|
| 444 |
+
def can_be_local(path: str) -> bool:
    """Can the given URL be used with open_local?

    The answer is the ``local_file`` attribute of the filesystem class
    registered for the URL's protocol, defaulting to False when the class
    has no such attribute or cannot be resolved.
    """
    from fsspec import get_filesystem_class

    try:
        fs_class = get_filesystem_class(get_protocol(path))
    except (ValueError, ImportError):
        # not in registry or import failed
        return False
    return getattr(fs_class, "local_file", False)
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
def get_package_version_without_import(name: str) -> str | None:
    """For given package name, try to find the version without importing it

    Lookup order:

    1. ``__version__`` of an already-imported module in ``sys.modules``.
    2. Installed distribution metadata (``importlib.metadata.version``).
    3. Importing the package and reading ``__version__``. This is the
       backup, so an import *might* happen.

    Returns either the version string, or None if the package
    or the version was not readily found.
    """
    mod = sys.modules.get(name)
    if mod is not None and hasattr(mod, "__version__"):
        return mod.__version__
    try:
        return version(name)
    except Exception:
        # Metadata lookup is best-effort, so any ordinary failure falls
        # through to the import-based fallback. KeyboardInterrupt and
        # SystemExit are deliberately NOT swallowed.
        pass
    try:
        import importlib

        mod = importlib.import_module(name)
        return mod.__version__
    except (ImportError, AttributeError):
        return None
|
| 479 |
+
|
| 480 |
+
|
| 481 |
+
def setup_logging(
    logger: logging.Logger | None = None,
    logger_name: str | None = None,
    level: str = "DEBUG",
    clear: bool = True,
) -> logging.Logger:
    """Attach a formatted stream handler to a logger and set its level.

    Parameters
    ----------
    logger: logging.Logger (optional)
        Logger to configure; takes precedence over ``logger_name``.
    logger_name: str (optional)
        Name passed to ``logging.getLogger`` when no logger is given.
    level: str
        Level applied with ``setLevel``.
    clear: bool
        If True, remove the logger's existing handlers first.

    Returns
    -------
    The configured logger.

    Raises
    ------
    ValueError
        If neither ``logger`` nor ``logger_name`` is given.
    """
    if logger is None and logger_name is None:
        raise ValueError("Provide either logger object or logger name")
    target = logger or logging.getLogger(logger_name)

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s -- %(message)s"
        )
    )

    if clear:
        target.handlers.clear()
    target.addHandler(stream_handler)
    target.setLevel(level)
    return target
|
| 500 |
+
|
| 501 |
+
|
| 502 |
+
def _unstrip_protocol(name: str, fs: AbstractFileSystem) -> str:
    """Return ``fs.unstrip_protocol(name)``.

    Thin function-style wrapper around the filesystem method, with the
    name first and the filesystem second.
    """
    return fs.unstrip_protocol(name)
|
| 504 |
+
|
| 505 |
+
|
| 506 |
+
def mirror_from(
    origin_name: str, methods: Iterable[str]
) -> Callable[[type[T]], type[T]]:
    """Class decorator that forwards attributes to a wrapped object.

    For each name in ``methods`` a read-only property is installed on the
    decorated class. Reading it looks up the attribute called
    ``origin_name`` on the instance and returns the same-named attribute
    of that object.
    """

    def forwarding_property(attr: str) -> property:
        # Build the property in its own scope so each getter keeps its
        # own ``attr`` value.
        def fget(self: Any) -> Any:
            return getattr(getattr(self, origin_name), attr)

        return property(fget)

    def decorate(cls: type[T]) -> type[T]:
        for attr in methods:
            setattr(cls, attr, forwarding_property(attr))
        return cls

    return decorate
|
| 524 |
+
|
| 525 |
+
|
| 526 |
+
@contextlib.contextmanager
def nullcontext(obj: T) -> Iterator[T]:
    """Context manager that does nothing except yield ``obj`` unchanged."""
    yield obj
|
| 529 |
+
|
| 530 |
+
|
| 531 |
+
def merge_offset_ranges(
    paths: list[str],
    starts: list[int] | int,
    ends: list[int] | int,
    max_gap: int = 0,
    max_block: int | None = None,
    sort: bool = True,
) -> tuple[list[str], list[int], list[int]]:
    """Merge adjacent byte-offset ranges when the inter-range
    gap is <= `max_gap`, and when the merged byte range does not
    exceed `max_block` (if specified). By default, this function
    will re-order the input paths and byte ranges to ensure sorted
    order. If the user can guarantee that the inputs are already
    sorted, passing `sort=False` will skip the re-ordering.

    Parameters
    ----------
    paths: list of str
        One path per byte range.
    starts, ends: list of int, or int
        Range bounds; a scalar is applied to every path. A falsy start is
        treated as 0 and an end of None means "to end of file".
    max_gap: int
        Largest gap between two ranges of the same path that still merges.
    max_block: int (optional)
        Upper bound on the size of a merged range.
    sort: bool
        Sort by (path, start, end) before merging.

    Returns
    -------
    Tuple ``(paths, starts, ends)`` of the merged ranges. Inputs with at
    most one range are returned unchanged.

    Raises
    ------
    TypeError
        If ``paths`` is not a list.
    ValueError
        If ``starts`` or ``ends`` differ in length from ``paths``.
    """
    # Check input
    if not isinstance(paths, list):
        raise TypeError("paths must be a list")
    if not isinstance(starts, list):
        starts = [starts] * len(paths)
    if not isinstance(ends, list):
        ends = [ends] * len(paths)
    if len(starts) != len(paths) or len(ends) != len(paths):
        raise ValueError("paths, starts and ends must have the same length")

    # Early Return: nothing to merge with zero or one range
    if len(starts) <= 1:
        return paths, starts, ends

    starts = [s or 0 for s in starts]
    # Sort by paths and then ranges if `sort=True`
    if sort:
        paths, starts, ends = (
            list(v) for v in zip(*sorted(zip(paths, starts, ends)))
        )

    # Loop through the coupled `paths`, `starts`, and
    # `ends`, and merge adjacent blocks when appropriate
    new_paths = paths[:1]
    new_starts = starts[:1]
    new_ends = ends[:1]
    for i in range(1, len(paths)):
        same_path = paths[i] == paths[i - 1]
        if same_path and new_ends[-1] is None:
            # Current block already reaches end of file; it covers this range.
            continue
        if (
            not same_path
            or (starts[i] - new_ends[-1]) > max_gap
            or (max_block is not None and (ends[i] - new_starts[-1]) > max_block)
        ):
            # Cannot merge with previous block.
            # Add new `paths`, `starts`, and `ends` elements
            new_paths.append(paths[i])
            new_starts.append(starts[i])
            new_ends.append(ends[i])
        elif ends[i] is None:
            # Merging an open-ended range makes the block open-ended.
            new_ends[-1] = None
        else:
            # Merge with previous block. Keep the larger end so that a
            # range nested inside the block cannot shrink it.
            new_ends[-1] = max(new_ends[-1], ends[i])
    return new_paths, new_starts, new_ends
|
| 599 |
+
|
| 600 |
+
|
| 601 |
+
def file_size(filelike: IO[bytes]) -> int:
    """Find length of any open read-mode file-like

    Seeks to the end to learn the size, then restores the position the
    file had on entry (also when the seek to the end raises).
    """
    saved = filelike.tell()
    try:
        size = filelike.seek(0, os.SEEK_END)
    finally:
        filelike.seek(saved)
    return size
|
| 608 |
+
|
| 609 |
+
|
| 610 |
+
@contextlib.contextmanager
def atomic_write(path: str, mode: str = "wb"):
    """
    Context manager yielding a temporary file created in the same directory
    as `path`. If the body finishes without error the temporary file
    replaces `path`, so `path` is updated in a single step. If the body
    raises, the temporary file is removed and the exception propagates,
    leaving `path` untouched.
    """
    directory, basename = os.path.split(path)
    fd, tmp_name = tempfile.mkstemp(dir=directory, prefix=basename + "-")
    try:
        with open(fd, mode) as fp:
            yield fp
    except BaseException:
        # Clean up the partial file; it may already be gone.
        with contextlib.suppress(FileNotFoundError):
            os.unlink(tmp_name)
        raise
    os.replace(tmp_name, path)
|
| 629 |
+
|
| 630 |
+
|
| 631 |
+
def _translate(pat, STAR, QUESTION_MARK):
    """Translate one glob pattern segment into a list of regex fragments.

    ``STAR`` and ``QUESTION_MARK`` are the fragments appended for ``*``
    and ``?``. Bracket expressions become regex character classes, and
    every other character is appended via ``re.escape``. The fragments are
    returned as a list for the caller to join.
    """
    # Copied from: https://github.com/python/cpython/pull/106703.
    res: list[str] = []
    add = res.append
    i, n = 0, len(pat)
    while i < n:
        c = pat[i]
        i = i + 1
        if c == "*":
            # compress consecutive `*` into one
            # (identity check: only the STAR object itself counts)
            if (not res) or res[-1] is not STAR:
                add(STAR)
        elif c == "?":
            add(QUESTION_MARK)
        elif c == "[":
            # Scan for the closing "]". A leading "!" and a "]" directly
            # after the opening are skipped first, so they belong to the set.
            j = i
            if j < n and pat[j] == "!":
                j = j + 1
            if j < n and pat[j] == "]":
                j = j + 1
            while j < n and pat[j] != "]":
                j = j + 1
            if j >= n:
                # No closing bracket: emit a literal "[".
                add("\\[")
            else:
                stuff = pat[i:j]
                if "-" not in stuff:
                    stuff = stuff.replace("\\", r"\\")
                else:
                    # Split the set body at the hyphens found by the
                    # search below; `i` is advanced past each one.
                    chunks = []
                    k = i + 2 if pat[i] == "!" else i + 1
                    while True:
                        k = pat.find("-", k, j)
                        if k < 0:
                            break
                        chunks.append(pat[i:k])
                        i = k + 1
                        k = k + 3
                    chunk = pat[i:j]
                    if chunk:
                        chunks.append(chunk)
                    else:
                        # Trailing hyphen: keep it as a literal on the
                        # previous chunk.
                        chunks[-1] += "-"
                    # Remove empty ranges -- invalid in RE.
                    for k in range(len(chunks) - 1, 0, -1):
                        if chunks[k - 1][-1] > chunks[k][0]:
                            chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:]
                            del chunks[k]
                    # Escape backslashes and hyphens for set difference (--).
                    # Hyphens that create ranges shouldn't be escaped.
                    stuff = "-".join(
                        s.replace("\\", r"\\").replace("-", r"\-") for s in chunks
                    )
                # Escape set operations (&&, ~~ and ||).
                stuff = re.sub(r"([&~|])", r"\\\1", stuff)
                # Resume scanning just after the closing "]".
                i = j + 1
                if not stuff:
                    # Empty range: never match.
                    add("(?!)")
                elif stuff == "!":
                    # Negated empty range: match any character.
                    add(".")
                else:
                    if stuff[0] == "!":
                        # Glob negation "!" becomes regex negation "^".
                        stuff = "^" + stuff[1:]
                    elif stuff[0] in ("^", "["):
                        # Escape a leading "^" or "[" so it stays literal.
                        stuff = "\\" + stuff
                    add(f"[{stuff}]")
        else:
            add(re.escape(c))
    assert i == n
    return res
|
| 703 |
+
|
| 704 |
+
|
| 705 |
+
def glob_translate(pat):
    # Copied from: https://github.com/python/cpython/pull/106703.
    # The keyword parameters' values are fixed to:
    # recursive=True, include_hidden=True, seps=None
    """Translate a pathname with shell wildcards to a regular expression.

    The pattern is split on the platform path separator(s). A segment that
    is exactly ``*`` matches one non-empty path segment, and a segment that
    is exactly ``**`` matches any number of segments. Any other segment is
    translated by ``_translate``.

    Raises
    ------
    ValueError
        If ``**`` appears inside a segment rather than as a whole segment.
    """
    seps = os.path.sep + os.path.altsep if os.path.altsep else os.path.sep
    escaped_seps = "".join(re.escape(ch) for ch in seps)
    # A character class is only needed when there are several separators.
    any_sep = escaped_seps if len(seps) <= 1 else f"[{escaped_seps}]"
    not_sep = f"[^{escaped_seps}]"

    pieces = []
    segments = re.split(any_sep, pat)
    final_idx = len(segments) - 1
    for position, segment in enumerate(segments):
        is_last = position >= final_idx
        if segment == "*":
            # One non-empty segment, plus its separator unless it is last.
            pieces.append(f"{not_sep}+" if is_last else f"{not_sep}+{any_sep}")
        elif segment == "**":
            # Any number of segments, including none.
            pieces.append(".*" if is_last else f"(?:.+{any_sep})?")
        elif "**" in segment:
            raise ValueError(
                "Invalid pattern: '**' can only be an entire path component"
            )
        else:
            if segment:
                pieces.extend(_translate(segment, f"{not_sep}*", not_sep))
            if not is_last:
                pieces.append(any_sep)
    body = "".join(pieces)
    return rf"(?s:{body})\Z"
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/INSTALLER
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pip
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/METADATA
ADDED
|
@@ -0,0 +1,347 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: huggingface_hub
|
| 3 |
+
Version: 0.36.2
|
| 4 |
+
Summary: Client library to download and publish models, datasets and other repos on the huggingface.co hub
|
| 5 |
+
Home-page: https://github.com/huggingface/huggingface_hub
|
| 6 |
+
Author: Hugging Face, Inc.
|
| 7 |
+
Author-email: julien@huggingface.co
|
| 8 |
+
License: Apache
|
| 9 |
+
Keywords: model-hub machine-learning models natural-language-processing deep-learning pytorch pretrained-models
|
| 10 |
+
Classifier: Intended Audience :: Developers
|
| 11 |
+
Classifier: Intended Audience :: Education
|
| 12 |
+
Classifier: Intended Audience :: Science/Research
|
| 13 |
+
Classifier: License :: OSI Approved :: Apache Software License
|
| 14 |
+
Classifier: Operating System :: OS Independent
|
| 15 |
+
Classifier: Programming Language :: Python :: 3
|
| 16 |
+
Classifier: Programming Language :: Python :: 3 :: Only
|
| 17 |
+
Classifier: Programming Language :: Python :: 3.8
|
| 18 |
+
Classifier: Programming Language :: Python :: 3.9
|
| 19 |
+
Classifier: Programming Language :: Python :: 3.10
|
| 20 |
+
Classifier: Programming Language :: Python :: 3.11
|
| 21 |
+
Classifier: Programming Language :: Python :: 3.12
|
| 22 |
+
Classifier: Programming Language :: Python :: 3.13
|
| 23 |
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
| 24 |
+
Requires-Python: >=3.8.0
|
| 25 |
+
Description-Content-Type: text/markdown
|
| 26 |
+
License-File: LICENSE
|
| 27 |
+
Requires-Dist: filelock
|
| 28 |
+
Requires-Dist: fsspec>=2023.5.0
|
| 29 |
+
Requires-Dist: hf-xet<2.0.0,>=1.1.3; platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "arm64" or platform_machine == "aarch64"
|
| 30 |
+
Requires-Dist: packaging>=20.9
|
| 31 |
+
Requires-Dist: pyyaml>=5.1
|
| 32 |
+
Requires-Dist: requests
|
| 33 |
+
Requires-Dist: tqdm>=4.42.1
|
| 34 |
+
Requires-Dist: typing-extensions>=3.7.4.3
|
| 35 |
+
Provides-Extra: cli
|
| 36 |
+
Requires-Dist: InquirerPy==0.3.4; extra == "cli"
|
| 37 |
+
Provides-Extra: inference
|
| 38 |
+
Requires-Dist: aiohttp; extra == "inference"
|
| 39 |
+
Provides-Extra: oauth
|
| 40 |
+
Requires-Dist: authlib>=1.3.2; extra == "oauth"
|
| 41 |
+
Requires-Dist: fastapi; extra == "oauth"
|
| 42 |
+
Requires-Dist: httpx; extra == "oauth"
|
| 43 |
+
Requires-Dist: itsdangerous; extra == "oauth"
|
| 44 |
+
Provides-Extra: torch
|
| 45 |
+
Requires-Dist: torch; extra == "torch"
|
| 46 |
+
Requires-Dist: safetensors[torch]; extra == "torch"
|
| 47 |
+
Provides-Extra: hf-transfer
|
| 48 |
+
Requires-Dist: hf_transfer>=0.1.4; extra == "hf-transfer"
|
| 49 |
+
Provides-Extra: fastai
|
| 50 |
+
Requires-Dist: toml; extra == "fastai"
|
| 51 |
+
Requires-Dist: fastai>=2.4; extra == "fastai"
|
| 52 |
+
Requires-Dist: fastcore>=1.3.27; extra == "fastai"
|
| 53 |
+
Provides-Extra: tensorflow
|
| 54 |
+
Requires-Dist: tensorflow; extra == "tensorflow"
|
| 55 |
+
Requires-Dist: pydot; extra == "tensorflow"
|
| 56 |
+
Requires-Dist: graphviz; extra == "tensorflow"
|
| 57 |
+
Provides-Extra: tensorflow-testing
|
| 58 |
+
Requires-Dist: tensorflow; extra == "tensorflow-testing"
|
| 59 |
+
Requires-Dist: keras<3.0; extra == "tensorflow-testing"
|
| 60 |
+
Provides-Extra: hf-xet
|
| 61 |
+
Requires-Dist: hf-xet<2.0.0,>=1.1.2; extra == "hf-xet"
|
| 62 |
+
Provides-Extra: mcp
|
| 63 |
+
Requires-Dist: mcp>=1.8.0; extra == "mcp"
|
| 64 |
+
Requires-Dist: typer; extra == "mcp"
|
| 65 |
+
Requires-Dist: aiohttp; extra == "mcp"
|
| 66 |
+
Provides-Extra: testing
|
| 67 |
+
Requires-Dist: InquirerPy==0.3.4; extra == "testing"
|
| 68 |
+
Requires-Dist: aiohttp; extra == "testing"
|
| 69 |
+
Requires-Dist: authlib>=1.3.2; extra == "testing"
|
| 70 |
+
Requires-Dist: fastapi; extra == "testing"
|
| 71 |
+
Requires-Dist: httpx; extra == "testing"
|
| 72 |
+
Requires-Dist: itsdangerous; extra == "testing"
|
| 73 |
+
Requires-Dist: jedi; extra == "testing"
|
| 74 |
+
Requires-Dist: Jinja2; extra == "testing"
|
| 75 |
+
Requires-Dist: pytest<8.2.2,>=8.1.1; extra == "testing"
|
| 76 |
+
Requires-Dist: pytest-cov; extra == "testing"
|
| 77 |
+
Requires-Dist: pytest-env; extra == "testing"
|
| 78 |
+
Requires-Dist: pytest-xdist; extra == "testing"
|
| 79 |
+
Requires-Dist: pytest-vcr; extra == "testing"
|
| 80 |
+
Requires-Dist: pytest-asyncio; extra == "testing"
|
| 81 |
+
Requires-Dist: pytest-rerunfailures<16.0; extra == "testing"
|
| 82 |
+
Requires-Dist: pytest-mock; extra == "testing"
|
| 83 |
+
Requires-Dist: urllib3<2.0; extra == "testing"
|
| 84 |
+
Requires-Dist: soundfile; extra == "testing"
|
| 85 |
+
Requires-Dist: Pillow; extra == "testing"
|
| 86 |
+
Requires-Dist: gradio>=4.0.0; extra == "testing"
|
| 87 |
+
Requires-Dist: numpy; extra == "testing"
|
| 88 |
+
Requires-Dist: fastapi; extra == "testing"
|
| 89 |
+
Provides-Extra: typing
|
| 90 |
+
Requires-Dist: typing-extensions>=4.8.0; extra == "typing"
|
| 91 |
+
Requires-Dist: types-PyYAML; extra == "typing"
|
| 92 |
+
Requires-Dist: types-requests; extra == "typing"
|
| 93 |
+
Requires-Dist: types-simplejson; extra == "typing"
|
| 94 |
+
Requires-Dist: types-toml; extra == "typing"
|
| 95 |
+
Requires-Dist: types-tqdm; extra == "typing"
|
| 96 |
+
Requires-Dist: types-urllib3; extra == "typing"
|
| 97 |
+
Provides-Extra: quality
|
| 98 |
+
Requires-Dist: ruff>=0.9.0; extra == "quality"
|
| 99 |
+
Requires-Dist: mypy<1.15.0,>=1.14.1; python_version == "3.8" and extra == "quality"
|
| 100 |
+
Requires-Dist: mypy==1.15.0; python_version >= "3.9" and extra == "quality"
|
| 101 |
+
Requires-Dist: libcst>=1.4.0; extra == "quality"
|
| 102 |
+
Requires-Dist: ty; extra == "quality"
|
| 103 |
+
Provides-Extra: all
|
| 104 |
+
Requires-Dist: InquirerPy==0.3.4; extra == "all"
|
| 105 |
+
Requires-Dist: aiohttp; extra == "all"
|
| 106 |
+
Requires-Dist: authlib>=1.3.2; extra == "all"
|
| 107 |
+
Requires-Dist: fastapi; extra == "all"
|
| 108 |
+
Requires-Dist: httpx; extra == "all"
|
| 109 |
+
Requires-Dist: itsdangerous; extra == "all"
|
| 110 |
+
Requires-Dist: jedi; extra == "all"
|
| 111 |
+
Requires-Dist: Jinja2; extra == "all"
|
| 112 |
+
Requires-Dist: pytest<8.2.2,>=8.1.1; extra == "all"
|
| 113 |
+
Requires-Dist: pytest-cov; extra == "all"
|
| 114 |
+
Requires-Dist: pytest-env; extra == "all"
|
| 115 |
+
Requires-Dist: pytest-xdist; extra == "all"
|
| 116 |
+
Requires-Dist: pytest-vcr; extra == "all"
|
| 117 |
+
Requires-Dist: pytest-asyncio; extra == "all"
|
| 118 |
+
Requires-Dist: pytest-rerunfailures<16.0; extra == "all"
|
| 119 |
+
Requires-Dist: pytest-mock; extra == "all"
|
| 120 |
+
Requires-Dist: urllib3<2.0; extra == "all"
|
| 121 |
+
Requires-Dist: soundfile; extra == "all"
|
| 122 |
+
Requires-Dist: Pillow; extra == "all"
|
| 123 |
+
Requires-Dist: gradio>=4.0.0; extra == "all"
|
| 124 |
+
Requires-Dist: numpy; extra == "all"
|
| 125 |
+
Requires-Dist: fastapi; extra == "all"
|
| 126 |
+
Requires-Dist: ruff>=0.9.0; extra == "all"
|
| 127 |
+
Requires-Dist: mypy<1.15.0,>=1.14.1; python_version == "3.8" and extra == "all"
|
| 128 |
+
Requires-Dist: mypy==1.15.0; python_version >= "3.9" and extra == "all"
|
| 129 |
+
Requires-Dist: libcst>=1.4.0; extra == "all"
|
| 130 |
+
Requires-Dist: ty; extra == "all"
|
| 131 |
+
Requires-Dist: typing-extensions>=4.8.0; extra == "all"
|
| 132 |
+
Requires-Dist: types-PyYAML; extra == "all"
|
| 133 |
+
Requires-Dist: types-requests; extra == "all"
|
| 134 |
+
Requires-Dist: types-simplejson; extra == "all"
|
| 135 |
+
Requires-Dist: types-toml; extra == "all"
|
| 136 |
+
Requires-Dist: types-tqdm; extra == "all"
|
| 137 |
+
Requires-Dist: types-urllib3; extra == "all"
|
| 138 |
+
Provides-Extra: dev
|
| 139 |
+
Requires-Dist: InquirerPy==0.3.4; extra == "dev"
|
| 140 |
+
Requires-Dist: aiohttp; extra == "dev"
|
| 141 |
+
Requires-Dist: authlib>=1.3.2; extra == "dev"
|
| 142 |
+
Requires-Dist: fastapi; extra == "dev"
|
| 143 |
+
Requires-Dist: httpx; extra == "dev"
|
| 144 |
+
Requires-Dist: itsdangerous; extra == "dev"
|
| 145 |
+
Requires-Dist: jedi; extra == "dev"
|
| 146 |
+
Requires-Dist: Jinja2; extra == "dev"
|
| 147 |
+
Requires-Dist: pytest<8.2.2,>=8.1.1; extra == "dev"
|
| 148 |
+
Requires-Dist: pytest-cov; extra == "dev"
|
| 149 |
+
Requires-Dist: pytest-env; extra == "dev"
|
| 150 |
+
Requires-Dist: pytest-xdist; extra == "dev"
|
| 151 |
+
Requires-Dist: pytest-vcr; extra == "dev"
|
| 152 |
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
| 153 |
+
Requires-Dist: pytest-rerunfailures<16.0; extra == "dev"
|
| 154 |
+
Requires-Dist: pytest-mock; extra == "dev"
|
| 155 |
+
Requires-Dist: urllib3<2.0; extra == "dev"
|
| 156 |
+
Requires-Dist: soundfile; extra == "dev"
|
| 157 |
+
Requires-Dist: Pillow; extra == "dev"
|
| 158 |
+
Requires-Dist: gradio>=4.0.0; extra == "dev"
|
| 159 |
+
Requires-Dist: numpy; extra == "dev"
|
| 160 |
+
Requires-Dist: fastapi; extra == "dev"
|
| 161 |
+
Requires-Dist: ruff>=0.9.0; extra == "dev"
|
| 162 |
+
Requires-Dist: mypy<1.15.0,>=1.14.1; python_version == "3.8" and extra == "dev"
|
| 163 |
+
Requires-Dist: mypy==1.15.0; python_version >= "3.9" and extra == "dev"
|
| 164 |
+
Requires-Dist: libcst>=1.4.0; extra == "dev"
|
| 165 |
+
Requires-Dist: ty; extra == "dev"
|
| 166 |
+
Requires-Dist: typing-extensions>=4.8.0; extra == "dev"
|
| 167 |
+
Requires-Dist: types-PyYAML; extra == "dev"
|
| 168 |
+
Requires-Dist: types-requests; extra == "dev"
|
| 169 |
+
Requires-Dist: types-simplejson; extra == "dev"
|
| 170 |
+
Requires-Dist: types-toml; extra == "dev"
|
| 171 |
+
Requires-Dist: types-tqdm; extra == "dev"
|
| 172 |
+
Requires-Dist: types-urllib3; extra == "dev"
|
| 173 |
+
Dynamic: author
|
| 174 |
+
Dynamic: author-email
|
| 175 |
+
Dynamic: classifier
|
| 176 |
+
Dynamic: description
|
| 177 |
+
Dynamic: description-content-type
|
| 178 |
+
Dynamic: home-page
|
| 179 |
+
Dynamic: keywords
|
| 180 |
+
Dynamic: license
|
| 181 |
+
Dynamic: license-file
|
| 182 |
+
Dynamic: provides-extra
|
| 183 |
+
Dynamic: requires-dist
|
| 184 |
+
Dynamic: requires-python
|
| 185 |
+
Dynamic: summary
|
| 186 |
+
|
| 187 |
+
<p align="center">
|
| 188 |
+
<picture>
|
| 189 |
+
<source media="(prefers-color-scheme: dark)" srcset="https://huggingface.co/datasets/huggingface/documentation-images/raw/main/huggingface_hub-dark.svg">
|
| 190 |
+
<source media="(prefers-color-scheme: light)" srcset="https://huggingface.co/datasets/huggingface/documentation-images/raw/main/huggingface_hub.svg">
|
| 191 |
+
<img alt="huggingface_hub library logo" src="https://huggingface.co/datasets/huggingface/documentation-images/raw/main/huggingface_hub.svg" width="352" height="59" style="max-width: 100%;">
|
| 192 |
+
</picture>
|
| 193 |
+
<br/>
|
| 194 |
+
<br/>
|
| 195 |
+
</p>
|
| 196 |
+
|
| 197 |
+
<p align="center">
|
| 198 |
+
<i>The official Python client for the Hugging Face Hub.</i>
|
| 199 |
+
</p>
|
| 200 |
+
|
| 201 |
+
<p align="center">
|
| 202 |
+
<a href="https://huggingface.co/docs/huggingface_hub/en/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/huggingface_hub/index.svg?down_color=red&down_message=offline&up_message=online&label=doc"></a>
|
| 203 |
+
<a href="https://github.com/huggingface/huggingface_hub/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/huggingface_hub.svg"></a>
|
| 204 |
+
<a href="https://github.com/huggingface/huggingface_hub"><img alt="PyPi version" src="https://img.shields.io/pypi/pyversions/huggingface_hub.svg"></a>
|
| 205 |
+
<a href="https://pypi.org/project/huggingface-hub"><img alt="PyPI - Downloads" src="https://img.shields.io/pypi/dm/huggingface_hub"></a>
|
| 206 |
+
<a href="https://codecov.io/gh/huggingface/huggingface_hub"><img alt="Code coverage" src="https://codecov.io/gh/huggingface/huggingface_hub/branch/main/graph/badge.svg?token=RXP95LE2XL"></a>
|
| 207 |
+
</p>
|
| 208 |
+
|
| 209 |
+
<h4 align="center">
|
| 210 |
+
<p>
|
| 211 |
+
<b>English</b> |
|
| 212 |
+
<a href="https://github.com/huggingface/huggingface_hub/blob/main/i18n/README_de.md">Deutsch</a> |
|
| 213 |
+
<a href="https://github.com/huggingface/huggingface_hub/blob/main/i18n/README_hi.md">हिंदी</a> |
|
| 214 |
+
<a href="https://github.com/huggingface/huggingface_hub/blob/main/i18n/README_ko.md">한국어</a> |
|
| 215 |
+
<a href="https://github.com/huggingface/huggingface_hub/blob/main/i18n/README_cn.md">中文(简体)</a>
|
| 216 |
+
</p>
|
| 217 |
+
</h4>
|
| 218 |
+
|
| 219 |
+
---
|
| 220 |
+
|
| 221 |
+
**Documentation**: <a href="https://hf.co/docs/huggingface_hub" target="_blank">https://hf.co/docs/huggingface_hub</a>
|
| 222 |
+
|
| 223 |
+
**Source Code**: <a href="https://github.com/huggingface/huggingface_hub" target="_blank">https://github.com/huggingface/huggingface_hub</a>
|
| 224 |
+
|
| 225 |
+
---
|
| 226 |
+
|
| 227 |
+
## Welcome to the huggingface_hub library
|
| 228 |
+
|
| 229 |
+
The `huggingface_hub` library allows you to interact with the [Hugging Face Hub](https://huggingface.co/), a platform democratizing open-source Machine Learning for creators and collaborators. Discover pre-trained models and datasets for your projects or play with the thousands of machine learning apps hosted on the Hub. You can also create and share your own models, datasets and demos with the community. The `huggingface_hub` library provides a simple way to do all these things with Python.
|
| 230 |
+
|
| 231 |
+
## Key features
|
| 232 |
+
|
| 233 |
+
- [Download files](https://huggingface.co/docs/huggingface_hub/en/guides/download) from the Hub.
|
| 234 |
+
- [Upload files](https://huggingface.co/docs/huggingface_hub/en/guides/upload) to the Hub.
|
| 235 |
+
- [Manage your repositories](https://huggingface.co/docs/huggingface_hub/en/guides/repository).
|
| 236 |
+
- [Run Inference](https://huggingface.co/docs/huggingface_hub/en/guides/inference) on deployed models.
|
| 237 |
+
- [Search](https://huggingface.co/docs/huggingface_hub/en/guides/search) for models, datasets and Spaces.
|
| 238 |
+
- [Share Model Cards](https://huggingface.co/docs/huggingface_hub/en/guides/model-cards) to document your models.
|
| 239 |
+
- [Engage with the community](https://huggingface.co/docs/huggingface_hub/en/guides/community) through PRs and comments.
|
| 240 |
+
|
| 241 |
+
## Installation
|
| 242 |
+
|
| 243 |
+
Install the `huggingface_hub` package with [pip](https://pypi.org/project/huggingface-hub/):
|
| 244 |
+
|
| 245 |
+
```bash
|
| 246 |
+
pip install huggingface_hub
|
| 247 |
+
```
|
| 248 |
+
|
| 249 |
+
If you prefer, you can also install it with [conda](https://huggingface.co/docs/huggingface_hub/en/installation#install-with-conda).
|
| 250 |
+
|
| 251 |
+
In order to keep the package minimal by default, `huggingface_hub` comes with optional dependencies useful for some use cases. For example, if you want to have a complete experience for Inference, run:
|
| 252 |
+
|
| 253 |
+
```bash
|
| 254 |
+
pip install "huggingface_hub[inference]"
|
| 255 |
+
```
|
| 256 |
+
|
| 257 |
+
To learn more about installation and optional dependencies, check out the [installation guide](https://huggingface.co/docs/huggingface_hub/en/installation).
|
| 258 |
+
|
| 259 |
+
## Quick start
|
| 260 |
+
|
| 261 |
+
### Download files
|
| 262 |
+
|
| 263 |
+
Download a single file
|
| 264 |
+
|
| 265 |
+
```py
|
| 266 |
+
from huggingface_hub import hf_hub_download
|
| 267 |
+
|
| 268 |
+
hf_hub_download(repo_id="tiiuae/falcon-7b-instruct", filename="config.json")
|
| 269 |
+
```
|
| 270 |
+
|
| 271 |
+
Or an entire repository
|
| 272 |
+
|
| 273 |
+
```py
|
| 274 |
+
from huggingface_hub import snapshot_download
|
| 275 |
+
|
| 276 |
+
snapshot_download("stabilityai/stable-diffusion-2-1")
|
| 277 |
+
```
|
| 278 |
+
|
| 279 |
+
Files will be downloaded in a local cache folder. More details in [this guide](https://huggingface.co/docs/huggingface_hub/en/guides/manage-cache).
|
| 280 |
+
|
| 281 |
+
### Login
|
| 282 |
+
|
| 283 |
+
The Hugging Face Hub uses tokens to authenticate applications (see [docs](https://huggingface.co/docs/hub/security-tokens)). To log in on your machine, run the following CLI command:
|
| 284 |
+
|
| 285 |
+
```bash
|
| 286 |
+
hf auth login
|
| 287 |
+
# or using an environment variable
|
| 288 |
+
hf auth login --token $HUGGINGFACE_TOKEN
|
| 289 |
+
```
|
| 290 |
+
|
| 291 |
+
### Create a repository
|
| 292 |
+
|
| 293 |
+
```py
|
| 294 |
+
from huggingface_hub import create_repo
|
| 295 |
+
|
| 296 |
+
create_repo(repo_id="super-cool-model")
|
| 297 |
+
```
|
| 298 |
+
|
| 299 |
+
### Upload files
|
| 300 |
+
|
| 301 |
+
Upload a single file
|
| 302 |
+
|
| 303 |
+
```py
|
| 304 |
+
from huggingface_hub import upload_file
|
| 305 |
+
|
| 306 |
+
upload_file(
|
| 307 |
+
path_or_fileobj="/home/lysandre/dummy-test/README.md",
|
| 308 |
+
path_in_repo="README.md",
|
| 309 |
+
repo_id="lysandre/test-model",
|
| 310 |
+
)
|
| 311 |
+
```
|
| 312 |
+
|
| 313 |
+
Or an entire folder
|
| 314 |
+
|
| 315 |
+
```py
|
| 316 |
+
from huggingface_hub import upload_folder
|
| 317 |
+
|
| 318 |
+
upload_folder(
|
| 319 |
+
folder_path="/path/to/local/space",
|
| 320 |
+
repo_id="username/my-cool-space",
|
| 321 |
+
repo_type="space",
|
| 322 |
+
)
|
| 323 |
+
```
|
| 324 |
+
|
| 325 |
+
For details, see the [upload guide](https://huggingface.co/docs/huggingface_hub/en/guides/upload).
|
| 326 |
+
|
| 327 |
+
## Integrating to the Hub.
|
| 328 |
+
|
| 329 |
+
We're partnering with cool open source ML libraries to provide free model hosting and versioning. You can find the existing integrations [here](https://huggingface.co/docs/hub/libraries).
|
| 330 |
+
|
| 331 |
+
The advantages are:
|
| 332 |
+
|
| 333 |
+
- Free model or dataset hosting for libraries and their users.
|
| 334 |
+
- Built-in file versioning, even with very large files, thanks to a git-based approach.
|
| 335 |
+
- In-browser widgets to play with the uploaded models.
|
| 336 |
+
- Anyone can upload a new model for your library; they just need to add the corresponding tag for the model to be discoverable.
|
| 337 |
+
- Fast downloads! We use Cloudfront (a CDN) to geo-replicate downloads so they're blazing fast from anywhere on the globe.
|
| 338 |
+
- Usage stats and more features to come.
|
| 339 |
+
|
| 340 |
+
If you would like to integrate your library, feel free to open an issue to begin the discussion. We wrote a [step-by-step guide](https://huggingface.co/docs/hub/adding-a-library) with ❤️ showing how to do this integration.
|
| 341 |
+
|
| 342 |
+
## Contributions (feature requests, bugs, etc.) are super welcome 💙💚💛💜🧡❤️
|
| 343 |
+
|
| 344 |
+
Everyone is welcome to contribute, and we value everybody's contribution. Code is not the only way to help the community.
|
| 345 |
+
Answering questions, helping others, reaching out and improving the documentation are immensely valuable to the community.
|
| 346 |
+
We wrote a [contribution guide](https://github.com/huggingface/huggingface_hub/blob/main/CONTRIBUTING.md) to summarize
|
| 347 |
+
how to get started to contribute to this repository.
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/RECORD
ADDED
|
@@ -0,0 +1,336 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
../../../bin/hf,sha256=9ozTW_4otW8kP3AUHvO8MGxkYj-SzMz1_GPKhDOsiro,283
|
| 2 |
+
../../../bin/huggingface-cli,sha256=NKTBg-JNllcLHRYGmFlm5IuQ-2CHqu6StEwS5IBObDg,301
|
| 3 |
+
../../../bin/tiny-agents,sha256=VjA5kJPuGbyFCRTRyYbs-nVl-hOSN1RzWwDBKKp4cn4,293
|
| 4 |
+
huggingface_hub-0.36.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 5 |
+
huggingface_hub-0.36.2.dist-info/METADATA,sha256=TcKwo_snvLqLCR-YvM_0uUNp0mnJdorq7NC5nZcGZdk,15201
|
| 6 |
+
huggingface_hub-0.36.2.dist-info/RECORD,,
|
| 7 |
+
huggingface_hub-0.36.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 8 |
+
huggingface_hub-0.36.2.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
| 9 |
+
huggingface_hub-0.36.2.dist-info/entry_points.txt,sha256=FGUdvu8z-x7lvoJ4udumhcg3AtzigPraCn_ZbjEhIto,218
|
| 10 |
+
huggingface_hub-0.36.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
| 11 |
+
huggingface_hub-0.36.2.dist-info/top_level.txt,sha256=8KzlQJAY4miUvjAssOAJodqKOw3harNzuiwGQ9qLSSk,16
|
| 12 |
+
huggingface_hub/__init__.py,sha256=5Ya2RhJIISj9JBGTZLxNxtypOEBxNORUpeh2NBs2TjU,52675
|
| 13 |
+
huggingface_hub/__pycache__/__init__.cpython-312.pyc,,
|
| 14 |
+
huggingface_hub/__pycache__/_commit_api.cpython-312.pyc,,
|
| 15 |
+
huggingface_hub/__pycache__/_commit_scheduler.cpython-312.pyc,,
|
| 16 |
+
huggingface_hub/__pycache__/_inference_endpoints.cpython-312.pyc,,
|
| 17 |
+
huggingface_hub/__pycache__/_jobs_api.cpython-312.pyc,,
|
| 18 |
+
huggingface_hub/__pycache__/_local_folder.cpython-312.pyc,,
|
| 19 |
+
huggingface_hub/__pycache__/_login.cpython-312.pyc,,
|
| 20 |
+
huggingface_hub/__pycache__/_oauth.cpython-312.pyc,,
|
| 21 |
+
huggingface_hub/__pycache__/_snapshot_download.cpython-312.pyc,,
|
| 22 |
+
huggingface_hub/__pycache__/_space_api.cpython-312.pyc,,
|
| 23 |
+
huggingface_hub/__pycache__/_tensorboard_logger.cpython-312.pyc,,
|
| 24 |
+
huggingface_hub/__pycache__/_upload_large_folder.cpython-312.pyc,,
|
| 25 |
+
huggingface_hub/__pycache__/_webhooks_payload.cpython-312.pyc,,
|
| 26 |
+
huggingface_hub/__pycache__/_webhooks_server.cpython-312.pyc,,
|
| 27 |
+
huggingface_hub/__pycache__/community.cpython-312.pyc,,
|
| 28 |
+
huggingface_hub/__pycache__/constants.cpython-312.pyc,,
|
| 29 |
+
huggingface_hub/__pycache__/dataclasses.cpython-312.pyc,,
|
| 30 |
+
huggingface_hub/__pycache__/errors.cpython-312.pyc,,
|
| 31 |
+
huggingface_hub/__pycache__/fastai_utils.cpython-312.pyc,,
|
| 32 |
+
huggingface_hub/__pycache__/file_download.cpython-312.pyc,,
|
| 33 |
+
huggingface_hub/__pycache__/hf_api.cpython-312.pyc,,
|
| 34 |
+
huggingface_hub/__pycache__/hf_file_system.cpython-312.pyc,,
|
| 35 |
+
huggingface_hub/__pycache__/hub_mixin.cpython-312.pyc,,
|
| 36 |
+
huggingface_hub/__pycache__/inference_api.cpython-312.pyc,,
|
| 37 |
+
huggingface_hub/__pycache__/keras_mixin.cpython-312.pyc,,
|
| 38 |
+
huggingface_hub/__pycache__/lfs.cpython-312.pyc,,
|
| 39 |
+
huggingface_hub/__pycache__/repocard.cpython-312.pyc,,
|
| 40 |
+
huggingface_hub/__pycache__/repocard_data.cpython-312.pyc,,
|
| 41 |
+
huggingface_hub/__pycache__/repository.cpython-312.pyc,,
|
| 42 |
+
huggingface_hub/_commit_api.py,sha256=pGESDsicpWMeZnct-71635KgTfvUoyok_hPl9ZgIIWI,41010
|
| 43 |
+
huggingface_hub/_commit_scheduler.py,sha256=P64poLZoTJnSyR39SN6w5s9bLyngKstWee03fpoVETQ,14660
|
| 44 |
+
huggingface_hub/_inference_endpoints.py,sha256=ahmbPcEXsJ_JcMb9TDgdkD8Z2z9uytkFG3_1o6dTm8g,17598
|
| 45 |
+
huggingface_hub/_jobs_api.py,sha256=OFcbChcXsLvaX4oGumsHscZKAzsueYIhh0Z6Y4ycpio,10883
|
| 46 |
+
huggingface_hub/_local_folder.py,sha256=2iHXNgIT3UdSt2PvCovd0NzgVxTRypKb-rvAFLK-gZU,17305
|
| 47 |
+
huggingface_hub/_login.py,sha256=TWNkZpMPkDuttQ36uoi-ozLQ1IcXVsZ42tbcQ-b-h0Q,20248
|
| 48 |
+
huggingface_hub/_oauth.py,sha256=75ya9toHxC0WRKsLOAI212CrssRjTSxs16mHWWNMb3w,18714
|
| 49 |
+
huggingface_hub/_snapshot_download.py,sha256=b-NzYQcvktsAirIfGQKgzQwu8w0S6lhBTvnJ5S6saw8,16166
|
| 50 |
+
huggingface_hub/_space_api.py,sha256=jb6rF8qLtjaNU12D-8ygAPM26xDiHCu8CHXHowhGTmg,5470
|
| 51 |
+
huggingface_hub/_tensorboard_logger.py,sha256=tUdQzx-wXF4yjoGJG2izqZrn-IPMflMBWMkl1sKYzo0,8420
|
| 52 |
+
huggingface_hub/_upload_large_folder.py,sha256=l2YWLZttOw69EGdihT3y_Nhr5mweLGooZG9L8smNoHY,30066
|
| 53 |
+
huggingface_hub/_webhooks_payload.py,sha256=Xm3KaK7tCOGBlXkuZvbym6zjHXrT1XCrbUFWuXiBmNY,3617
|
| 54 |
+
huggingface_hub/_webhooks_server.py,sha256=RLrQuCHlDH_qUQJQOm11fKFDEhIUR2IxwazuKy-T9Uo,15672
|
| 55 |
+
huggingface_hub/cli/__init__.py,sha256=xzX1qgAvrtAX4gP59WrPlvOZFLuzuTgcjvanQvcpgHc,928
|
| 56 |
+
huggingface_hub/cli/__pycache__/__init__.cpython-312.pyc,,
|
| 57 |
+
huggingface_hub/cli/__pycache__/_cli_utils.cpython-312.pyc,,
|
| 58 |
+
huggingface_hub/cli/__pycache__/auth.cpython-312.pyc,,
|
| 59 |
+
huggingface_hub/cli/__pycache__/cache.cpython-312.pyc,,
|
| 60 |
+
huggingface_hub/cli/__pycache__/download.cpython-312.pyc,,
|
| 61 |
+
huggingface_hub/cli/__pycache__/hf.cpython-312.pyc,,
|
| 62 |
+
huggingface_hub/cli/__pycache__/jobs.cpython-312.pyc,,
|
| 63 |
+
huggingface_hub/cli/__pycache__/lfs.cpython-312.pyc,,
|
| 64 |
+
huggingface_hub/cli/__pycache__/repo.cpython-312.pyc,,
|
| 65 |
+
huggingface_hub/cli/__pycache__/repo_files.cpython-312.pyc,,
|
| 66 |
+
huggingface_hub/cli/__pycache__/system.cpython-312.pyc,,
|
| 67 |
+
huggingface_hub/cli/__pycache__/upload.cpython-312.pyc,,
|
| 68 |
+
huggingface_hub/cli/__pycache__/upload_large_folder.cpython-312.pyc,,
|
| 69 |
+
huggingface_hub/cli/_cli_utils.py,sha256=Nt6CjbkYqQQRuh70bUXVA6rZpbZt_Sa1WqBUxjQLu6g,2095
|
| 70 |
+
huggingface_hub/cli/auth.py,sha256=XSsbU7-_TS5IXdASkgUCdQeoXVG82VUyGYvOS4oLLRs,7317
|
| 71 |
+
huggingface_hub/cli/cache.py,sha256=fQjYfbRUapeHsK10Y6w_Ixu9JKyuZyM7pJzExJGd_2c,15855
|
| 72 |
+
huggingface_hub/cli/download.py,sha256=8b5wqhMYg3X9tar9EEeWdPZk9um1kZTI_WgBqyiatqs,7141
|
| 73 |
+
huggingface_hub/cli/hf.py,sha256=SQ73_SXEQnWVJkhKT_6bwNQBHQXGOdI5qqlTTtI0XH0,2328
|
| 74 |
+
huggingface_hub/cli/jobs.py,sha256=eA6Q7iy_-7vjU4SjYPvn71b2aVo2qt3q-pVxLyXCWqg,44317
|
| 75 |
+
huggingface_hub/cli/lfs.py,sha256=J9MkKOGUW6GjBrKs2zZUCOaAGxpatxsEoSbBjuhDJV8,7230
|
| 76 |
+
huggingface_hub/cli/repo.py,sha256=CuOqQZ7WELLk9Raf3tnyXILt9e93OrlS8Dyxx3BqdQA,10618
|
| 77 |
+
huggingface_hub/cli/repo_files.py,sha256=9oeeQJx8Z0ygbTElw1o5T6dGtRbeolcXENt_ouEBvjk,4844
|
| 78 |
+
huggingface_hub/cli/system.py,sha256=eLSYME7ywt5Ae3tYQnS43Tai2pR2JLtA1KGImzPt5pM,1707
|
| 79 |
+
huggingface_hub/cli/upload.py,sha256=lOHR_JzfM2XL_pYK3Z1HlGnaAI-fw7xGY46Lccvbsy4,14362
|
| 80 |
+
huggingface_hub/cli/upload_large_folder.py,sha256=w4RIW0yZKTnNnhDOB6yISnIo_h_Hy13KwWVzrFzczpY,6164
|
| 81 |
+
huggingface_hub/commands/__init__.py,sha256=AkbM2a-iGh0Vq_xAWhK3mu3uZ44km8-X5uWjKcvcrUQ,928
|
| 82 |
+
huggingface_hub/commands/__pycache__/__init__.cpython-312.pyc,,
|
| 83 |
+
huggingface_hub/commands/__pycache__/_cli_utils.cpython-312.pyc,,
|
| 84 |
+
huggingface_hub/commands/__pycache__/delete_cache.cpython-312.pyc,,
|
| 85 |
+
huggingface_hub/commands/__pycache__/download.cpython-312.pyc,,
|
| 86 |
+
huggingface_hub/commands/__pycache__/env.cpython-312.pyc,,
|
| 87 |
+
huggingface_hub/commands/__pycache__/huggingface_cli.cpython-312.pyc,,
|
| 88 |
+
huggingface_hub/commands/__pycache__/lfs.cpython-312.pyc,,
|
| 89 |
+
huggingface_hub/commands/__pycache__/repo.cpython-312.pyc,,
|
| 90 |
+
huggingface_hub/commands/__pycache__/repo_files.cpython-312.pyc,,
|
| 91 |
+
huggingface_hub/commands/__pycache__/scan_cache.cpython-312.pyc,,
|
| 92 |
+
huggingface_hub/commands/__pycache__/tag.cpython-312.pyc,,
|
| 93 |
+
huggingface_hub/commands/__pycache__/upload.cpython-312.pyc,,
|
| 94 |
+
huggingface_hub/commands/__pycache__/upload_large_folder.cpython-312.pyc,,
|
| 95 |
+
huggingface_hub/commands/__pycache__/user.cpython-312.pyc,,
|
| 96 |
+
huggingface_hub/commands/__pycache__/version.cpython-312.pyc,,
|
| 97 |
+
huggingface_hub/commands/_cli_utils.py,sha256=ePYTIEWnU677nPvdNC5AdYcEB1400L6qYEUxMkVUzME,2329
|
| 98 |
+
huggingface_hub/commands/delete_cache.py,sha256=035yACUtVUIG8tEtc5vexDoFFphzdk5IXkFTlD4WMiw,17738
|
| 99 |
+
huggingface_hub/commands/download.py,sha256=0QY9ho7eiAPvFndBPttGtH6vXNk3r9AioltNwc8h1Z4,8310
|
| 100 |
+
huggingface_hub/commands/env.py,sha256=qv4SmjuzUz9exo4RDMY2HqabLCKE1oRb55cBA6LN9R4,1342
|
| 101 |
+
huggingface_hub/commands/huggingface_cli.py,sha256=gDi7JueyiLD0bGclTEYfHPQWpAY_WBdPfHT7vkqa5v0,2654
|
| 102 |
+
huggingface_hub/commands/lfs.py,sha256=xdbnNRO04UuQemEhUGT809jFgQn9Rj-SnyT_0Ph-VYg,7342
|
| 103 |
+
huggingface_hub/commands/repo.py,sha256=WcRDFqUYKB0Kz0zFopegiG614ot6VOYTAf6jht0BMss,6042
|
| 104 |
+
huggingface_hub/commands/repo_files.py,sha256=ftjLCC3XCY-AMmiYiZPIdRMmIqZbqVZw-BSjBLcZup4,5054
|
| 105 |
+
huggingface_hub/commands/scan_cache.py,sha256=gQlhBZgWkUzH4wrIYnvgV7CA4C7rvV2SuY0x2JCB7g0,8675
|
| 106 |
+
huggingface_hub/commands/tag.py,sha256=4fgQuXJHG59lTVyOjIUZjxdJDL4JZW4q10XDPSo-gss,6382
|
| 107 |
+
huggingface_hub/commands/upload.py,sha256=eAJIig4ljtO9FRyGjiz6HbHS-Q4MOQziRgzjQrl5Koo,14576
|
| 108 |
+
huggingface_hub/commands/upload_large_folder.py,sha256=_1id84BFtbL8HgFRKZ-el_uPrijamz1qWlzO16KbUAc,6254
|
| 109 |
+
huggingface_hub/commands/user.py,sha256=dDpi0mLYvTeYf0fhPVQyEJsn7Wrk6gWvR5YHC6RgebU,7516
|
| 110 |
+
huggingface_hub/commands/version.py,sha256=rGpCbvxImY9eQqXrshYt609Iws27R75WARmKQrIo6Ok,1390
|
| 111 |
+
huggingface_hub/community.py,sha256=exJxrySnXURAijkVOcreuwM5JAuuz2L1xTSDkd223wk,12365
|
| 112 |
+
huggingface_hub/constants.py,sha256=nILseAp4rqLu_KQTZDpPGOhepVAPanD7azbomAvovj0,10313
|
| 113 |
+
huggingface_hub/dataclasses.py,sha256=rjQfuX9MeTXZQrCQC8JvkjpARDehOiSluE7Kz1L7Ueg,17337
|
| 114 |
+
huggingface_hub/errors.py,sha256=HVqmnJODe1wy1cYsx7AfjrwE4DD-gdKVvMTYTBfLjpA,11265
|
| 115 |
+
huggingface_hub/fastai_utils.py,sha256=m7wwWk-TdhIB1CJMigAzzUBP4eLQALutEzwjWf9Ej-o,16755
|
| 116 |
+
huggingface_hub/file_download.py,sha256=C76FMg1Rg7401K9UpwOAnFd1UG2ko0bL9AES2mM7Ntg,79254
|
| 117 |
+
huggingface_hub/hf_api.py,sha256=REMm9AFgUtyizI6tkEy6glX2Aa7-TH7-uWhlhl0q0fE,487935
|
| 118 |
+
huggingface_hub/hf_file_system.py,sha256=uLeublBZhWd4309fE3eFHIN8G7RCrX2_6_gr0BYjuzQ,48338
|
| 119 |
+
huggingface_hub/hub_mixin.py,sha256=Ii3w9o7XgGbj6UNPnieW5IDfaCd8OEKpIH1hRkncRDQ,38208
|
| 120 |
+
huggingface_hub/inference/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 121 |
+
huggingface_hub/inference/__pycache__/__init__.cpython-312.pyc,,
|
| 122 |
+
huggingface_hub/inference/__pycache__/_client.cpython-312.pyc,,
|
| 123 |
+
huggingface_hub/inference/__pycache__/_common.cpython-312.pyc,,
|
| 124 |
+
huggingface_hub/inference/_client.py,sha256=9cAIkBFuzFC5f6jVp62MJNDSUcPqxsFluhQLi6FqXdc,157536
|
| 125 |
+
huggingface_hub/inference/_common.py,sha256=dI3OPg0320OOB0FRy_kqftW9F3ghEnBVA5Gi4VaSctg,15778
|
| 126 |
+
huggingface_hub/inference/_generated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 127 |
+
huggingface_hub/inference/_generated/__pycache__/__init__.cpython-312.pyc,,
|
| 128 |
+
huggingface_hub/inference/_generated/__pycache__/_async_client.cpython-312.pyc,,
|
| 129 |
+
huggingface_hub/inference/_generated/_async_client.py,sha256=DSOAXJ_TxRubPisWnVKzepXalDA7PcE-NG3oczo8iMw,163445
|
| 130 |
+
huggingface_hub/inference/_generated/types/__init__.py,sha256=9WvrGQ8aThtKSNzZF06j-CIE2ZuItne8FFnea1p1u38,6557
|
| 131 |
+
huggingface_hub/inference/_generated/types/__pycache__/__init__.cpython-312.pyc,,
|
| 132 |
+
huggingface_hub/inference/_generated/types/__pycache__/audio_classification.cpython-312.pyc,,
|
| 133 |
+
huggingface_hub/inference/_generated/types/__pycache__/audio_to_audio.cpython-312.pyc,,
|
| 134 |
+
huggingface_hub/inference/_generated/types/__pycache__/automatic_speech_recognition.cpython-312.pyc,,
|
| 135 |
+
huggingface_hub/inference/_generated/types/__pycache__/base.cpython-312.pyc,,
|
| 136 |
+
huggingface_hub/inference/_generated/types/__pycache__/chat_completion.cpython-312.pyc,,
|
| 137 |
+
huggingface_hub/inference/_generated/types/__pycache__/depth_estimation.cpython-312.pyc,,
|
| 138 |
+
huggingface_hub/inference/_generated/types/__pycache__/document_question_answering.cpython-312.pyc,,
|
| 139 |
+
huggingface_hub/inference/_generated/types/__pycache__/feature_extraction.cpython-312.pyc,,
|
| 140 |
+
huggingface_hub/inference/_generated/types/__pycache__/fill_mask.cpython-312.pyc,,
|
| 141 |
+
huggingface_hub/inference/_generated/types/__pycache__/image_classification.cpython-312.pyc,,
|
| 142 |
+
huggingface_hub/inference/_generated/types/__pycache__/image_segmentation.cpython-312.pyc,,
|
| 143 |
+
huggingface_hub/inference/_generated/types/__pycache__/image_to_image.cpython-312.pyc,,
|
| 144 |
+
huggingface_hub/inference/_generated/types/__pycache__/image_to_text.cpython-312.pyc,,
|
| 145 |
+
huggingface_hub/inference/_generated/types/__pycache__/image_to_video.cpython-312.pyc,,
|
| 146 |
+
huggingface_hub/inference/_generated/types/__pycache__/object_detection.cpython-312.pyc,,
|
| 147 |
+
huggingface_hub/inference/_generated/types/__pycache__/question_answering.cpython-312.pyc,,
|
| 148 |
+
huggingface_hub/inference/_generated/types/__pycache__/sentence_similarity.cpython-312.pyc,,
|
| 149 |
+
huggingface_hub/inference/_generated/types/__pycache__/summarization.cpython-312.pyc,,
|
| 150 |
+
huggingface_hub/inference/_generated/types/__pycache__/table_question_answering.cpython-312.pyc,,
|
| 151 |
+
huggingface_hub/inference/_generated/types/__pycache__/text2text_generation.cpython-312.pyc,,
|
| 152 |
+
huggingface_hub/inference/_generated/types/__pycache__/text_classification.cpython-312.pyc,,
|
| 153 |
+
huggingface_hub/inference/_generated/types/__pycache__/text_generation.cpython-312.pyc,,
|
| 154 |
+
huggingface_hub/inference/_generated/types/__pycache__/text_to_audio.cpython-312.pyc,,
|
| 155 |
+
huggingface_hub/inference/_generated/types/__pycache__/text_to_image.cpython-312.pyc,,
|
| 156 |
+
huggingface_hub/inference/_generated/types/__pycache__/text_to_speech.cpython-312.pyc,,
|
| 157 |
+
huggingface_hub/inference/_generated/types/__pycache__/text_to_video.cpython-312.pyc,,
|
| 158 |
+
huggingface_hub/inference/_generated/types/__pycache__/token_classification.cpython-312.pyc,,
|
| 159 |
+
huggingface_hub/inference/_generated/types/__pycache__/translation.cpython-312.pyc,,
|
| 160 |
+
huggingface_hub/inference/_generated/types/__pycache__/video_classification.cpython-312.pyc,,
|
| 161 |
+
huggingface_hub/inference/_generated/types/__pycache__/visual_question_answering.cpython-312.pyc,,
|
| 162 |
+
huggingface_hub/inference/_generated/types/__pycache__/zero_shot_classification.cpython-312.pyc,,
|
| 163 |
+
huggingface_hub/inference/_generated/types/__pycache__/zero_shot_image_classification.cpython-312.pyc,,
|
| 164 |
+
huggingface_hub/inference/_generated/types/__pycache__/zero_shot_object_detection.cpython-312.pyc,,
|
| 165 |
+
huggingface_hub/inference/_generated/types/audio_classification.py,sha256=Jg3mzfGhCSH6CfvVvgJSiFpkz6v4nNA0G4LJXacEgNc,1573
|
| 166 |
+
huggingface_hub/inference/_generated/types/audio_to_audio.py,sha256=2Ep4WkePL7oJwcp5nRJqApwviumGHbft9HhXE9XLHj4,891
|
| 167 |
+
huggingface_hub/inference/_generated/types/automatic_speech_recognition.py,sha256=8CEphr6rvRHgq1L5Md3tq14V0tEAmzJkemh1_7gSswo,5515
|
| 168 |
+
huggingface_hub/inference/_generated/types/base.py,sha256=4XG49q0-2SOftYQ8HXQnWLxiJktou-a7IoG3kdOv-kg,6751
|
| 169 |
+
huggingface_hub/inference/_generated/types/chat_completion.py,sha256=j1Y8G4g5yGs4g7N4sXWbipF8TwkQG0J-ftL9OxejkBw,11254
|
| 170 |
+
huggingface_hub/inference/_generated/types/depth_estimation.py,sha256=rcpe9MhYMeLjflOwBs3KMZPr6WjOH3FYEThStG-FJ3M,929
|
| 171 |
+
huggingface_hub/inference/_generated/types/document_question_answering.py,sha256=6BEYGwJcqGlah4RBJDAvWFTEXkO0mosBiMy82432nAM,3202
|
| 172 |
+
huggingface_hub/inference/_generated/types/feature_extraction.py,sha256=NMWVL_TLSG5SS5bdt1-fflkZ75UMlMKeTMtmdnUTADc,1537
|
| 173 |
+
huggingface_hub/inference/_generated/types/fill_mask.py,sha256=OrTgQ7Ndn0_dWK5thQhZwTOHbQni8j0iJcx9llyhRds,1708
|
| 174 |
+
huggingface_hub/inference/_generated/types/image_classification.py,sha256=A-Y024o8723_n8mGVos4TwdAkVL62McGeL1iIo4VzNs,1585
|
| 175 |
+
huggingface_hub/inference/_generated/types/image_segmentation.py,sha256=vrkI4SuP1Iq_iLXc-2pQhYY3SHN4gzvFBoZqbUHxU7o,1950
|
| 176 |
+
huggingface_hub/inference/_generated/types/image_to_image.py,sha256=snvGbmCdqchxGef25MceD7LSKAmVkIgnoX5t71rdlAQ,2290
|
| 177 |
+
huggingface_hub/inference/_generated/types/image_to_text.py,sha256=OaFEBAfgT-fOVzJ7xVermGf7VODhrc9-Jg38WrM7-2o,4810
|
| 178 |
+
huggingface_hub/inference/_generated/types/image_to_video.py,sha256=bC-L_cNsDhk4s_IdSiprJ9d1NeMGePLcUp7UPpco21w,2240
|
| 179 |
+
huggingface_hub/inference/_generated/types/object_detection.py,sha256=VuFlb1281qTXoSgJDmquGz-VNfEZLo2H0Rh_F6MF6ts,2000
|
| 180 |
+
huggingface_hub/inference/_generated/types/question_answering.py,sha256=zw38a9_9l2k1ifYZefjkioqZ4asfSRM9M4nU3gSCmAQ,2898
|
| 181 |
+
huggingface_hub/inference/_generated/types/sentence_similarity.py,sha256=w5Nj1g18eBzopZwxuDLI-fEsyaCK2KrHA5yf_XfSjgo,1052
|
| 182 |
+
huggingface_hub/inference/_generated/types/summarization.py,sha256=WGGr8uDLrZg8JQgF9ZMUP9euw6uZo6zwkVZ-IfvCFI0,1487
|
| 183 |
+
huggingface_hub/inference/_generated/types/table_question_answering.py,sha256=cJnIPA2fIbQP2Ejn7X_esY48qGWoXg30fnNOqCXiOVQ,2293
|
| 184 |
+
huggingface_hub/inference/_generated/types/text2text_generation.py,sha256=v-418w1JNNSZ2tuW9DUl6a36TQQCADa438A3ufvcbOw,1609
|
| 185 |
+
huggingface_hub/inference/_generated/types/text_classification.py,sha256=FarAjygLEfPofLfKeabzJ7PKEBItlHGoUNUOzyLRpL4,1445
|
| 186 |
+
huggingface_hub/inference/_generated/types/text_generation.py,sha256=28u-1zU7elk2teP3y4u1VAtDDHzY0JZ2KEEJe5d5uvg,5922
|
| 187 |
+
huggingface_hub/inference/_generated/types/text_to_audio.py,sha256=1HR9Q6s9MXqtKGTvHPLGVMum5-eg7O-Pgv6Nd0v8_HU,4741
|
| 188 |
+
huggingface_hub/inference/_generated/types/text_to_image.py,sha256=sGGi1Fa0n5Pmd6G3I-F2SBJcJ1M7Gmqnng6sfi0AVzs,1903
|
| 189 |
+
huggingface_hub/inference/_generated/types/text_to_speech.py,sha256=ROFuR32ijROCeqbv81Jos0lmaA8SRWyIUsWrdD4yWow,4760
|
| 190 |
+
huggingface_hub/inference/_generated/types/text_to_video.py,sha256=yHXVNs3t6aYO7visrBlB5cH7kjoysxF9510aofcf_18,1790
|
| 191 |
+
huggingface_hub/inference/_generated/types/token_classification.py,sha256=iblAcgfxXeaLYJ14NdiiCMIQuBlarUknLkXUklhvcLI,1915
|
| 192 |
+
huggingface_hub/inference/_generated/types/translation.py,sha256=xww4X5cfCYv_F0oINWLwqJRPCT6SV3VBAJuPjTs_j7o,1763
|
| 193 |
+
huggingface_hub/inference/_generated/types/video_classification.py,sha256=TyydjQw2NRLK9sDGzJUVnkDeo848ebmCx588Ur8I9q0,1680
|
| 194 |
+
huggingface_hub/inference/_generated/types/visual_question_answering.py,sha256=AWrQ6qo4gZa3PGedaNpzDFqx5yOYyjhnUB6iuZEj_uo,1673
|
| 195 |
+
huggingface_hub/inference/_generated/types/zero_shot_classification.py,sha256=BAiebPjsqoNa8EU35Dx0pfIv8W2c4GSl-TJckV1MaxQ,1738
|
| 196 |
+
huggingface_hub/inference/_generated/types/zero_shot_image_classification.py,sha256=8J9n6VqFARkWvPfAZNWEG70AlrMGldU95EGQQwn06zI,1487
|
| 197 |
+
huggingface_hub/inference/_generated/types/zero_shot_object_detection.py,sha256=GUd81LIV7oEbRWayDlAVgyLmY596r1M3AW0jXDp1yTA,1630
|
| 198 |
+
huggingface_hub/inference/_mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 199 |
+
huggingface_hub/inference/_mcp/__pycache__/__init__.cpython-312.pyc,,
|
| 200 |
+
huggingface_hub/inference/_mcp/__pycache__/_cli_hacks.cpython-312.pyc,,
|
| 201 |
+
huggingface_hub/inference/_mcp/__pycache__/agent.cpython-312.pyc,,
|
| 202 |
+
huggingface_hub/inference/_mcp/__pycache__/cli.cpython-312.pyc,,
|
| 203 |
+
huggingface_hub/inference/_mcp/__pycache__/constants.cpython-312.pyc,,
|
| 204 |
+
huggingface_hub/inference/_mcp/__pycache__/mcp_client.cpython-312.pyc,,
|
| 205 |
+
huggingface_hub/inference/_mcp/__pycache__/types.cpython-312.pyc,,
|
| 206 |
+
huggingface_hub/inference/_mcp/__pycache__/utils.cpython-312.pyc,,
|
| 207 |
+
huggingface_hub/inference/_mcp/_cli_hacks.py,sha256=KX9HZJPa1p8ngY3mtYGGlVUXfg4vYbbBRs-8HLToP04,3284
|
| 208 |
+
huggingface_hub/inference/_mcp/agent.py,sha256=jqvQwOajY41RIhCtD-XgVfuWbTouSYCQkIWJ1gHRrJQ,4262
|
| 209 |
+
huggingface_hub/inference/_mcp/cli.py,sha256=AmSUT6wXlE6EWmI0SfQgTWYnL07322zGwwk2yMZZlBc,9640
|
| 210 |
+
huggingface_hub/inference/_mcp/constants.py,sha256=kldRfaidXMdyMl_jLosaQomgWDv4shvnFe3dnQNwXSU,2511
|
| 211 |
+
huggingface_hub/inference/_mcp/mcp_client.py,sha256=9rcwOO7L2Ih0oGLkeY9o5gbkwEBmsDkHKf4XAmp4Mvc,16784
|
| 212 |
+
huggingface_hub/inference/_mcp/types.py,sha256=3gq-P_mrmvPI6KWBqjCxavtMPiGz10YXog7wg4oJYAo,941
|
| 213 |
+
huggingface_hub/inference/_mcp/utils.py,sha256=KFsGOC8dytS3VgaugBzibdteWasZ9CAnp83U2SyIlMw,4188
|
| 214 |
+
huggingface_hub/inference/_providers/__init__.py,sha256=UxPnzOdVcJgroPEatuahb4fsHaObUYPrwUCzv5ADCa4,9019
|
| 215 |
+
huggingface_hub/inference/_providers/__pycache__/__init__.cpython-312.pyc,,
|
| 216 |
+
huggingface_hub/inference/_providers/__pycache__/_common.cpython-312.pyc,,
|
| 217 |
+
huggingface_hub/inference/_providers/__pycache__/black_forest_labs.cpython-312.pyc,,
|
| 218 |
+
huggingface_hub/inference/_providers/__pycache__/cerebras.cpython-312.pyc,,
|
| 219 |
+
huggingface_hub/inference/_providers/__pycache__/clarifai.cpython-312.pyc,,
|
| 220 |
+
huggingface_hub/inference/_providers/__pycache__/cohere.cpython-312.pyc,,
|
| 221 |
+
huggingface_hub/inference/_providers/__pycache__/fal_ai.cpython-312.pyc,,
|
| 222 |
+
huggingface_hub/inference/_providers/__pycache__/featherless_ai.cpython-312.pyc,,
|
| 223 |
+
huggingface_hub/inference/_providers/__pycache__/fireworks_ai.cpython-312.pyc,,
|
| 224 |
+
huggingface_hub/inference/_providers/__pycache__/groq.cpython-312.pyc,,
|
| 225 |
+
huggingface_hub/inference/_providers/__pycache__/hf_inference.cpython-312.pyc,,
|
| 226 |
+
huggingface_hub/inference/_providers/__pycache__/hyperbolic.cpython-312.pyc,,
|
| 227 |
+
huggingface_hub/inference/_providers/__pycache__/nebius.cpython-312.pyc,,
|
| 228 |
+
huggingface_hub/inference/_providers/__pycache__/novita.cpython-312.pyc,,
|
| 229 |
+
huggingface_hub/inference/_providers/__pycache__/nscale.cpython-312.pyc,,
|
| 230 |
+
huggingface_hub/inference/_providers/__pycache__/openai.cpython-312.pyc,,
|
| 231 |
+
huggingface_hub/inference/_providers/__pycache__/publicai.cpython-312.pyc,,
|
| 232 |
+
huggingface_hub/inference/_providers/__pycache__/replicate.cpython-312.pyc,,
|
| 233 |
+
huggingface_hub/inference/_providers/__pycache__/sambanova.cpython-312.pyc,,
|
| 234 |
+
huggingface_hub/inference/_providers/__pycache__/scaleway.cpython-312.pyc,,
|
| 235 |
+
huggingface_hub/inference/_providers/__pycache__/together.cpython-312.pyc,,
|
| 236 |
+
huggingface_hub/inference/_providers/__pycache__/zai_org.cpython-312.pyc,,
|
| 237 |
+
huggingface_hub/inference/_providers/_common.py,sha256=brZJ1CUxDKooPdmVlm4cuKjvaW_refVY0Y7CbGQe7e4,12373
|
| 238 |
+
huggingface_hub/inference/_providers/black_forest_labs.py,sha256=FIukZoIFt_FDrTTDfpF-Vko5sXnmH0QvVIsMtV2Jzm8,2852
|
| 239 |
+
huggingface_hub/inference/_providers/cerebras.py,sha256=QOJ-1U-os7uE7p6eUnn_P_APq-yQhx28be7c3Tq2EuA,210
|
| 240 |
+
huggingface_hub/inference/_providers/clarifai.py,sha256=1cEXQwhGk4DRKiPCQUa5y-L6okTo4781EImQC8yJVOw,380
|
| 241 |
+
huggingface_hub/inference/_providers/cohere.py,sha256=O3tC-qIUL91mx_mE8bOHCtDWcQuKOUauhUoXSUBUCZ8,1253
|
| 242 |
+
huggingface_hub/inference/_providers/fal_ai.py,sha256=pCr5qP6R1W1CrEw-_nKdNuP3UqsUi58yL18w4r7mXRo,9989
|
| 243 |
+
huggingface_hub/inference/_providers/featherless_ai.py,sha256=QxBz-32O4PztxixrIjrfKuTOzvfqyUi-cVsw0Hf_zlY,1382
|
| 244 |
+
huggingface_hub/inference/_providers/fireworks_ai.py,sha256=Id226ITfPkOcFMFzly3MW9l-dZl9l4qizL4JEHWkBFk,1215
|
| 245 |
+
huggingface_hub/inference/_providers/groq.py,sha256=JTk2JV4ZOlaohho7zLAFQtk92kGVsPmLJ1hmzcwsqvQ,315
|
| 246 |
+
huggingface_hub/inference/_providers/hf_inference.py,sha256=0yi3cR-EJ4HYx3mSzOsMOTVmvVBkaajTzTfKB8JXQpk,9540
|
| 247 |
+
huggingface_hub/inference/_providers/hyperbolic.py,sha256=OQIBi2j3aNvuaSQ8BUK1K1PVeRXdrxc80G-6YmBa-ns,1985
|
| 248 |
+
huggingface_hub/inference/_providers/nebius.py,sha256=VJpTF2JZ58rznc9wxdk-57vwF8sV2vESw_WkXjXqCho,3580
|
| 249 |
+
huggingface_hub/inference/_providers/novita.py,sha256=HGVC8wPraRQUuI5uBoye1Y4Wqe4X116B71GhhbWy5yM,2514
|
| 250 |
+
huggingface_hub/inference/_providers/nscale.py,sha256=qWUsWinQmUbNUqehyKn34tVoWehu8gd-OZ2F4uj2SWM,1802
|
| 251 |
+
huggingface_hub/inference/_providers/openai.py,sha256=GCVYeNdjWIgpQQ7E_Xv8IebmdhTi0S6WfFosz3nLtps,1089
|
| 252 |
+
huggingface_hub/inference/_providers/publicai.py,sha256=1I2W6rORloB5QHSvky4njZO2XKLTwA-kPdNoauoT5rg,210
|
| 253 |
+
huggingface_hub/inference/_providers/replicate.py,sha256=otVfPkfBtlWrpjQub4V__t7g_w8Ewc7ZU3efiOauW-I,3820
|
| 254 |
+
huggingface_hub/inference/_providers/sambanova.py,sha256=Unt3H3jr_kgI9vzRjmmW1DFyoEuPkKCcgIIloiOj3j8,2037
|
| 255 |
+
huggingface_hub/inference/_providers/scaleway.py,sha256=Jy81kXWbXCHBpx6xmyzdEfXGSyhUfjKOLHuDSvhHWGo,1209
|
| 256 |
+
huggingface_hub/inference/_providers/together.py,sha256=KHF19CS3qXS7G1-CwcMiD8Z5wzPKEKi4F2DzqAthbBE,3439
|
| 257 |
+
huggingface_hub/inference/_providers/zai_org.py,sha256=plGzMZuLrChZvgpS3CCPqI6ImotZZxNLgfxnR7v6tw8,646
|
| 258 |
+
huggingface_hub/inference_api.py,sha256=b4-NhPSn9b44nYKV8tDKXodmE4JVdEymMWL4CVGkzlE,8323
|
| 259 |
+
huggingface_hub/keras_mixin.py,sha256=gDm8PBcTqYhfrEvhu1_ptxzxbVOF3h0wAArn90UyzRA,19547
|
| 260 |
+
huggingface_hub/lfs.py,sha256=v0mTThnULTmFv8MVWfrkQEwkiFXzWWx7xyp2VLf-EPo,17020
|
| 261 |
+
huggingface_hub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 262 |
+
huggingface_hub/repocard.py,sha256=8tmR7SYVQZ4iBFYCmOj0yl6Ohc9Vv136s-KQKkxBq7U,34865
|
| 263 |
+
huggingface_hub/repocard_data.py,sha256=hr4ReFpEQMNdh_9Dx-L-IJoI1ElHyk-h-8ZRqwVYYOE,34082
|
| 264 |
+
huggingface_hub/repository.py,sha256=axZcbAh4ufXEaMgPbrS1WWgvshd-mFvYnRZAZ_yYljQ,54541
|
| 265 |
+
huggingface_hub/serialization/__init__.py,sha256=kn-Fa-m4FzMnN8lNsF-SwFcfzug4CucexybGKyvZ8S0,1041
|
| 266 |
+
huggingface_hub/serialization/__pycache__/__init__.cpython-312.pyc,,
|
| 267 |
+
huggingface_hub/serialization/__pycache__/_base.cpython-312.pyc,,
|
| 268 |
+
huggingface_hub/serialization/__pycache__/_dduf.cpython-312.pyc,,
|
| 269 |
+
huggingface_hub/serialization/__pycache__/_tensorflow.cpython-312.pyc,,
|
| 270 |
+
huggingface_hub/serialization/__pycache__/_torch.cpython-312.pyc,,
|
| 271 |
+
huggingface_hub/serialization/_base.py,sha256=VGQ4Z9Abg2gsL_1rTGSS9p-3tkkG9eaERjlzBTLGkdU,8109
|
| 272 |
+
huggingface_hub/serialization/_dduf.py,sha256=s42239rLiHwaJE36QDEmS5GH7DSmQ__BffiHJO5RjIg,15424
|
| 273 |
+
huggingface_hub/serialization/_tensorflow.py,sha256=Ea3wN1bKgyb_9opj-FtH-WpIp0ptkovKimroZOudX5c,3608
|
| 274 |
+
huggingface_hub/serialization/_torch.py,sha256=dw3RMkr0CYAr_TwPG_rma-ueHBRTXpfEJtrVKAvvtN4,45143
|
| 275 |
+
huggingface_hub/templates/datasetcard_template.md,sha256=W-EMqR6wndbrnZorkVv56URWPG49l7MATGeI015kTvs,5503
|
| 276 |
+
huggingface_hub/templates/modelcard_template.md,sha256=4AqArS3cqdtbit5Bo-DhjcnDFR-pza5hErLLTPM4Yuc,6870
|
| 277 |
+
huggingface_hub/utils/__init__.py,sha256=ORfVkn5D0wuLIq12jjhTzn5_c4F8fRPxB7TG-iednuQ,3722
|
| 278 |
+
huggingface_hub/utils/__pycache__/__init__.cpython-312.pyc,,
|
| 279 |
+
huggingface_hub/utils/__pycache__/_auth.cpython-312.pyc,,
|
| 280 |
+
huggingface_hub/utils/__pycache__/_cache_assets.cpython-312.pyc,,
|
| 281 |
+
huggingface_hub/utils/__pycache__/_cache_manager.cpython-312.pyc,,
|
| 282 |
+
huggingface_hub/utils/__pycache__/_chunk_utils.cpython-312.pyc,,
|
| 283 |
+
huggingface_hub/utils/__pycache__/_datetime.cpython-312.pyc,,
|
| 284 |
+
huggingface_hub/utils/__pycache__/_deprecation.cpython-312.pyc,,
|
| 285 |
+
huggingface_hub/utils/__pycache__/_dotenv.cpython-312.pyc,,
|
| 286 |
+
huggingface_hub/utils/__pycache__/_experimental.cpython-312.pyc,,
|
| 287 |
+
huggingface_hub/utils/__pycache__/_fixes.cpython-312.pyc,,
|
| 288 |
+
huggingface_hub/utils/__pycache__/_git_credential.cpython-312.pyc,,
|
| 289 |
+
huggingface_hub/utils/__pycache__/_headers.cpython-312.pyc,,
|
| 290 |
+
huggingface_hub/utils/__pycache__/_hf_folder.cpython-312.pyc,,
|
| 291 |
+
huggingface_hub/utils/__pycache__/_http.cpython-312.pyc,,
|
| 292 |
+
huggingface_hub/utils/__pycache__/_lfs.cpython-312.pyc,,
|
| 293 |
+
huggingface_hub/utils/__pycache__/_pagination.cpython-312.pyc,,
|
| 294 |
+
huggingface_hub/utils/__pycache__/_paths.cpython-312.pyc,,
|
| 295 |
+
huggingface_hub/utils/__pycache__/_runtime.cpython-312.pyc,,
|
| 296 |
+
huggingface_hub/utils/__pycache__/_safetensors.cpython-312.pyc,,
|
| 297 |
+
huggingface_hub/utils/__pycache__/_subprocess.cpython-312.pyc,,
|
| 298 |
+
huggingface_hub/utils/__pycache__/_telemetry.cpython-312.pyc,,
|
| 299 |
+
huggingface_hub/utils/__pycache__/_typing.cpython-312.pyc,,
|
| 300 |
+
huggingface_hub/utils/__pycache__/_validators.cpython-312.pyc,,
|
| 301 |
+
huggingface_hub/utils/__pycache__/_xet.cpython-312.pyc,,
|
| 302 |
+
huggingface_hub/utils/__pycache__/_xet_progress_reporting.cpython-312.pyc,,
|
| 303 |
+
huggingface_hub/utils/__pycache__/endpoint_helpers.cpython-312.pyc,,
|
| 304 |
+
huggingface_hub/utils/__pycache__/insecure_hashlib.cpython-312.pyc,,
|
| 305 |
+
huggingface_hub/utils/__pycache__/logging.cpython-312.pyc,,
|
| 306 |
+
huggingface_hub/utils/__pycache__/sha.cpython-312.pyc,,
|
| 307 |
+
huggingface_hub/utils/__pycache__/tqdm.cpython-312.pyc,,
|
| 308 |
+
huggingface_hub/utils/_auth.py,sha256=Ixve2vxdftHXXk2R2vfyLzlVoDT39Tkq-Hrou9KCUvw,8286
|
| 309 |
+
huggingface_hub/utils/_cache_assets.py,sha256=kai77HPQMfYpROouMBQCr_gdBCaeTm996Sqj0dExbNg,5728
|
| 310 |
+
huggingface_hub/utils/_cache_manager.py,sha256=XbeYoZMj8_JCl6eqRviHO6DxGSS29r5Pj38xLlao96Y,34364
|
| 311 |
+
huggingface_hub/utils/_chunk_utils.py,sha256=MH7-6FwCDZ8noV6dGRytCOJGSfcZmDBvsvVotdI8TvQ,2109
|
| 312 |
+
huggingface_hub/utils/_datetime.py,sha256=kCS5jaKV25kOncX1xujbXsz5iDLcjLcLw85semGNzxQ,2770
|
| 313 |
+
huggingface_hub/utils/_deprecation.py,sha256=HZhRGGUX_QMKBBBwHHlffLtmCSK01TOpeXHefZbPfwI,4872
|
| 314 |
+
huggingface_hub/utils/_dotenv.py,sha256=RzHqC8HgzVxE-N4DFBcnemvX0NHmXcV0My2ASK0U1OQ,2017
|
| 315 |
+
huggingface_hub/utils/_experimental.py,sha256=3-c8irbn9sJr2CwWbzhGkIrdXKg8_x7BifhHFy32ei8,2470
|
| 316 |
+
huggingface_hub/utils/_fixes.py,sha256=xQV1QkUn2WpLqLjtXNiyn9gh-454K6AF-Q3kwkYAQD8,4437
|
| 317 |
+
huggingface_hub/utils/_git_credential.py,sha256=ao9rq-rVHn8lghSVZEjDAX4kIkNi7bayY361TDSgSpg,4619
|
| 318 |
+
huggingface_hub/utils/_headers.py,sha256=w4ayq4hLGaZ3B7nwdEi5Zu23SmmDuOwv58It78wkakk,8868
|
| 319 |
+
huggingface_hub/utils/_hf_folder.py,sha256=WNjTnu0Q7tqcSS9EsP4ssCJrrJMcCvAt8P_-LEtmOU8,2487
|
| 320 |
+
huggingface_hub/utils/_http.py,sha256=Cx8MxnXVvlOfg1w30RR03KcFSoIE0WjV1ZX2svwWmx4,25671
|
| 321 |
+
huggingface_hub/utils/_lfs.py,sha256=EC0Oz6Wiwl8foRNkUOzrETXzAWlbgpnpxo5a410ovFY,3957
|
| 322 |
+
huggingface_hub/utils/_pagination.py,sha256=EX5tRasSuQDaKbXuGYbInBK2odnSWNHgzw2tSgqeBRI,1906
|
| 323 |
+
huggingface_hub/utils/_paths.py,sha256=w1ZhFmmD5ykWjp_hAvhjtOoa2ZUcOXJrF4a6O3QpAWo,5042
|
| 324 |
+
huggingface_hub/utils/_runtime.py,sha256=L7SOYezdxKcwd4DovAY0UGY3qt27toXO-QjceIDwExk,11634
|
| 325 |
+
huggingface_hub/utils/_safetensors.py,sha256=GW3nyv7xQcuwObKYeYoT9VhURVzG1DZTbKBKho8Bbos,4458
|
| 326 |
+
huggingface_hub/utils/_subprocess.py,sha256=u9FFUDE7TrzQTiuEzlUnHx7S2P57GbYRV8u16GJwrFw,4625
|
| 327 |
+
huggingface_hub/utils/_telemetry.py,sha256=54LXeIJU5pEGghPAh06gqNAR-UoxOjVLvKqAQscwqZs,4890
|
| 328 |
+
huggingface_hub/utils/_typing.py,sha256=z-134-HG_qJc0cjdSXkmDm3vIRyF5aEfbZgJCB_Qp2Y,3628
|
| 329 |
+
huggingface_hub/utils/_validators.py,sha256=u8AacmA9xCCyer8efmzl1EpQUWTe3zVzsWSJSv3uxTU,9190
|
| 330 |
+
huggingface_hub/utils/_xet.py,sha256=f8qfk8YKePAeGUL6lQiQ1w_3bcs78oWwbeACYdUeg5k,7312
|
| 331 |
+
huggingface_hub/utils/_xet_progress_reporting.py,sha256=JK64hv8orABfNnk1_Wd0YyD_5FfeyVeBvelKpjaNIvs,6169
|
| 332 |
+
huggingface_hub/utils/endpoint_helpers.py,sha256=9VtIAlxQ5H_4y30sjCAgbu7XCqAtNLC7aRYxaNn0hLI,2366
|
| 333 |
+
huggingface_hub/utils/insecure_hashlib.py,sha256=iAaepavFZ5Dhfa5n8KozRfQprKmvcjSnt3X58OUl9fQ,1142
|
| 334 |
+
huggingface_hub/utils/logging.py,sha256=N6NXaCcbPbZSF-Oe-TY3ZnmkpmdFVyTOV8ASo-yVXLE,4916
|
| 335 |
+
huggingface_hub/utils/sha.py,sha256=OFnNGCba0sNcT2gUwaVCJnldxlltrHHe0DS_PCpV3C4,2134
|
| 336 |
+
huggingface_hub/utils/tqdm.py,sha256=xAKcyfnNHsZ7L09WuEM5Ew5-MDhiahLACbbN2zMmcLs,10671
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/REQUESTED
ADDED
|
File without changes
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/WHEEL
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Wheel-Version: 1.0
|
| 2 |
+
Generator: setuptools (79.0.1)
|
| 3 |
+
Root-Is-Purelib: true
|
| 4 |
+
Tag: py3-none-any
|
| 5 |
+
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/entry_points.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[console_scripts]
|
| 2 |
+
hf = huggingface_hub.cli.hf:main
|
| 3 |
+
huggingface-cli = huggingface_hub.commands.huggingface_cli:main
|
| 4 |
+
tiny-agents = huggingface_hub.inference._mcp.cli:app
|
| 5 |
+
|
| 6 |
+
[fsspec.specs]
|
| 7 |
+
hf = huggingface_hub.HfFileSystem
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
huggingface_hub
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/debug.pxi
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@cython.final
@cython.internal
cdef class _MemDebug:
    """Debugging support for the memory allocation in libxml2.

    Each method forwards to a libxml2 accessor and returns its result.
    """
    def bytes_used(self):
        """bytes_used(self)

        Returns the total amount of memory (in bytes) currently used by libxml2.
        Note that libxml2 constrains this value to a C int, which limits
        the accuracy on 64 bit systems.
        """
        return tree.xmlMemUsed()

    def blocks_used(self):
        """blocks_used(self)

        Returns the total number of memory blocks currently allocated by libxml2.
        Note that libxml2 constrains this value to a C int, which limits
        the accuracy on 64 bit systems.
        """
        return tree.xmlMemBlocks()

    def dict_size(self):
        """dict_size(self)

        Returns the current size of the global name dictionary used by libxml2
        for the current thread.  Each thread has its own dictionary.

        Raises MemoryError if no dictionary could be obtained for the thread.
        """
        thread_dict = __GLOBAL_PARSER_CONTEXT._getThreadDict(NULL)
        if thread_dict is not NULL:
            return tree.xmlDictSize(thread_dict)
        # A NULL dictionary is reported as an allocation failure.
        raise MemoryError()


# Module-level singleton through which the counters above are queried.
memory_debugger = _MemDebug()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/docloader.pxi
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Custom resolver API
|
| 2 |
+
|
| 3 |
+
# Tags describing which kind of payload an _InputDocument carries.
# The member order determines the numeric values, so it must not change.
ctypedef enum _InputDocumentDataType:
    PARSER_DATA_INVALID    # initial state set by _InputDocument.__cinit__
    PARSER_DATA_EMPTY      # set by Resolver.resolve_empty()
    PARSER_DATA_STRING     # set by Resolver.resolve_string()
    PARSER_DATA_FILENAME   # set by Resolver.resolve_filename()
    PARSER_DATA_FILE       # set by Resolver.resolve_file()
|
| 9 |
+
|
| 10 |
+
@cython.final
@cython.internal
cdef class _InputDocument:
    # Result object returned by the Resolver.resolve_*() methods.
    # Which of the fields below are meaningful depends on `_type`.
    cdef _InputDocumentDataType _type   # payload kind tag
    cdef bytes _data_bytes              # document data (resolve_string)
    cdef object _filename               # encoded file name or base URL
    cdef object _file                   # file-like object (resolve_file)
    cdef bint _close_file               # `close` flag passed to resolve_file

    def __cinit__(self):
        # Start out invalid; a resolve_*() method sets the real type.
        self._type = PARSER_DATA_INVALID
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
cdef class Resolver:
    "This is the base class of all resolvers."
    def resolve(self, system_url, public_id, context):
        """resolve(self, system_url, public_id, context)

        Override this method to resolve an external source by
        ``system_url`` and ``public_id``.  The third argument is an
        opaque context object.

        Return the result of one of the ``resolve_*()`` methods.
        The base implementation resolves nothing and returns None.
        """
        return None

    def resolve_empty(self, context):
        """resolve_empty(self, context)

        Return an empty input document.

        Pass context as parameter.
        """
        cdef _InputDocument doc_ref = _InputDocument()
        doc_ref._type = PARSER_DATA_EMPTY
        return doc_ref

    def resolve_string(self, string, context, *, base_url=None):
        """resolve_string(self, string, context, base_url=None)

        Return a parsable string as input document.

        Pass data string and context as parameters.  You can pass the
        source URL or filename through the ``base_url`` keyword
        argument.

        Unicode strings are encoded as UTF-8; anything that is neither
        a unicode string nor a byte string raises TypeError.
        """
        cdef _InputDocument doc_ref
        if isinstance(string, unicode):
            data = (<unicode>string).encode('utf8')
        elif isinstance(string, bytes):
            data = string
        else:
            raise TypeError("argument must be a byte string or unicode string")
        doc_ref = _InputDocument()
        doc_ref._type = PARSER_DATA_STRING
        doc_ref._data_bytes = data
        if base_url is not None:
            doc_ref._filename = _encodeFilename(base_url)
        return doc_ref

    def resolve_filename(self, filename, context):
        """resolve_filename(self, filename, context)

        Return the name of a parsable file as input document.

        Pass filename and context as parameters.  You can also pass a
        URL with an HTTP, FTP or file target.
        """
        cdef _InputDocument doc_ref = _InputDocument()
        doc_ref._type = PARSER_DATA_FILENAME
        doc_ref._filename = _encodeFilename(filename)
        return doc_ref

    def resolve_file(self, f, context, *, base_url=None, bint close=True):
        """resolve_file(self, f, context, base_url=None, close=True)

        Return an open file-like object as input document.

        Pass open file and context as parameters.  You can pass the
        base URL or filename of the file through the ``base_url``
        keyword argument.  If the ``close`` flag is True (the
        default), the file will be closed after reading.

        Note that using ``.resolve_filename()`` is more efficient,
        especially in threaded environments.

        Raises TypeError if ``f`` has no ``read`` attribute.
        """
        cdef _InputDocument doc_ref
        # Probe for a 'read' attribute; its absence means 'f' is not file-like.
        try:
            f.read
        except AttributeError:
            raise TypeError("Argument is not a file-like object")
        doc_ref = _InputDocument()
        doc_ref._type = PARSER_DATA_FILE
        # An explicit base URL wins; otherwise ask the file object for a name.
        doc_ref._filename = (
            _getFilenameForFile(f) if base_url is None
            else _encodeFilename(base_url))
        doc_ref._close_file = close
        doc_ref._file = f
        return doc_ref
|
| 110 |
+
|
| 111 |
+
@cython.final
@cython.internal
cdef class _ResolverRegistry:
    # Set of registered Resolver instances plus an optional fallback resolver.
    cdef object _resolvers
    cdef Resolver _default_resolver
    def __cinit__(self, Resolver default_resolver=None):
        self._resolvers = set()
        self._default_resolver = default_resolver

    def add(self, Resolver resolver not None):
        """add(self, resolver)

        Register a resolver.

        For each requested entity, the 'resolve' method of the resolver will
        be called and the result will be passed to the parser.  If this method
        returns None, the request will be delegated to other resolvers or the
        default resolver.  The resolvers will be tested in an arbitrary order
        until the first match is found.
        """
        self._resolvers.add(resolver)

    def remove(self, resolver):
        """remove(self, resolver)

        Unregister a resolver.  Resolvers that are not registered are ignored.
        """
        self._resolvers.discard(resolver)

    cdef _ResolverRegistry _copy(self):
        # New registry with the same default resolver and a copy of the set,
        # so later add()/remove() calls on either registry stay independent.
        cdef _ResolverRegistry clone = _ResolverRegistry(self._default_resolver)
        clone._resolvers = self._resolvers.copy()
        return clone

    def copy(self):
        """copy(self)

        Return a new registry with the same resolvers and default resolver.
        """
        return self._copy()

    def resolve(self, system_url, public_id, context):
        """resolve(self, system_url, public_id, context)

        Ask each registered resolver in turn and return the first result
        that is not None.  If none matches, delegate to the default
        resolver, or return None when there is no default resolver.
        """
        cdef Resolver fallback
        for candidate in self._resolvers:
            document = candidate.resolve(system_url, public_id, context)
            if document is not None:
                return document
        fallback = self._default_resolver
        if fallback is not None:
            return fallback.resolve(system_url, public_id, context)
        return None

    def __repr__(self):
        return repr(self._resolvers)
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
@cython.internal
cdef class _ResolverContext(_ExceptionContext):
    # Exception context extended with a resolver registry and a temp store.
    # Both fields are populated by _initResolverContext().
    cdef _ResolverRegistry _resolvers
    cdef _TempStore _storage

    cdef int clear(self) except -1:
        # Clear the inherited context first, then the temporary storage.
        _ExceptionContext.clear(self)
        self._storage.clear()
        return 0
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
cdef _initResolverContext(_ResolverContext context,
                          _ResolverRegistry resolvers):
    # Attach a resolver registry (a fresh empty one when none is given)
    # and a new temporary store to the context.
    if resolvers is None:
        resolvers = _ResolverRegistry()
    context._resolvers = resolvers
    context._storage = _TempStore()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/dtd.pxi
ADDED
|
@@ -0,0 +1,479 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# support for DTD validation
|
| 2 |
+
from lxml.includes cimport dtdvalid
|
| 3 |
+
|
| 4 |
+
cdef class DTDError(LxmlError):
    """Base class for all DTD errors.
    """
|
| 7 |
+
|
| 8 |
+
cdef class DTDParseError(DTDError):
    """Error that occurred while parsing a DTD.
    """
|
| 11 |
+
|
| 12 |
+
cdef class DTDValidateError(DTDError):
    """Error that occurred while validating an XML document with a DTD.
    """
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
cdef inline int _assertValidDTDNode(node, void *c_node) except -1:
    # Sanity check used by the DTD proxy classes before they dereference
    # their C pointer.  A NULL pointer fails the assertion; -1 signals the
    # resulting exception to C-level callers.
    assert c_node is not NULL, "invalid DTD proxy at %s" % id(node)
    return 0
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDElementContentDecl:
    # Read-only proxy for a libxml2 xmlElementContent node.
    # `_dtd` is the DTD object that child proxies are handed as well.
    cdef DTD _dtd
    cdef tree.xmlElementContent* _c_node

    def __repr__(self):
        cls = self.__class__
        return "<%s.%s object name=%r type=%r occur=%r at 0x%x>" % (
            cls.__module__, cls.__name__,
            self.name, self.type, self.occur, id(self))

    @property
    def name(self):
        # Name of the content node, or None if it has none.
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.name)

    @property
    def type(self):
        # One of "pcdata", "element", "seq", "or"; None for unknown values.
        _assertValidDTDNode(self, self._c_node)
        cdef int c_type = self._c_node.type
        if c_type == tree.XML_ELEMENT_CONTENT_PCDATA:
            return "pcdata"
        if c_type == tree.XML_ELEMENT_CONTENT_ELEMENT:
            return "element"
        if c_type == tree.XML_ELEMENT_CONTENT_SEQ:
            return "seq"
        if c_type == tree.XML_ELEMENT_CONTENT_OR:
            return "or"
        return None

    @property
    def occur(self):
        # One of "once", "opt", "mult", "plus"; None for unknown values.
        _assertValidDTDNode(self, self._c_node)
        # 'ocur' (single 'c') is the struct field name as declared.
        cdef int c_occur = self._c_node.ocur
        if c_occur == tree.XML_ELEMENT_CONTENT_ONCE:
            return "once"
        if c_occur == tree.XML_ELEMENT_CONTENT_OPT:
            return "opt"
        if c_occur == tree.XML_ELEMENT_CONTENT_MULT:
            return "mult"
        if c_occur == tree.XML_ELEMENT_CONTENT_PLUS:
            return "plus"
        return None

    @property
    def left(self):
        # Proxy for the first child (c1), or None if there is none.
        _assertValidDTDNode(self, self._c_node)
        c_child = self._c_node.c1
        if not c_child:
            return None
        # Instantiate through __new__ and fill in the fields directly.
        cdef _DTDElementContentDecl child = \
            <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
        child._dtd = self._dtd
        child._c_node = <tree.xmlElementContent*>c_child
        return child

    @property
    def right(self):
        # Proxy for the second child (c2), or None if there is none.
        _assertValidDTDNode(self, self._c_node)
        c_child = self._c_node.c2
        if not c_child:
            return None
        # Instantiate through __new__ and fill in the fields directly.
        cdef _DTDElementContentDecl child = \
            <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
        child._dtd = self._dtd
        child._c_node = <tree.xmlElementContent*>c_child
        return child
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDAttributeDecl:
    """Read-only wrapper around one attribute declaration of a DTD."""
    # DTD object that this declaration was obtained from.
    cdef DTD _dtd
    # Wrapped libxml2 attribute declaration.
    cdef tree.xmlAttribute* _c_node

    def __repr__(self):
        cls = self.__class__
        return "<%s.%s object name=%r elemname=%r prefix=%r type=%r default=%r default_value=%r at 0x%x>" % (
            cls.__module__, cls.__name__,
            self.name, self.elemname, self.prefix, self.type,
            self.default, self.default_value, id(self))

    @property
    def name(self):
        """Attribute name as text, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.name)

    @property
    def elemname(self):
        """Name of the element the attribute is declared for, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.elem)

    @property
    def prefix(self):
        """Prefix of the attribute as text, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.prefix)

    @property
    def type(self):
        """Attribute type as a lower-case keyword.

        One of "cdata", "id", "idref", "idrefs", "entity", "entities",
        "nmtoken", "nmtokens", "enumeration", "notation"; None for any
        other stored value.
        """
        _assertValidDTDNode(self, self._c_node)
        cdef int c_type = self._c_node.atype
        if c_type == tree.XML_ATTRIBUTE_CDATA:
            return "cdata"
        if c_type == tree.XML_ATTRIBUTE_ID:
            return "id"
        if c_type == tree.XML_ATTRIBUTE_IDREF:
            return "idref"
        if c_type == tree.XML_ATTRIBUTE_IDREFS:
            return "idrefs"
        if c_type == tree.XML_ATTRIBUTE_ENTITY:
            return "entity"
        if c_type == tree.XML_ATTRIBUTE_ENTITIES:
            return "entities"
        if c_type == tree.XML_ATTRIBUTE_NMTOKEN:
            return "nmtoken"
        if c_type == tree.XML_ATTRIBUTE_NMTOKENS:
            return "nmtokens"
        if c_type == tree.XML_ATTRIBUTE_ENUMERATION:
            return "enumeration"
        if c_type == tree.XML_ATTRIBUTE_NOTATION:
            return "notation"
        return None

    @property
    def default(self):
        """Default kind: "none", "required", "implied", "fixed" or None."""
        _assertValidDTDNode(self, self._c_node)
        cdef int c_default = self._c_node.def_
        if c_default == tree.XML_ATTRIBUTE_NONE:
            return "none"
        if c_default == tree.XML_ATTRIBUTE_REQUIRED:
            return "required"
        if c_default == tree.XML_ATTRIBUTE_IMPLIED:
            return "implied"
        if c_default == tree.XML_ATTRIBUTE_FIXED:
            return "fixed"
        return None

    @property
    def default_value(self):
        """Declared default value as text, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.defaultValue)

    def itervalues(self):
        """Yield the name of each entry in the declaration's enumeration list."""
        _assertValidDTDNode(self, self._c_node)
        cdef tree.xmlEnumeration *c_entry = self._c_node.tree
        while c_entry is not NULL:
            yield funicode(c_entry.name)
            c_entry = c_entry.next

    def values(self):
        """Return the names produced by ``itervalues()`` as a list."""
        return list(self.itervalues())
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDElementDecl:
    """Read-only wrapper around one element declaration of a DTD."""
    # DTD object that this declaration was obtained from.
    cdef DTD _dtd
    # Wrapped libxml2 element declaration.
    cdef tree.xmlElement* _c_node

    def __repr__(self):
        cls = self.__class__
        return "<%s.%s object name=%r prefix=%r type=%r at 0x%x>" % (
            cls.__module__, cls.__name__,
            self.name, self.prefix, self.type, id(self))

    @property
    def name(self):
        """Element name as text, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.name)

    @property
    def prefix(self):
        """Element prefix as text, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.prefix)

    @property
    def type(self):
        """Content type: "undefined", "empty", "any", "mixed", "element" or None."""
        _assertValidDTDNode(self, self._c_node)
        cdef int c_type = self._c_node.etype
        if c_type == tree.XML_ELEMENT_TYPE_UNDEFINED:
            return "undefined"
        if c_type == tree.XML_ELEMENT_TYPE_EMPTY:
            return "empty"
        if c_type == tree.XML_ELEMENT_TYPE_ANY:
            return "any"
        if c_type == tree.XML_ELEMENT_TYPE_MIXED:
            return "mixed"
        if c_type == tree.XML_ELEMENT_TYPE_ELEMENT:
            return "element"
        return None

    @property
    def content(self):
        """Wrapper for the root of the element's content model, or None."""
        cdef _DTDElementContentDecl model
        _assertValidDTDNode(self, self._c_node)
        cdef tree.xmlElementContent *c_content = self._c_node.content
        if c_content is NULL:
            return None
        # Bypass __init__: the wrapper is filled in by hand below.
        model = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
        model._dtd = self._dtd
        model._c_node = c_content
        return model

    def iterattributes(self):
        """Yield a wrapper for each attribute declaration linked via ``nexth``."""
        _assertValidDTDNode(self, self._c_node)
        cdef tree.xmlAttribute *c_attr = self._c_node.attributes
        while c_attr is not NULL:
            decl = <_DTDAttributeDecl>_DTDAttributeDecl.__new__(_DTDAttributeDecl)
            decl._dtd = self._dtd
            decl._c_node = c_attr
            yield decl
            c_attr = c_attr.nexth

    def attributes(self):
        """Return the declarations produced by ``iterattributes()`` as a list."""
        return list(self.iterattributes())
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDEntityDecl:
    """Read-only wrapper around one entity declaration of a DTD."""
    # DTD object that this declaration was obtained from.
    cdef DTD _dtd
    # Wrapped libxml2 entity declaration.
    cdef tree.xmlEntity* _c_node

    def __repr__(self):
        cls = self.__class__
        return "<%s.%s object name=%r at 0x%x>" % (
            cls.__module__, cls.__name__, self.name, id(self))

    @property
    def name(self):
        """Entity name as text, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.name)

    @property
    def orig(self):
        """The ``orig`` field of the entity as text, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.orig)

    @property
    def content(self):
        """The ``content`` field of the entity as text, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.content)

    @property
    def system_url(self):
        """The ``SystemID`` field of the entity as text, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.SystemID)
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
################################################################################
|
| 268 |
+
# DTD
|
| 269 |
+
|
| 270 |
+
cdef class DTD(_Validator):
    """DTD(self, file=None, external_id=None)
    A DTD validator.

    Can load from filesystem directly given a filename or file-like object.
    Alternatively, pass the keyword parameter ``external_id`` to load from a
    catalog.

    Raises DTDParseError if neither ``file`` nor ``external_id`` is given,
    if ``file`` is of an unsupported type, or if the DTD cannot be parsed.
    """
    # Parsed libxml2 DTD; released in __dealloc__.
    cdef tree.xmlDtd* _c_dtd

    def __init__(self, file=None, *, external_id=None):
        _Validator.__init__(self)
        if file is not None:
            file = _getFSPathOrObject(file)
            if _isString(file):
                file = _encodeFilename(file)
                with self._error_log:
                    orig_loader = _register_document_loader()
                    try:
                        self._c_dtd = xmlparser.xmlParseDTD(NULL, _xcstr(file))
                    finally:
                        _reset_document_loader(orig_loader)
            elif hasattr(file, 'read'):
                orig_loader = _register_document_loader()
                try:
                    # _parseDtdFromFilelike() raises on failure; the
                    # finally clause makes sure the previous loader is
                    # restored in that case as well.
                    self._c_dtd = _parseDtdFromFilelike(file)
                finally:
                    _reset_document_loader(orig_loader)
            else:
                raise DTDParseError("file must be a filename, file-like or path-like object")
        elif external_id is not None:
            external_id_utf = _utf8(external_id)
            with self._error_log:
                orig_loader = _register_document_loader()
                try:
                    self._c_dtd = xmlparser.xmlParseDTD(<const_xmlChar*>external_id_utf, NULL)
                finally:
                    _reset_document_loader(orig_loader)
        else:
            raise DTDParseError("either filename or external ID required")

        if self._c_dtd is NULL:
            raise DTDParseError(
                self._error_log._buildExceptionMessage("error parsing DTD"),
                self._error_log)

    @property
    def name(self):
        """Name of the DTD as text, or None if unset or no DTD is loaded."""
        if self._c_dtd is NULL:
            return None
        return funicodeOrNone(self._c_dtd.name)

    @property
    def external_id(self):
        """External ID of the DTD as text, or None if unset or no DTD is loaded."""
        if self._c_dtd is NULL:
            return None
        return funicodeOrNone(self._c_dtd.ExternalID)

    @property
    def system_url(self):
        """System ID of the DTD as text, or None if unset or no DTD is loaded."""
        if self._c_dtd is NULL:
            return None
        return funicodeOrNone(self._c_dtd.SystemID)

    def iterelements(self):
        """Yield a ``_DTDElementDecl`` for each element declaration child of the DTD."""
        cdef tree.xmlNode *c_node = self._c_dtd.children if self._c_dtd is not NULL else NULL
        while c_node is not NULL:
            if c_node.type == tree.XML_ELEMENT_DECL:
                node = _DTDElementDecl()
                node._dtd = self
                node._c_node = <tree.xmlElement*>c_node
                yield node
            c_node = c_node.next

    def elements(self):
        """Return the declarations produced by ``iterelements()`` as a list."""
        return list(self.iterelements())

    def iterentities(self):
        """Yield a ``_DTDEntityDecl`` for each entity declaration child of the DTD."""
        cdef tree.xmlNode *c_node = self._c_dtd.children if self._c_dtd is not NULL else NULL
        while c_node is not NULL:
            if c_node.type == tree.XML_ENTITY_DECL:
                node = _DTDEntityDecl()
                node._dtd = self
                node._c_node = <tree.xmlEntity*>c_node
                yield node
            c_node = c_node.next

    def entities(self):
        """Return the declarations produced by ``iterentities()`` as a list."""
        return list(self.iterentities())

    def __dealloc__(self):
        tree.xmlFreeDtd(self._c_dtd)

    def __call__(self, etree):
        """__call__(self, etree)

        Validate doc using the DTD.

        Returns true if the document is valid, false if not.

        Raises DTDError if no validation context could be created and
        DTDValidateError if libxml2 reports an internal error (-1).
        """
        cdef _Document doc
        cdef _Element root_node
        cdef xmlDoc* c_doc
        cdef dtdvalid.xmlValidCtxt* valid_ctxt
        cdef int ret = -1

        assert self._c_dtd is not NULL, "DTD not initialised"
        doc = _documentOrRaise(etree)
        root_node = _rootNodeOrRaise(etree)

        valid_ctxt = dtdvalid.xmlNewValidCtxt()
        if valid_ctxt is NULL:
            raise DTDError("Failed to create validation context")

        # work around error reporting bug in libxml2 <= 2.9.1 (and later?)
        # https://bugzilla.gnome.org/show_bug.cgi?id=724903
        valid_ctxt.error = <dtdvalid.xmlValidityErrorFunc>_nullGenericErrorFunc
        valid_ctxt.userData = NULL

        try:
            with self._error_log:
                c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
                ret = dtdvalid.xmlValidateDtd(valid_ctxt, c_doc, self._c_dtd)
                _destroyFakeDoc(doc._c_doc, c_doc)
        finally:
            # the context is freed even if setting up the fake document fails
            dtdvalid.xmlFreeValidCtxt(valid_ctxt)

        if ret == -1:
            raise DTDValidateError("Internal error in DTD validation",
                                   self._error_log)
        return ret == 1
|
| 394 |
+
|
| 395 |
+
|
| 396 |
+
cdef tree.xmlDtd* _parseDtdFromFilelike(file) except NULL:
    """Parse a DTD from a file-like object and return the libxml2 DTD.

    Re-raises any exception stored in the reader's exception context and
    raises DTDParseError if no DTD was produced.
    """
    cdef tree.xmlDtd* c_result = NULL
    cdef _ExceptionContext stored_exc = _ExceptionContext()
    cdef _FileReaderContext reader = _FileReaderContext(file, stored_exc, None)
    cdef _ErrorLog log = _ErrorLog()

    with log:
        c_result = reader._readDtd()

    stored_exc._raise_if_stored()
    if c_result is NULL:
        raise DTDParseError("error parsing DTD", log)
    return c_result
|
| 412 |
+
|
| 413 |
+
cdef DTD _dtdFactory(tree.xmlDtd* c_dtd):
    """Wrap a copy of ``c_dtd`` in a new DTD object; return None for NULL."""
    # do not run through DTD.__init__()!
    cdef DTD wrapper
    if c_dtd is NULL:
        return None
    wrapper = DTD.__new__(DTD)
    wrapper._c_dtd = _copyDtd(c_dtd)
    _Validator.__init__(wrapper)
    return wrapper
|
| 422 |
+
|
| 423 |
+
|
| 424 |
+
cdef tree.xmlDtd* _copyDtd(tree.xmlDtd* c_orig_dtd) except NULL:
    """
    Copy a DTD.  libxml2 (currently) fails to set up the element->attributes
    links when copying DTDs, so we have to rebuild them here.

    Raises MemoryError if libxml2 returns no copy.
    """
    cdef tree.xmlNode* c_child
    cdef tree.xmlDtd* c_copy = tree.xmlCopyDtd(c_orig_dtd)
    if c_copy is NULL:
        raise MemoryError()
    c_child = c_copy.children
    while c_child is not NULL:
        if c_child.type == tree.XML_ATTRIBUTE_DECL:
            _linkDtdAttribute(c_copy, <tree.xmlAttribute*>c_child)
        c_child = c_child.next
    return c_copy
|
| 438 |
+
|
| 439 |
+
|
| 440 |
+
cdef void _linkDtdAttribute(tree.xmlDtd* c_dtd, tree.xmlAttribute* c_attr) noexcept:
    """
    Create the link to the DTD attribute declaration from the corresponding
    element declaration.
    """
    cdef tree.xmlAttribute* c_cursor
    c_elem = dtdvalid.xmlGetDtdElementDesc(c_dtd, c_attr.elem)
    if c_elem is NULL:
        # no such element? something is wrong with the DTD ...
        return

    c_cursor = c_elem.attributes
    if c_cursor is NULL:
        # first attribute of this element: start a new list
        c_elem.attributes = c_attr
        c_attr.nexth = NULL
        return

    # libxml2 keeps namespace declarations first, and we need to make
    # sure we don't re-insert attributes that are already there
    if _isDtdNsDecl(c_attr):
        if not _isDtdNsDecl(c_cursor):
            # no namespace declaration in the list yet: becomes the new head
            c_elem.attributes = c_attr
            c_attr.nexth = c_cursor
            return
        # advance to the last entry of the leading namespace declarations
        while c_cursor != c_attr and c_cursor.nexth and _isDtdNsDecl(c_cursor.nexth):
            c_cursor = c_cursor.nexth
    else:
        # append at end
        while c_cursor != c_attr and c_cursor.nexth:
            c_cursor = c_cursor.nexth

    if c_cursor == c_attr:
        # already part of the list
        return
    # splice c_attr in right behind the cursor
    c_attr.nexth = c_cursor.nexth
    c_cursor.nexth = c_attr
|
| 471 |
+
|
| 472 |
+
|
| 473 |
+
cdef bint _isDtdNsDecl(tree.xmlAttribute* c_attr) noexcept:
    """Tell whether the attribute's name or its (non-NULL) prefix is "xmlns"."""
    return (
        cstring_h.strcmp(<const_char*>c_attr.name, "xmlns") == 0
        or (c_attr.prefix is not NULL
            and cstring_h.strcmp(<const_char*>c_attr.prefix, "xmlns") == 0))
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/iterparse.pxi
ADDED
|
@@ -0,0 +1,438 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# iterparse -- event-driven parsing
|
| 2 |
+
|
| 3 |
+
# Size passed to each ``read()`` call that iterparse makes on its source.
DEF __ITERPARSE_CHUNK_SIZE = 32768
|
| 4 |
+
|
| 5 |
+
cdef class iterparse:
    """iterparse(self, source, events=("end",), tag=None, \
                  attribute_defaults=False, dtd_validation=False, \
                  load_dtd=False, no_network=True, remove_blank_text=False, \
                  compact=True, resolve_entities=True, \
                  remove_comments=False, remove_pis=False, strip_cdata=True, \
                  encoding=None, html=False, recover=None, huge_tree=False, \
                  collect_ids=True, schema=None)

    Incremental parser.

    Parses XML into a tree and generates tuples (event, element) in a
    SAX-like fashion. ``event`` is any of 'start', 'end', 'start-ns',
    'end-ns'.

    For 'start' and 'end', ``element`` is the Element that the parser just
    found opening or closing.  For 'start-ns', it is a tuple (prefix, URI) of
    a new namespace declaration.  For 'end-ns', it is simply None.  Note that
    all start and end events are guaranteed to be properly nested.

    The keyword argument ``events`` specifies a sequence of event type names
    that should be generated.  By default, only 'end' events will be
    generated.

    The additional ``tag`` argument restricts the 'start' and 'end' events to
    those elements that match the given tag.  The ``tag`` argument can also be
    a sequence of tags to allow matching more than one tag.  By default,
    events are generated for all elements.  Note that the 'start-ns' and
    'end-ns' events are not impacted by this restriction.

    The other keyword arguments in the constructor are mainly based on the
    libxml2 parser configuration.  A DTD will also be loaded if validation or
    attribute default values are requested.

    Available boolean keyword arguments:
     - attribute_defaults: read default attributes from DTD
     - dtd_validation: validate (if DTD is available)
     - load_dtd: use DTD for parsing
     - no_network: prevent network access for related files
     - remove_blank_text: discard blank text nodes
     - remove_comments: discard comments
     - remove_pis: discard processing instructions
     - strip_cdata: replace CDATA sections by normal text content (default:
                    True for XML, ignored otherwise)
     - compact: save memory for short text content (default: True)
     - resolve_entities: replace entities by their text value (default: True)
     - huge_tree: disable security restrictions and support very deep trees
                  and very long text content (only affects libxml2 2.7+)
     - html: parse input as HTML (default: XML)
     - recover: try hard to parse through broken input (default: True for HTML,
                False otherwise)
     - collect_ids: forwarded to the XML parser (default: True, not passed
                    on when parsing HTML)

    Other keyword arguments:
     - encoding: override the document encoding
     - schema: an XMLSchema to validate against
    """
    cdef _FeedParser _parser
    cdef object _tag
    cdef object _events
    cdef readonly object root
    cdef object _source
    cdef object _filename
    cdef object _error
    # True only when __init__ opened the source file itself.
    cdef bint _close_source_after_read

    def __init__(self, source, events=("end",), *, tag=None,
                 attribute_defaults=False, dtd_validation=False,
                 load_dtd=False, no_network=True, remove_blank_text=False,
                 compact=True, resolve_entities=True, remove_comments=False,
                 remove_pis=False, strip_cdata=True, encoding=None,
                 html=False, recover=None, huge_tree=False, collect_ids=True,
                 XMLSchema schema=None):
        if not hasattr(source, 'read'):
            source = _getFSPathOrObject(source)
            self._filename = source
            self._source = open(source, 'rb')
            self._close_source_after_read = True
        else:
            self._filename = _getFilenameForFile(source)
            self._source = source
            self._close_source_after_read = False

        if recover is None:
            recover = html

        try:
            if html:
                # make sure we're not looking for namespaces
                events = [event for event in events
                          if event not in ('start-ns', 'end-ns')]
                parser = HTMLPullParser(
                    events,
                    tag=tag,
                    recover=recover,
                    base_url=self._filename,
                    encoding=encoding,
                    remove_blank_text=remove_blank_text,
                    remove_comments=remove_comments,
                    remove_pis=remove_pis,
                    no_network=no_network,
                    target=None,  # TODO
                    schema=schema,
                    compact=compact)
            else:
                parser = XMLPullParser(
                    events,
                    tag=tag,
                    recover=recover,
                    base_url=self._filename,
                    encoding=encoding,
                    attribute_defaults=attribute_defaults,
                    dtd_validation=dtd_validation,
                    load_dtd=load_dtd,
                    no_network=no_network,
                    schema=schema,
                    huge_tree=huge_tree,
                    remove_blank_text=remove_blank_text,
                    resolve_entities=resolve_entities,
                    remove_comments=remove_comments,
                    remove_pis=remove_pis,
                    strip_cdata=strip_cdata,
                    # honour the caller's choice instead of forcing True
                    collect_ids=collect_ids,
                    target=None,  # TODO
                    compact=compact)

            self._events = parser.read_events()
        except BaseException:
            # Parser setup failed: close a file that we opened ourselves
            # (a caller-provided file object is only dropped, not closed).
            self._close_source()
            raise
        self._parser = parser

    @property
    def error_log(self):
        """The error log of the last (or current) parser run.
        """
        return self._parser.feed_error_log

    @property
    def resolvers(self):
        """The custom resolver registry of the last (or current) parser run.
        """
        return self._parser.resolvers

    @property
    def version(self):
        """The version of the underlying XML parser."""
        return self._parser.version

    def set_element_class_lookup(self, ElementClassLookup lookup = None):
        """set_element_class_lookup(self, lookup = None)

        Set a lookup scheme for element classes generated from this parser.

        Reset it by passing None or nothing.
        """
        self._parser.set_element_class_lookup(lookup)

    def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
        """makeelement(self, _tag, attrib=None, nsmap=None, **_extra)

        Creates a new element associated with this parser.

        Returns whatever the underlying parser's ``makeelement()`` returns.
        """
        # Forward the caller's attrib/nsmap and hand the result back.
        return self._parser.makeelement(
            _tag, attrib=attrib, nsmap=nsmap, **_extra)

    @cython.final
    cdef _close_source(self):
        """Drop the source; close it only if it was opened by __init__."""
        if self._source is None:
            return
        if not self._close_source_after_read:
            self._source = None
            return
        try:
            close = self._source.close
        except AttributeError:
            close = None
        finally:
            self._source = None
        if close is not None:
            close()

    def __iter__(self):
        return self

    def __next__(self):
        # serve events that are already queued
        try:
            return next(self._events)
        except StopIteration:
            pass
        context = <_SaxParserContext>self._parser._getPushParserContext()
        if self._source is not None:
            done = False
            while not done:
                try:
                    done = self._read_more_events(context)
                    return next(self._events)
                except StopIteration:
                    pass  # no events yet
                except Exception as e:
                    # remember the error, but deliver pending events first
                    self._error = e
                    self._close_source()
                    try:
                        return next(self._events)
                    except StopIteration:
                        break
        # nothing left to read or return
        if self._error is not None:
            error = self._error
            self._error = None
            raise error
        if (context._validator is not None
                and not context._validator.isvalid()):
            _raiseParseError(context._c_ctxt, self._filename,
                             context._error_log)
        # no errors => all done
        raise StopIteration

    @cython.final
    cdef bint _read_more_events(self, _SaxParserContext context) except -123:
        """Feed one chunk of the source to the parser.

        Returns True once the source is exhausted and the parser was closed,
        False if a chunk was fed.  Raises TypeError if ``read()`` does not
        return a bytes object.
        """
        data = self._source.read(__ITERPARSE_CHUNK_SIZE)
        if not isinstance(data, bytes):
            self._close_source()
            raise TypeError("reading file objects must return bytes objects")
        if not data:
            try:
                self.root = self._parser.close()
            finally:
                self._close_source()
            return True
        self._parser.feed(data)
        return False
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
cdef enum _IterwalkSkipStates:
    # a 'start' or 'start-ns' event was queued by iterwalk._start_node()
    IWSKIP_NEXT_IS_START
    # skip_subtree() was requested: the next step does not descend
    IWSKIP_SKIP_NEXT
    # the event returned last was 'start'/'start-ns': skipping is allowed
    IWSKIP_CAN_SKIP
    # default state: skip requests are ignored
    IWSKIP_CANNOT_SKIP
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
cdef class iterwalk:
    """iterwalk(self, element_or_tree, events=("end",), tag=None)

    A tree walker that generates events from an existing tree as if it
    was parsing XML data with ``iterparse()``.

    Just as for ``iterparse()``, the ``tag`` argument can be a single tag or a
    sequence of tags.

    After receiving a 'start' or 'start-ns' event, the children and
    descendants of the current element can be excluded from iteration
    by calling the ``skip_subtree()`` method.
    """
    cdef _MultiTagMatcher _matcher
    # stack of (element, ns_count) pairs for the elements currently open
    cdef list _node_stack
    # queue of pending (event, value) tuples, consumed from the front
    cdef list _events
    cdef object _pop_event
    cdef object _include_siblings
    # index of the top of _node_stack, -1 when the walk is finished
    cdef int _index
    cdef int _event_filter
    cdef _IterwalkSkipStates _skip_state

    def __init__(self, element_or_tree, events=("end",), tag=None):
        cdef _Element root
        cdef int ns_count
        root = _rootNodeOrRaise(element_or_tree)
        self._event_filter = _buildParseEventFilter(events)
        if tag is None or tag == '*':
            self._matcher = None
        else:
            self._matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag)
        self._node_stack = []
        self._events = []
        self._pop_event = self._events.pop
        self._skip_state = IWSKIP_CANNOT_SKIP  # ignore all skip requests by default

        if not self._event_filter:
            # no events requested => nothing to walk
            self._index = -1
            return

        self._index = 0
        if self._matcher is not None and self._event_filter & PARSE_EVENT_FILTER_START:
            self._matcher.cacheTags(root._doc)

        # When processing an ElementTree, add events for the preceding comments/PIs.
        if self._event_filter & (PARSE_EVENT_FILTER_COMMENT | PARSE_EVENT_FILTER_PI):
            if isinstance(element_or_tree, _ElementTree):
                self._include_siblings = root
                for sibling in list(root.itersiblings(preceding=True))[::-1]:
                    if self._event_filter & PARSE_EVENT_FILTER_COMMENT and sibling.tag is Comment:
                        self._events.append(('comment', sibling))
                    elif self._event_filter & PARSE_EVENT_FILTER_PI and sibling.tag is PI:
                        self._events.append(('pi', sibling))

        ns_count = self._start_node(root)
        self._node_stack.append((root, ns_count))

    def __iter__(self):
        return self

    def __next__(self):
        cdef xmlNode* c_child
        cdef _Element node
        cdef _Element next_node
        cdef int ns_count = 0
        if self._events:
            return self._next_event()
        if self._matcher is not None and self._index >= 0:
            node = self._node_stack[self._index][0]
            self._matcher.cacheTags(node._doc)

        # find next node
        while self._index >= 0:
            node = self._node_stack[self._index][0]

            if self._skip_state == IWSKIP_SKIP_NEXT:
                # subtree skipped on request: behave as if there were no children
                c_child = NULL
            else:
                c_child = self._process_non_elements(
                    node._doc, _findChildForwards(node._c_node, 0))
            self._skip_state = IWSKIP_CANNOT_SKIP

            while c_child is NULL:
                # back off through parents
                self._index -= 1
                node = self._end_node()
                if self._index < 0:
                    break
                c_child = self._process_non_elements(
                    node._doc, _nextElement(node._c_node))

            if c_child is not NULL:
                next_node = _elementFactory(node._doc, c_child)
                if self._event_filter & (PARSE_EVENT_FILTER_START |
                                         PARSE_EVENT_FILTER_START_NS):
                    ns_count = self._start_node(next_node)
                elif self._event_filter & PARSE_EVENT_FILTER_END_NS:
                    ns_count = _countNsDefs(next_node._c_node)
                self._node_stack.append((next_node, ns_count))
                self._index += 1
            if self._events:
                return self._next_event()

        if self._include_siblings is not None:
            # one-shot: queue the comments/PIs that follow the root element
            node, self._include_siblings = self._include_siblings, None
            self._process_non_elements(node._doc, _nextElement(node._c_node))
            if self._events:
                return self._next_event()

        raise StopIteration

    @cython.final
    cdef xmlNode* _process_non_elements(self, _Document doc, xmlNode* c_node):
        """Queue events for leading comments/PIs and return the node that stopped the scan."""
        while c_node is not NULL and c_node.type != tree.XML_ELEMENT_NODE:
            if c_node.type == tree.XML_COMMENT_NODE:
                if self._event_filter & PARSE_EVENT_FILTER_COMMENT:
                    self._events.append(
                        ("comment", _elementFactory(doc, c_node)))
                c_node = _nextElement(c_node)
            elif c_node.type == tree.XML_PI_NODE:
                if self._event_filter & PARSE_EVENT_FILTER_PI:
                    self._events.append(
                        ("pi", _elementFactory(doc, c_node)))
                c_node = _nextElement(c_node)
            else:
                break
        return c_node

    @cython.final
    cdef _next_event(self):
        """Pop the oldest queued event, enabling skip_subtree() for start events."""
        if (self._skip_state == IWSKIP_NEXT_IS_START
                and self._events[0][0] in ('start', 'start-ns')):
            self._skip_state = IWSKIP_CAN_SKIP
        return self._pop_event(0)

    def skip_subtree(self):
        """Prevent descending into the current subtree.
        Instead, the next returned event will be the 'end' event of the current element
        (if included), ignoring any children or descendants.

        This has no effect right after an 'end' or 'end-ns' event.
        """
        if self._skip_state == IWSKIP_CAN_SKIP:
            self._skip_state = IWSKIP_SKIP_NEXT

    @cython.final
    cdef int _start_node(self, _Element node) except -1:
        """Queue the start events for ``node`` and return its namespace count."""
        cdef int ns_count
        if self._event_filter & PARSE_EVENT_FILTER_START_NS:
            ns_count = _appendStartNsEvents(node._c_node, self._events)
            if self._events:
                self._skip_state = IWSKIP_NEXT_IS_START
        elif self._event_filter & PARSE_EVENT_FILTER_END_NS:
            # only the number of 'end-ns' events to emit later is needed
            ns_count = _countNsDefs(node._c_node)
        else:
            ns_count = 0
        if self._event_filter & PARSE_EVENT_FILTER_START:
            if self._matcher is None or self._matcher.matches(node._c_node):
                self._events.append(("start", node))
                self._skip_state = IWSKIP_NEXT_IS_START
        return ns_count

    @cython.final
    cdef _Element _end_node(self):
        """Pop the innermost open element, queue its end events and return it."""
        cdef _Element node
        cdef int ns_count
        node, ns_count = self._node_stack.pop()
        if self._event_filter & PARSE_EVENT_FILTER_END:
            if self._matcher is None or self._matcher.matches(node._c_node):
                self._events.append(("end", node))
        if self._event_filter & PARSE_EVENT_FILTER_END_NS and ns_count:
            # one 'end-ns' event per namespace counted for this element
            self._events.extend([("end-ns", None)] * ns_count)
        return node
|
| 413 |
+
|
| 414 |
+
|
| 415 |
+
cdef int _countNsDefs(xmlNode* c_node) noexcept:
|
| 416 |
+
cdef xmlNs* c_ns
|
| 417 |
+
cdef int count
|
| 418 |
+
count = 0
|
| 419 |
+
c_ns = c_node.nsDef
|
| 420 |
+
while c_ns is not NULL:
|
| 421 |
+
count += (c_ns.href is not NULL)
|
| 422 |
+
c_ns = c_ns.next
|
| 423 |
+
return count
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1:
|
| 427 |
+
cdef xmlNs* c_ns
|
| 428 |
+
cdef int count
|
| 429 |
+
count = 0
|
| 430 |
+
c_ns = c_node.nsDef
|
| 431 |
+
while c_ns is not NULL:
|
| 432 |
+
if c_ns.href:
|
| 433 |
+
ns_tuple = (funicodeOrEmpty(c_ns.prefix),
|
| 434 |
+
funicode(c_ns.href))
|
| 435 |
+
event_list.append( ("start-ns", ns_tuple) )
|
| 436 |
+
count += 1
|
| 437 |
+
c_ns = c_ns.next
|
| 438 |
+
return count
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/lxml.etree_api.h
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Generated by Cython 3.1.4 */
|
| 2 |
+
|
| 3 |
+
#ifndef __PYX_HAVE_API__lxml__etree
|
| 4 |
+
#define __PYX_HAVE_API__lxml__etree
|
| 5 |
+
#ifdef __MINGW64__
|
| 6 |
+
#define MS_WIN64
|
| 7 |
+
#endif
|
| 8 |
+
#include "Python.h"
|
| 9 |
+
#include "lxml.etree.h"
|
| 10 |
+
|
| 11 |
+
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument)(struct LxmlDocument *, xmlNode *) = 0;
|
| 12 |
+
#define deepcopyNodeToDocument __pyx_api_f_4lxml_5etree_deepcopyNodeToDocument
|
| 13 |
+
static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_elementTreeFactory)(struct LxmlElement *) = 0;
|
| 14 |
+
#define elementTreeFactory __pyx_api_f_4lxml_5etree_elementTreeFactory
|
| 15 |
+
static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_newElementTree)(struct LxmlElement *, PyObject *) = 0;
|
| 16 |
+
#define newElementTree __pyx_api_f_4lxml_5etree_newElementTree
|
| 17 |
+
static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_adoptExternalDocument)(xmlDoc *, PyObject *, int) = 0;
|
| 18 |
+
#define adoptExternalDocument __pyx_api_f_4lxml_5etree_adoptExternalDocument
|
| 19 |
+
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_elementFactory)(struct LxmlDocument *, xmlNode *) = 0;
|
| 20 |
+
#define elementFactory __pyx_api_f_4lxml_5etree_elementFactory
|
| 21 |
+
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeElement)(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0;
|
| 22 |
+
#define makeElement __pyx_api_f_4lxml_5etree_makeElement
|
| 23 |
+
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeSubElement)(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0;
|
| 24 |
+
#define makeSubElement __pyx_api_f_4lxml_5etree_makeSubElement
|
| 25 |
+
static void (*__pyx_api_f_4lxml_5etree_setElementClassLookupFunction)(_element_class_lookup_function, PyObject *) = 0;
|
| 26 |
+
#define setElementClassLookupFunction __pyx_api_f_4lxml_5etree_setElementClassLookupFunction
|
| 27 |
+
static PyObject *(*__pyx_api_f_4lxml_5etree_lookupDefaultElementClass)(PyObject *, PyObject *, xmlNode *) = 0;
|
| 28 |
+
#define lookupDefaultElementClass __pyx_api_f_4lxml_5etree_lookupDefaultElementClass
|
| 29 |
+
static PyObject *(*__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass)(PyObject *, PyObject *, xmlNode *) = 0;
|
| 30 |
+
#define lookupNamespaceElementClass __pyx_api_f_4lxml_5etree_lookupNamespaceElementClass
|
| 31 |
+
static PyObject *(*__pyx_api_f_4lxml_5etree_callLookupFallback)(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *) = 0;
|
| 32 |
+
#define callLookupFallback __pyx_api_f_4lxml_5etree_callLookupFallback
|
| 33 |
+
static int (*__pyx_api_f_4lxml_5etree_tagMatches)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
| 34 |
+
#define tagMatches __pyx_api_f_4lxml_5etree_tagMatches
|
| 35 |
+
static struct LxmlDocument *(*__pyx_api_f_4lxml_5etree_documentOrRaise)(PyObject *) = 0;
|
| 36 |
+
#define documentOrRaise __pyx_api_f_4lxml_5etree_documentOrRaise
|
| 37 |
+
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_rootNodeOrRaise)(PyObject *) = 0;
|
| 38 |
+
#define rootNodeOrRaise __pyx_api_f_4lxml_5etree_rootNodeOrRaise
|
| 39 |
+
static int (*__pyx_api_f_4lxml_5etree_hasText)(xmlNode *) = 0;
|
| 40 |
+
#define hasText __pyx_api_f_4lxml_5etree_hasText
|
| 41 |
+
static int (*__pyx_api_f_4lxml_5etree_hasTail)(xmlNode *) = 0;
|
| 42 |
+
#define hasTail __pyx_api_f_4lxml_5etree_hasTail
|
| 43 |
+
static PyObject *(*__pyx_api_f_4lxml_5etree_textOf)(xmlNode *) = 0;
|
| 44 |
+
#define textOf __pyx_api_f_4lxml_5etree_textOf
|
| 45 |
+
static PyObject *(*__pyx_api_f_4lxml_5etree_tailOf)(xmlNode *) = 0;
|
| 46 |
+
#define tailOf __pyx_api_f_4lxml_5etree_tailOf
|
| 47 |
+
static int (*__pyx_api_f_4lxml_5etree_setNodeText)(xmlNode *, PyObject *) = 0;
|
| 48 |
+
#define setNodeText __pyx_api_f_4lxml_5etree_setNodeText
|
| 49 |
+
static int (*__pyx_api_f_4lxml_5etree_setTailText)(xmlNode *, PyObject *) = 0;
|
| 50 |
+
#define setTailText __pyx_api_f_4lxml_5etree_setTailText
|
| 51 |
+
static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValue)(xmlNode *, xmlAttr *) = 0;
|
| 52 |
+
#define attributeValue __pyx_api_f_4lxml_5etree_attributeValue
|
| 53 |
+
static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValueFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
| 54 |
+
#define attributeValueFromNsName __pyx_api_f_4lxml_5etree_attributeValueFromNsName
|
| 55 |
+
static PyObject *(*__pyx_api_f_4lxml_5etree_getAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0;
|
| 56 |
+
#define getAttributeValue __pyx_api_f_4lxml_5etree_getAttributeValue
|
| 57 |
+
static PyObject *(*__pyx_api_f_4lxml_5etree_iterattributes)(struct LxmlElement *, int) = 0;
|
| 58 |
+
#define iterattributes __pyx_api_f_4lxml_5etree_iterattributes
|
| 59 |
+
static PyObject *(*__pyx_api_f_4lxml_5etree_collectAttributes)(xmlNode *, int) = 0;
|
| 60 |
+
#define collectAttributes __pyx_api_f_4lxml_5etree_collectAttributes
|
| 61 |
+
static int (*__pyx_api_f_4lxml_5etree_setAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0;
|
| 62 |
+
#define setAttributeValue __pyx_api_f_4lxml_5etree_setAttributeValue
|
| 63 |
+
static int (*__pyx_api_f_4lxml_5etree_delAttribute)(struct LxmlElement *, PyObject *) = 0;
|
| 64 |
+
#define delAttribute __pyx_api_f_4lxml_5etree_delAttribute
|
| 65 |
+
static int (*__pyx_api_f_4lxml_5etree_delAttributeFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
| 66 |
+
#define delAttributeFromNsName __pyx_api_f_4lxml_5etree_delAttributeFromNsName
|
| 67 |
+
static int (*__pyx_api_f_4lxml_5etree_hasChild)(xmlNode *) = 0;
|
| 68 |
+
#define hasChild __pyx_api_f_4lxml_5etree_hasChild
|
| 69 |
+
static xmlNode *(*__pyx_api_f_4lxml_5etree_findChild)(xmlNode *, Py_ssize_t) = 0;
|
| 70 |
+
#define findChild __pyx_api_f_4lxml_5etree_findChild
|
| 71 |
+
static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildForwards)(xmlNode *, Py_ssize_t) = 0;
|
| 72 |
+
#define findChildForwards __pyx_api_f_4lxml_5etree_findChildForwards
|
| 73 |
+
static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildBackwards)(xmlNode *, Py_ssize_t) = 0;
|
| 74 |
+
#define findChildBackwards __pyx_api_f_4lxml_5etree_findChildBackwards
|
| 75 |
+
static xmlNode *(*__pyx_api_f_4lxml_5etree_nextElement)(xmlNode *) = 0;
|
| 76 |
+
#define nextElement __pyx_api_f_4lxml_5etree_nextElement
|
| 77 |
+
static xmlNode *(*__pyx_api_f_4lxml_5etree_previousElement)(xmlNode *) = 0;
|
| 78 |
+
#define previousElement __pyx_api_f_4lxml_5etree_previousElement
|
| 79 |
+
static void (*__pyx_api_f_4lxml_5etree_appendChild)(struct LxmlElement *, struct LxmlElement *) = 0;
|
| 80 |
+
#define appendChild __pyx_api_f_4lxml_5etree_appendChild
|
| 81 |
+
static int (*__pyx_api_f_4lxml_5etree_appendChildToElement)(struct LxmlElement *, struct LxmlElement *) = 0;
|
| 82 |
+
#define appendChildToElement __pyx_api_f_4lxml_5etree_appendChildToElement
|
| 83 |
+
static PyObject *(*__pyx_api_f_4lxml_5etree_pyunicode)(const xmlChar *) = 0;
|
| 84 |
+
#define pyunicode __pyx_api_f_4lxml_5etree_pyunicode
|
| 85 |
+
static PyObject *(*__pyx_api_f_4lxml_5etree_utf8)(PyObject *) = 0;
|
| 86 |
+
#define utf8 __pyx_api_f_4lxml_5etree_utf8
|
| 87 |
+
static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTag)(PyObject *) = 0;
|
| 88 |
+
#define getNsTag __pyx_api_f_4lxml_5etree_getNsTag
|
| 89 |
+
static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs)(PyObject *) = 0;
|
| 90 |
+
#define getNsTagWithEmptyNs __pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs
|
| 91 |
+
static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedName)(xmlNode *) = 0;
|
| 92 |
+
#define namespacedName __pyx_api_f_4lxml_5etree_namespacedName
|
| 93 |
+
static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedNameFromNsName)(const xmlChar *, const xmlChar *) = 0;
|
| 94 |
+
#define namespacedNameFromNsName __pyx_api_f_4lxml_5etree_namespacedNameFromNsName
|
| 95 |
+
static void (*__pyx_api_f_4lxml_5etree_iteratorStoreNext)(struct LxmlElementIterator *, struct LxmlElement *) = 0;
|
| 96 |
+
#define iteratorStoreNext __pyx_api_f_4lxml_5etree_iteratorStoreNext
|
| 97 |
+
static void (*__pyx_api_f_4lxml_5etree_initTagMatch)(struct LxmlElementTagMatcher *, PyObject *) = 0;
|
| 98 |
+
#define initTagMatch __pyx_api_f_4lxml_5etree_initTagMatch
|
| 99 |
+
static xmlNs *(*__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix)(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
| 100 |
+
#define findOrBuildNodeNsPrefix __pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix
|
| 101 |
+
static int __Pyx_ImportFunction_3_1_4(PyObject *module, const char *funcname, void (**f)(void), const char *sig);
|
| 102 |
+
|
| 103 |
+
#ifndef __PYX_HAVE_RT_ImportFunction_3_1_4
|
| 104 |
+
#define __PYX_HAVE_RT_ImportFunction_3_1_4
|
| 105 |
+
static int __Pyx_ImportFunction_3_1_4(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
|
| 106 |
+
PyObject *d = 0;
|
| 107 |
+
PyObject *cobj = 0;
|
| 108 |
+
union {
|
| 109 |
+
void (*fp)(void);
|
| 110 |
+
void *p;
|
| 111 |
+
} tmp;
|
| 112 |
+
d = PyObject_GetAttrString(module, "__pyx_capi__");
|
| 113 |
+
if (!d)
|
| 114 |
+
goto bad;
|
| 115 |
+
#if (defined(Py_LIMITED_API) && Py_LIMITED_API >= 0x030d0000) || (!defined(Py_LIMITED_API) && PY_VERSION_HEX >= 0x030d0000)
|
| 116 |
+
PyDict_GetItemStringRef(d, funcname, &cobj);
|
| 117 |
+
#else
|
| 118 |
+
cobj = PyDict_GetItemString(d, funcname);
|
| 119 |
+
Py_XINCREF(cobj);
|
| 120 |
+
#endif
|
| 121 |
+
if (!cobj) {
|
| 122 |
+
PyErr_Format(PyExc_ImportError,
|
| 123 |
+
"%.200s does not export expected C function %.200s",
|
| 124 |
+
PyModule_GetName(module), funcname);
|
| 125 |
+
goto bad;
|
| 126 |
+
}
|
| 127 |
+
if (!PyCapsule_IsValid(cobj, sig)) {
|
| 128 |
+
PyErr_Format(PyExc_TypeError,
|
| 129 |
+
"C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
|
| 130 |
+
PyModule_GetName(module), funcname, sig, PyCapsule_GetName(cobj));
|
| 131 |
+
goto bad;
|
| 132 |
+
}
|
| 133 |
+
tmp.p = PyCapsule_GetPointer(cobj, sig);
|
| 134 |
+
*f = tmp.fp;
|
| 135 |
+
if (!(*f))
|
| 136 |
+
goto bad;
|
| 137 |
+
Py_DECREF(d);
|
| 138 |
+
Py_DECREF(cobj);
|
| 139 |
+
return 0;
|
| 140 |
+
bad:
|
| 141 |
+
Py_XDECREF(d);
|
| 142 |
+
Py_XDECREF(cobj);
|
| 143 |
+
return -1;
|
| 144 |
+
}
|
| 145 |
+
#endif
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
static int import_lxml__etree(void) {
|
| 149 |
+
PyObject *module = 0;
|
| 150 |
+
module = PyImport_ImportModule("lxml.etree");
|
| 151 |
+
if (!module) goto bad;
|
| 152 |
+
if (__Pyx_ImportFunction_3_1_4(module, "deepcopyNodeToDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
| 153 |
+
if (__Pyx_ImportFunction_3_1_4(module, "elementTreeFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementTreeFactory, "struct LxmlElementTree *(struct LxmlElement *)") < 0) goto bad;
|
| 154 |
+
if (__Pyx_ImportFunction_3_1_4(module, "newElementTree", (void (**)(void))&__pyx_api_f_4lxml_5etree_newElementTree, "struct LxmlElementTree *(struct LxmlElement *, PyObject *)") < 0) goto bad;
|
| 155 |
+
if (__Pyx_ImportFunction_3_1_4(module, "adoptExternalDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_adoptExternalDocument, "struct LxmlElementTree *(xmlDoc *, PyObject *, int)") < 0) goto bad;
|
| 156 |
+
if (__Pyx_ImportFunction_3_1_4(module, "elementFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementFactory, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
| 157 |
+
if (__Pyx_ImportFunction_3_1_4(module, "makeElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeElement, "struct LxmlElement *(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
|
| 158 |
+
if (__Pyx_ImportFunction_3_1_4(module, "makeSubElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeSubElement, "struct LxmlElement *(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
|
| 159 |
+
if (__Pyx_ImportFunction_3_1_4(module, "setElementClassLookupFunction", (void (**)(void))&__pyx_api_f_4lxml_5etree_setElementClassLookupFunction, "void (_element_class_lookup_function, PyObject *)") < 0) goto bad;
|
| 160 |
+
if (__Pyx_ImportFunction_3_1_4(module, "lookupDefaultElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupDefaultElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
|
| 161 |
+
if (__Pyx_ImportFunction_3_1_4(module, "lookupNamespaceElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
|
| 162 |
+
if (__Pyx_ImportFunction_3_1_4(module, "callLookupFallback", (void (**)(void))&__pyx_api_f_4lxml_5etree_callLookupFallback, "PyObject *(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
| 163 |
+
if (__Pyx_ImportFunction_3_1_4(module, "tagMatches", (void (**)(void))&__pyx_api_f_4lxml_5etree_tagMatches, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
| 164 |
+
if (__Pyx_ImportFunction_3_1_4(module, "documentOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_documentOrRaise, "struct LxmlDocument *(PyObject *)") < 0) goto bad;
|
| 165 |
+
if (__Pyx_ImportFunction_3_1_4(module, "rootNodeOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_rootNodeOrRaise, "struct LxmlElement *(PyObject *)") < 0) goto bad;
|
| 166 |
+
if (__Pyx_ImportFunction_3_1_4(module, "hasText", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasText, "int (xmlNode *)") < 0) goto bad;
|
| 167 |
+
if (__Pyx_ImportFunction_3_1_4(module, "hasTail", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasTail, "int (xmlNode *)") < 0) goto bad;
|
| 168 |
+
if (__Pyx_ImportFunction_3_1_4(module, "textOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_textOf, "PyObject *(xmlNode *)") < 0) goto bad;
|
| 169 |
+
if (__Pyx_ImportFunction_3_1_4(module, "tailOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_tailOf, "PyObject *(xmlNode *)") < 0) goto bad;
|
| 170 |
+
if (__Pyx_ImportFunction_3_1_4(module, "setNodeText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setNodeText, "int (xmlNode *, PyObject *)") < 0) goto bad;
|
| 171 |
+
if (__Pyx_ImportFunction_3_1_4(module, "setTailText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setTailText, "int (xmlNode *, PyObject *)") < 0) goto bad;
|
| 172 |
+
if (__Pyx_ImportFunction_3_1_4(module, "attributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValue, "PyObject *(xmlNode *, xmlAttr *)") < 0) goto bad;
|
| 173 |
+
if (__Pyx_ImportFunction_3_1_4(module, "attributeValueFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValueFromNsName, "PyObject *(xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
| 174 |
+
if (__Pyx_ImportFunction_3_1_4(module, "getAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_getAttributeValue, "PyObject *(struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
|
| 175 |
+
if (__Pyx_ImportFunction_3_1_4(module, "iterattributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_iterattributes, "PyObject *(struct LxmlElement *, int)") < 0) goto bad;
|
| 176 |
+
if (__Pyx_ImportFunction_3_1_4(module, "collectAttributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_collectAttributes, "PyObject *(xmlNode *, int)") < 0) goto bad;
|
| 177 |
+
if (__Pyx_ImportFunction_3_1_4(module, "setAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_setAttributeValue, "int (struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
|
| 178 |
+
if (__Pyx_ImportFunction_3_1_4(module, "delAttribute", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttribute, "int (struct LxmlElement *, PyObject *)") < 0) goto bad;
|
| 179 |
+
if (__Pyx_ImportFunction_3_1_4(module, "delAttributeFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttributeFromNsName, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
| 180 |
+
if (__Pyx_ImportFunction_3_1_4(module, "hasChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasChild, "int (xmlNode *)") < 0) goto bad;
|
| 181 |
+
if (__Pyx_ImportFunction_3_1_4(module, "findChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChild, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
| 182 |
+
if (__Pyx_ImportFunction_3_1_4(module, "findChildForwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildForwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
| 183 |
+
if (__Pyx_ImportFunction_3_1_4(module, "findChildBackwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildBackwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
| 184 |
+
if (__Pyx_ImportFunction_3_1_4(module, "nextElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_nextElement, "xmlNode *(xmlNode *)") < 0) goto bad;
|
| 185 |
+
if (__Pyx_ImportFunction_3_1_4(module, "previousElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_previousElement, "xmlNode *(xmlNode *)") < 0) goto bad;
|
| 186 |
+
if (__Pyx_ImportFunction_3_1_4(module, "appendChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChild, "void (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
|
| 187 |
+
if (__Pyx_ImportFunction_3_1_4(module, "appendChildToElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChildToElement, "int (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
|
| 188 |
+
if (__Pyx_ImportFunction_3_1_4(module, "pyunicode", (void (**)(void))&__pyx_api_f_4lxml_5etree_pyunicode, "PyObject *(const xmlChar *)") < 0) goto bad;
|
| 189 |
+
if (__Pyx_ImportFunction_3_1_4(module, "utf8", (void (**)(void))&__pyx_api_f_4lxml_5etree_utf8, "PyObject *(PyObject *)") < 0) goto bad;
|
| 190 |
+
if (__Pyx_ImportFunction_3_1_4(module, "getNsTag", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTag, "PyObject *(PyObject *)") < 0) goto bad;
|
| 191 |
+
if (__Pyx_ImportFunction_3_1_4(module, "getNsTagWithEmptyNs", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs, "PyObject *(PyObject *)") < 0) goto bad;
|
| 192 |
+
if (__Pyx_ImportFunction_3_1_4(module, "namespacedName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedName, "PyObject *(xmlNode *)") < 0) goto bad;
|
| 193 |
+
if (__Pyx_ImportFunction_3_1_4(module, "namespacedNameFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedNameFromNsName, "PyObject *(const xmlChar *, const xmlChar *)") < 0) goto bad;
|
| 194 |
+
if (__Pyx_ImportFunction_3_1_4(module, "iteratorStoreNext", (void (**)(void))&__pyx_api_f_4lxml_5etree_iteratorStoreNext, "void (struct LxmlElementIterator *, struct LxmlElement *)") < 0) goto bad;
|
| 195 |
+
if (__Pyx_ImportFunction_3_1_4(module, "initTagMatch", (void (**)(void))&__pyx_api_f_4lxml_5etree_initTagMatch, "void (struct LxmlElementTagMatcher *, PyObject *)") < 0) goto bad;
|
| 196 |
+
if (__Pyx_ImportFunction_3_1_4(module, "findOrBuildNodeNsPrefix", (void (**)(void))&__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix, "xmlNs *(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
| 197 |
+
Py_DECREF(module); module = 0;
|
| 198 |
+
return 0;
|
| 199 |
+
bad:
|
| 200 |
+
Py_XDECREF(module);
|
| 201 |
+
return -1;
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
#endif /* !__PYX_HAVE_API__lxml__etree */
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/nsclasses.pxi
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# module-level API for namespace implementations
|
| 2 |
+
|
| 3 |
+
cdef class LxmlRegistryError(LxmlError):
|
| 4 |
+
"""Base class of lxml registry errors.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
cdef class NamespaceRegistryError(LxmlRegistryError):
|
| 8 |
+
"""Error registering a namespace extension.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@cython.internal
|
| 13 |
+
cdef class _NamespaceRegistry:
|
| 14 |
+
"Dictionary-like namespace registry"
|
| 15 |
+
cdef object _ns_uri
|
| 16 |
+
cdef bytes _ns_uri_utf
|
| 17 |
+
cdef dict _entries
|
| 18 |
+
cdef char* _c_ns_uri_utf
|
| 19 |
+
def __cinit__(self, ns_uri):
|
| 20 |
+
self._ns_uri = ns_uri
|
| 21 |
+
if ns_uri is None:
|
| 22 |
+
self._ns_uri_utf = None
|
| 23 |
+
self._c_ns_uri_utf = NULL
|
| 24 |
+
else:
|
| 25 |
+
self._ns_uri_utf = _utf8(ns_uri)
|
| 26 |
+
self._c_ns_uri_utf = _cstr(self._ns_uri_utf)
|
| 27 |
+
self._entries = {}
|
| 28 |
+
|
| 29 |
+
def update(self, class_dict_iterable):
|
| 30 |
+
"""update(self, class_dict_iterable)
|
| 31 |
+
|
| 32 |
+
Forgivingly update the registry.
|
| 33 |
+
|
| 34 |
+
``class_dict_iterable`` may be a dict or some other iterable
|
| 35 |
+
that yields (name, value) pairs.
|
| 36 |
+
|
| 37 |
+
If a value does not match the required type for this registry,
|
| 38 |
+
or if the name starts with '_', it will be silently discarded.
|
| 39 |
+
This allows registrations at the module or class level using
|
| 40 |
+
vars(), globals() etc."""
|
| 41 |
+
if hasattr(class_dict_iterable, 'items'):
|
| 42 |
+
class_dict_iterable = class_dict_iterable.items()
|
| 43 |
+
for name, item in class_dict_iterable:
|
| 44 |
+
if (name is None or name[:1] != '_') and callable(item):
|
| 45 |
+
self[name] = item
|
| 46 |
+
|
| 47 |
+
def __getitem__(self, name):
|
| 48 |
+
if name is not None:
|
| 49 |
+
name = _utf8(name)
|
| 50 |
+
return self._get(name)
|
| 51 |
+
|
| 52 |
+
def __delitem__(self, name):
|
| 53 |
+
if name is not None:
|
| 54 |
+
name = _utf8(name)
|
| 55 |
+
del self._entries[name]
|
| 56 |
+
|
| 57 |
+
cdef object _get(self, object name):
|
| 58 |
+
cdef python.PyObject* dict_result
|
| 59 |
+
dict_result = python.PyDict_GetItem(self._entries, name)
|
| 60 |
+
if dict_result is NULL:
|
| 61 |
+
raise KeyError, "Name not registered."
|
| 62 |
+
return <object>dict_result
|
| 63 |
+
|
| 64 |
+
cdef object _getForString(self, char* name):
|
| 65 |
+
cdef python.PyObject* dict_result
|
| 66 |
+
dict_result = python.PyDict_GetItem(self._entries, name)
|
| 67 |
+
if dict_result is NULL:
|
| 68 |
+
raise KeyError, "Name not registered."
|
| 69 |
+
return <object>dict_result
|
| 70 |
+
|
| 71 |
+
def __iter__(self):
|
| 72 |
+
return iter(self._entries)
|
| 73 |
+
|
| 74 |
+
def items(self):
|
| 75 |
+
return list(self._entries.items())
|
| 76 |
+
|
| 77 |
+
def iteritems(self):
|
| 78 |
+
return iter(self._entries.items())
|
| 79 |
+
|
| 80 |
+
def clear(self):
|
| 81 |
+
self._entries.clear()
|
| 82 |
+
|
| 83 |
+
def __call__(self, obj):
|
| 84 |
+
# Usage as decorator:
|
| 85 |
+
# ns = lookup.get_namespace("...")
|
| 86 |
+
# @ns('abc')
|
| 87 |
+
# class element(ElementBase): pass
|
| 88 |
+
#
|
| 89 |
+
# @ns
|
| 90 |
+
# class elementname(ElementBase): pass
|
| 91 |
+
|
| 92 |
+
if obj is None or python._isString(obj):
|
| 93 |
+
# @ns(None) or @ns('tag')
|
| 94 |
+
return partial(self.__deco, obj)
|
| 95 |
+
# plain @ns decorator
|
| 96 |
+
self[obj.__name__] = obj
|
| 97 |
+
return obj
|
| 98 |
+
|
| 99 |
+
def __deco(self, name, obj):
|
| 100 |
+
self[name] = obj
|
| 101 |
+
return obj
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
@cython.final
|
| 105 |
+
@cython.internal
|
| 106 |
+
cdef class _ClassNamespaceRegistry(_NamespaceRegistry):
|
| 107 |
+
"Dictionary-like registry for namespace implementation classes"
|
| 108 |
+
def __setitem__(self, name, item):
|
| 109 |
+
if not isinstance(item, type) or not issubclass(item, ElementBase):
|
| 110 |
+
raise NamespaceRegistryError, \
|
| 111 |
+
"Registered element classes must be subtypes of ElementBase"
|
| 112 |
+
if name is not None:
|
| 113 |
+
name = _utf8(name)
|
| 114 |
+
self._entries[name] = item
|
| 115 |
+
|
| 116 |
+
def __repr__(self):
|
| 117 |
+
return "Namespace(%r)" % self._ns_uri
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
cdef class ElementNamespaceClassLookup(FallbackElementClassLookup):
|
| 121 |
+
"""ElementNamespaceClassLookup(self, fallback=None)
|
| 122 |
+
|
| 123 |
+
Element class lookup scheme that searches the Element class in the
|
| 124 |
+
Namespace registry.
|
| 125 |
+
|
| 126 |
+
Usage:
|
| 127 |
+
|
| 128 |
+
>>> lookup = ElementNamespaceClassLookup()
|
| 129 |
+
>>> ns_elements = lookup.get_namespace("http://schema.org/Movie")
|
| 130 |
+
|
| 131 |
+
>>> @ns_elements
|
| 132 |
+
... class movie(ElementBase):
|
| 133 |
+
... "Element implementation for 'movie' tag (using class name) in schema namespace."
|
| 134 |
+
|
| 135 |
+
>>> @ns_elements("movie")
|
| 136 |
+
... class MovieElement(ElementBase):
|
| 137 |
+
... "Element implementation for 'movie' tag (explicit tag name) in schema namespace."
|
| 138 |
+
"""
|
| 139 |
+
cdef dict _namespace_registries
|
| 140 |
+
def __cinit__(self):
|
| 141 |
+
self._namespace_registries = {}
|
| 142 |
+
|
| 143 |
+
def __init__(self, ElementClassLookup fallback=None):
|
| 144 |
+
FallbackElementClassLookup.__init__(self, fallback)
|
| 145 |
+
self._lookup_function = _find_nselement_class
|
| 146 |
+
|
| 147 |
+
def get_namespace(self, ns_uri):
|
| 148 |
+
"""get_namespace(self, ns_uri)
|
| 149 |
+
|
| 150 |
+
Retrieve the namespace object associated with the given URI.
|
| 151 |
+
Pass None for the empty namespace.
|
| 152 |
+
|
| 153 |
+
Creates a new namespace object if it does not yet exist."""
|
| 154 |
+
if ns_uri:
|
| 155 |
+
ns_utf = _utf8(ns_uri)
|
| 156 |
+
else:
|
| 157 |
+
ns_utf = None
|
| 158 |
+
try:
|
| 159 |
+
return self._namespace_registries[ns_utf]
|
| 160 |
+
except KeyError:
|
| 161 |
+
registry = self._namespace_registries[ns_utf] = \
|
| 162 |
+
_ClassNamespaceRegistry(ns_uri)
|
| 163 |
+
return registry
|
| 164 |
+
|
| 165 |
+
cdef object _find_nselement_class(state, _Document doc, xmlNode* c_node):
|
| 166 |
+
cdef python.PyObject* dict_result
|
| 167 |
+
cdef ElementNamespaceClassLookup lookup
|
| 168 |
+
cdef _NamespaceRegistry registry
|
| 169 |
+
if state is None:
|
| 170 |
+
return _lookupDefaultElementClass(None, doc, c_node)
|
| 171 |
+
|
| 172 |
+
lookup = <ElementNamespaceClassLookup>state
|
| 173 |
+
if c_node.type != tree.XML_ELEMENT_NODE:
|
| 174 |
+
return _callLookupFallback(lookup, doc, c_node)
|
| 175 |
+
|
| 176 |
+
c_namespace_utf = _getNs(c_node)
|
| 177 |
+
if c_namespace_utf is not NULL:
|
| 178 |
+
dict_result = python.PyDict_GetItem(
|
| 179 |
+
lookup._namespace_registries, <unsigned char*>c_namespace_utf)
|
| 180 |
+
else:
|
| 181 |
+
dict_result = python.PyDict_GetItem(
|
| 182 |
+
lookup._namespace_registries, None)
|
| 183 |
+
if dict_result is not NULL:
|
| 184 |
+
registry = <_NamespaceRegistry>dict_result
|
| 185 |
+
classes = registry._entries
|
| 186 |
+
|
| 187 |
+
if c_node.name is not NULL:
|
| 188 |
+
dict_result = python.PyDict_GetItem(
|
| 189 |
+
classes, <unsigned char*>c_node.name)
|
| 190 |
+
else:
|
| 191 |
+
dict_result = NULL
|
| 192 |
+
|
| 193 |
+
if dict_result is NULL:
|
| 194 |
+
dict_result = python.PyDict_GetItem(classes, None)
|
| 195 |
+
|
| 196 |
+
if dict_result is not NULL:
|
| 197 |
+
return <object>dict_result
|
| 198 |
+
return _callLookupFallback(lookup, doc, c_node)
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
################################################################################
|
| 202 |
+
# XPath extension functions
|
| 203 |
+
|
| 204 |
+
cdef dict __FUNCTION_NAMESPACE_REGISTRIES
|
| 205 |
+
__FUNCTION_NAMESPACE_REGISTRIES = {}
|
| 206 |
+
|
| 207 |
+
def FunctionNamespace(ns_uri):
    """FunctionNamespace(ns_uri)

    Retrieve the function namespace object associated with the given
    URI.

    Creates a new one if it does not yet exist. A function namespace
    can only be used to register extension functions.

    Usage:

    >>> ns_functions = FunctionNamespace("http://schema.org/Movie")

    >>> @ns_functions  # uses function name
    ... def add2(x):
    ...     return x + 2

    >>> @ns_functions("add3")  # uses explicit name
    ... def add_three(x):
    ...     return x + 3
    """
    # a false-ish URI (None, '') maps to the None key
    if ns_uri:
        ns_key = _utf8(ns_uri)
    else:
        ns_key = None
    try:
        return __FUNCTION_NAMESPACE_REGISTRIES[ns_key]
    except KeyError:
        # first request for this namespace: create and remember the registry
        new_registry = _XPathFunctionNamespaceRegistry(ns_uri)
        __FUNCTION_NAMESPACE_REGISTRIES[ns_key] = new_registry
        return new_registry
|
| 235 |
+
|
| 236 |
+
@cython.internal
cdef class _FunctionNamespaceRegistry(_NamespaceRegistry):
    """Namespace registry that only accepts callables as entries."""
    def __setitem__(self, name, item):
        # Validate first: the item must be callable and the name non-empty.
        if not callable(item):
            raise NamespaceRegistryError(
                "Registered functions must be callable.")
        if not name:
            raise ValueError(
                "extensions must have non empty names")
        # entries are keyed by the _utf8() form of the name
        function_name = _utf8(name)
        self._entries[function_name] = item

    def __repr__(self):
        return "FunctionNamespace(%r)" % self._ns_uri
|
| 249 |
+
|
| 250 |
+
@cython.final
@cython.internal
cdef class _XPathFunctionNamespaceRegistry(_FunctionNamespaceRegistry):
    """Function namespace registry that can additionally carry a prefix."""
    cdef object _prefix      # prefix as assigned, None if not configured
    cdef bytes _prefix_utf   # _utf8() form of the prefix, None if not configured

    property prefix:
        "Namespace prefix for extension functions."
        def __del__(self):
            # deleting the prefix resets both representations
            self._prefix_utf = None
            self._prefix = None  # no prefix configured
        def __get__(self):
            # an unconfigured prefix reads back as the empty string
            return '' if self._prefix is None else self._prefix
        def __set__(self, prefix):
            if prefix == '':
                prefix = None  # empty prefix
            if prefix is None:
                self._prefix_utf = None
            else:
                self._prefix_utf = _utf8(prefix)
            self._prefix = prefix
|
| 271 |
+
|
| 272 |
+
cdef list _find_all_extension_prefixes():
    """Internal lookup function to find all function prefixes for XSLT/XPath.

    Returns a list of ``(_prefix_utf, _ns_uri_utf)`` tuples, one for each
    registered function namespace that has both values set.
    """
    cdef _XPathFunctionNamespaceRegistry ns_registry
    cdef list prefix_ns_pairs = []
    for ns_registry in __FUNCTION_NAMESPACE_REGISTRIES.values():
        if ns_registry._prefix_utf is None or ns_registry._ns_uri_utf is None:
            # registries without prefix or without namespace are not listed
            continue
        prefix_ns_pairs.append(
            (ns_registry._prefix_utf, ns_registry._ns_uri_utf))
    return prefix_ns_pairs
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/objectify.pyx
ADDED
|
@@ -0,0 +1,2149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# cython: binding=True
|
| 2 |
+
# cython: auto_pickle=False
|
| 3 |
+
# cython: language_level=3
|
| 4 |
+
|
| 5 |
+
"""
|
| 6 |
+
The ``lxml.objectify`` module implements a Python object API for XML.
|
| 7 |
+
It is based on `lxml.etree`.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
cimport cython
|
| 11 |
+
|
| 12 |
+
from lxml.includes.etreepublic cimport _Document, _Element, ElementBase, ElementClassLookup
|
| 13 |
+
from lxml.includes.etreepublic cimport elementFactory, import_lxml__etree, textOf, pyunicode
|
| 14 |
+
from lxml.includes.tree cimport const_xmlChar, _xcstr
|
| 15 |
+
from lxml cimport python
|
| 16 |
+
from lxml.includes cimport tree
|
| 17 |
+
|
| 18 |
+
cimport lxml.includes.etreepublic as cetree
|
| 19 |
+
cimport libc.string as cstring_h # not to be confused with stdlib 'string'
|
| 20 |
+
from libc.string cimport const_char
|
| 21 |
+
from libc cimport limits
|
| 22 |
+
|
| 23 |
+
__all__ = ['BoolElement', 'DataElement', 'E', 'Element', 'ElementMaker',
|
| 24 |
+
'FloatElement', 'IntElement', 'NoneElement',
|
| 25 |
+
'NumberElement', 'ObjectPath', 'ObjectifiedDataElement',
|
| 26 |
+
'ObjectifiedElement', 'ObjectifyElementClassLookup',
|
| 27 |
+
'PYTYPE_ATTRIBUTE', 'PyType', 'StringElement', 'SubElement',
|
| 28 |
+
'XML', 'annotate', 'deannotate', 'dump', 'enable_recursive_str',
|
| 29 |
+
'fromstring', 'getRegisteredTypes', 'makeparser', 'parse',
|
| 30 |
+
'pyannotate', 'pytypename', 'set_default_parser',
|
| 31 |
+
'set_pytype_attribute_tag', 'xsiannotate']
|
| 32 |
+
|
| 33 |
+
cdef object etree
|
| 34 |
+
from lxml import etree
|
| 35 |
+
# initialize C-API of lxml.etree
|
| 36 |
+
import_lxml__etree()
|
| 37 |
+
|
| 38 |
+
__version__ = etree.__version__
|
| 39 |
+
|
| 40 |
+
cdef object _float_is_inf, _float_is_nan
|
| 41 |
+
from math import isinf as _float_is_inf, isnan as _float_is_nan
|
| 42 |
+
|
| 43 |
+
cdef object re
|
| 44 |
+
import re
|
| 45 |
+
|
| 46 |
+
cdef tuple IGNORABLE_ERRORS = (ValueError, TypeError)
|
| 47 |
+
cdef object is_special_method = re.compile('__.*__$').match
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
cdef object _typename(object t):
    """Return the unqualified type name of ``t`` as a unicode string.

    Takes the name reported by ``python._fqtypename()`` and strips
    everything up to and including its last dot, if there is one.
    """
    cdef const_char* c_fqname = python._fqtypename(t)
    cdef const_char* c_last_dot = cstring_h.strrchr(c_fqname, c'.')
    if c_last_dot is NULL:
        # no module part in the name
        return pyunicode(<const_xmlChar*>c_fqname)
    return pyunicode(<const_xmlChar*>(c_last_dot + 1))
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
# namespace/name for "pytype" hint attribute
|
| 60 |
+
cdef object PYTYPE_NAMESPACE
|
| 61 |
+
cdef bytes PYTYPE_NAMESPACE_UTF8
|
| 62 |
+
cdef const_xmlChar* _PYTYPE_NAMESPACE
|
| 63 |
+
|
| 64 |
+
cdef object PYTYPE_ATTRIBUTE_NAME
|
| 65 |
+
cdef bytes PYTYPE_ATTRIBUTE_NAME_UTF8
|
| 66 |
+
cdef const_xmlChar* _PYTYPE_ATTRIBUTE_NAME
|
| 67 |
+
|
| 68 |
+
PYTYPE_ATTRIBUTE = None
|
| 69 |
+
|
| 70 |
+
cdef unicode TREE_PYTYPE_NAME = "TREE"
|
| 71 |
+
|
| 72 |
+
cdef tuple _unicodeAndUtf8(s):
    """Return the pair ``(s, utf8_bytes)`` for the unicode string ``s``."""
    utf8_bytes = python.PyUnicode_AsUTF8String(s)
    return (s, utf8_bytes)
|
| 74 |
+
|
| 75 |
+
def set_pytype_attribute_tag(attribute_tag=None):
    """set_pytype_attribute_tag(attribute_tag=None)
    Change name and namespace of the XML attribute that holds Python type
    information.

    Do not use this unless you know what you are doing.

    Reset by calling without argument.

    Default: "{http://codespeak.net/lxml/objectify/pytype}pytype"
    """
    global PYTYPE_ATTRIBUTE, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME
    global PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8
    global PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8
    cdef bytes ns_utf8, name_utf8

    # Phase 1: compute all new values into locals.  Anything that can fail
    # (tag parsing, decoding, building the qualified name) happens here, so
    # a failure leaves the module state exactly as it was.
    if attribute_tag is None:
        ns, ns_utf8 = \
            _unicodeAndUtf8("http://codespeak.net/lxml/objectify/pytype")
        name, name_utf8 = \
            _unicodeAndUtf8("pytype")
    else:
        ns_utf8, name_utf8 = \
            cetree.getNsTag(attribute_tag)
        # NOTE(review): a tag without namespace presumably yields None for
        # the namespace part and fails here - confirm against getNsTag().
        ns = ns_utf8.decode('utf8')
        name = name_utf8.decode('utf8')
    attribute = cetree.namespacedNameFromNsName(
        _xcstr(ns_utf8), _xcstr(name_utf8))

    # Phase 2: commit.  The byte strings are rebound first and the C
    # pointers are taken from the globals right afterwards, because the
    # pointers refer to the buffers of exactly these objects.
    PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8 = ns, ns_utf8
    PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8 = name, name_utf8
    _PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8
    _PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8
    PYTYPE_ATTRIBUTE = attribute
|
| 104 |
+
|
| 105 |
+
set_pytype_attribute_tag()
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# namespaces for XML Schema
|
| 109 |
+
cdef object XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8
|
| 110 |
+
XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8 = \
|
| 111 |
+
_unicodeAndUtf8("http://www.w3.org/2001/XMLSchema")
|
| 112 |
+
cdef const_xmlChar* _XML_SCHEMA_NS = _xcstr(XML_SCHEMA_NS_UTF8)
|
| 113 |
+
|
| 114 |
+
cdef object XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8
|
| 115 |
+
XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8 = \
|
| 116 |
+
_unicodeAndUtf8("http://www.w3.org/2001/XMLSchema-instance")
|
| 117 |
+
cdef const_xmlChar* _XML_SCHEMA_INSTANCE_NS = _xcstr(XML_SCHEMA_INSTANCE_NS_UTF8)
|
| 118 |
+
|
| 119 |
+
cdef object XML_SCHEMA_INSTANCE_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS
|
| 120 |
+
cdef object XML_SCHEMA_INSTANCE_TYPE_ATTR = "{%s}type" % XML_SCHEMA_INSTANCE_NS
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
################################################################################
|
| 124 |
+
# Element class for the main API
|
| 125 |
+
|
| 126 |
+
cdef class ObjectifiedElement(ElementBase):
|
| 127 |
+
"""Main XML Element class.
|
| 128 |
+
|
| 129 |
+
Element children are accessed as object attributes. Multiple children
|
| 130 |
+
with the same name are available through a list index. Example::
|
| 131 |
+
|
| 132 |
+
>>> root = XML("<root><c1><c2>0</c2><c2>1</c2></c1></root>")
|
| 133 |
+
>>> second_c2 = root.c1.c2[1]
|
| 134 |
+
>>> print(second_c2.text)
|
| 135 |
+
1
|
| 136 |
+
|
| 137 |
+
Note that you cannot (and must not) instantiate this class or its
|
| 138 |
+
subclasses.
|
| 139 |
+
"""
|
| 140 |
+
def __iter__(self):
|
| 141 |
+
"""Iterate over self and all siblings with the same tag.
|
| 142 |
+
"""
|
| 143 |
+
parent = self.getparent()
|
| 144 |
+
if parent is None:
|
| 145 |
+
return iter([self])
|
| 146 |
+
return etree.ElementChildIterator(parent, tag=self.tag)
|
| 147 |
+
|
| 148 |
+
def __str__(self):
|
| 149 |
+
if __RECURSIVE_STR:
|
| 150 |
+
return _dump(self, 0)
|
| 151 |
+
else:
|
| 152 |
+
return textOf(self._c_node) or ''
|
| 153 |
+
|
| 154 |
+
# pickle support for objectified Element
|
| 155 |
+
def __reduce__(self):
|
| 156 |
+
return fromstring, (etree.tostring(self),)
|
| 157 |
+
|
| 158 |
+
@property
|
| 159 |
+
def text(self):
|
| 160 |
+
return textOf(self._c_node)
|
| 161 |
+
|
| 162 |
+
@property
|
| 163 |
+
def __dict__(self):
|
| 164 |
+
"""A fake implementation for __dict__ to support dir() etc.
|
| 165 |
+
|
| 166 |
+
Note that this only considers the first child with a given name.
|
| 167 |
+
"""
|
| 168 |
+
cdef _Element child
|
| 169 |
+
cdef dict children
|
| 170 |
+
c_ns = tree._getNs(self._c_node)
|
| 171 |
+
tag = "{%s}*" % pyunicode(c_ns) if c_ns is not NULL else None
|
| 172 |
+
children = {}
|
| 173 |
+
for child in etree.ElementChildIterator(self, tag=tag):
|
| 174 |
+
if c_ns is NULL and tree._getNs(child._c_node) is not NULL:
|
| 175 |
+
continue
|
| 176 |
+
name = pyunicode(child._c_node.name)
|
| 177 |
+
if name not in children:
|
| 178 |
+
children[name] = child
|
| 179 |
+
return children
|
| 180 |
+
|
| 181 |
+
def __len__(self):
|
| 182 |
+
"""Count self and siblings with the same tag.
|
| 183 |
+
"""
|
| 184 |
+
return _countSiblings(self._c_node)
|
| 185 |
+
|
| 186 |
+
def countchildren(self):
|
| 187 |
+
"""countchildren(self)
|
| 188 |
+
|
| 189 |
+
Return the number of children of this element, regardless of their
|
| 190 |
+
name.
|
| 191 |
+
"""
|
| 192 |
+
# copied from etree
|
| 193 |
+
cdef Py_ssize_t c
|
| 194 |
+
cdef tree.xmlNode* c_node
|
| 195 |
+
c = 0
|
| 196 |
+
c_node = self._c_node.children
|
| 197 |
+
while c_node is not NULL:
|
| 198 |
+
if tree._isElement(c_node):
|
| 199 |
+
c += 1
|
| 200 |
+
c_node = c_node.next
|
| 201 |
+
return c
|
| 202 |
+
|
| 203 |
+
def getchildren(self):
|
| 204 |
+
"""getchildren(self)
|
| 205 |
+
|
| 206 |
+
Returns a sequence of all direct children. The elements are
|
| 207 |
+
returned in document order.
|
| 208 |
+
"""
|
| 209 |
+
cdef tree.xmlNode* c_node
|
| 210 |
+
result = []
|
| 211 |
+
c_node = self._c_node.children
|
| 212 |
+
while c_node is not NULL:
|
| 213 |
+
if tree._isElement(c_node):
|
| 214 |
+
result.append(cetree.elementFactory(self._doc, c_node))
|
| 215 |
+
c_node = c_node.next
|
| 216 |
+
return result
|
| 217 |
+
|
| 218 |
+
def __getattr__(self, tag):
|
| 219 |
+
"""Return the (first) child with the given tag name. If no namespace
|
| 220 |
+
is provided, the child will be looked up in the same one as self.
|
| 221 |
+
"""
|
| 222 |
+
return _lookupChildOrRaise(self, tag)
|
| 223 |
+
|
| 224 |
+
def __setattr__(self, tag, value):
|
| 225 |
+
"""Set the value of the (first) child with the given tag name. If no
|
| 226 |
+
namespace is provided, the child will be looked up in the same one as
|
| 227 |
+
self.
|
| 228 |
+
"""
|
| 229 |
+
cdef _Element element
|
| 230 |
+
# properties are looked up /after/ __setattr__, so we must emulate them
|
| 231 |
+
if tag == 'text' or tag == 'pyval':
|
| 232 |
+
# read-only !
|
| 233 |
+
raise TypeError, f"attribute '{tag}' of '{_typename(self)}' objects is not writable"
|
| 234 |
+
elif tag == 'tail':
|
| 235 |
+
cetree.setTailText(self._c_node, value)
|
| 236 |
+
return
|
| 237 |
+
elif tag == 'tag':
|
| 238 |
+
ElementBase.tag.__set__(self, value)
|
| 239 |
+
return
|
| 240 |
+
elif tag == 'base':
|
| 241 |
+
ElementBase.base.__set__(self, value)
|
| 242 |
+
return
|
| 243 |
+
tag = _buildChildTag(self, tag)
|
| 244 |
+
element = _lookupChild(self, tag)
|
| 245 |
+
if element is None:
|
| 246 |
+
_appendValue(self, tag, value)
|
| 247 |
+
else:
|
| 248 |
+
_replaceElement(element, value)
|
| 249 |
+
|
| 250 |
+
def __delattr__(self, tag):
|
| 251 |
+
child = _lookupChildOrRaise(self, tag)
|
| 252 |
+
self.remove(child)
|
| 253 |
+
|
| 254 |
+
def addattr(self, tag, value):
|
| 255 |
+
"""addattr(self, tag, value)
|
| 256 |
+
|
| 257 |
+
Add a child value to the element.
|
| 258 |
+
|
| 259 |
+
As opposed to append(), it sets a data value, not an element.
|
| 260 |
+
"""
|
| 261 |
+
_appendValue(self, _buildChildTag(self, tag), value)
|
| 262 |
+
|
| 263 |
+
def __getitem__(self, key):
|
| 264 |
+
"""Return a sibling, counting from the first child of the parent. The
|
| 265 |
+
method behaves like both a dict and a sequence.
|
| 266 |
+
|
| 267 |
+
* If argument is an integer, returns the sibling at that position.
|
| 268 |
+
|
| 269 |
+
* If argument is a string, does the same as getattr(). This can be
|
| 270 |
+
used to provide namespaces for element lookup, or to look up
|
| 271 |
+
children with special names (``text`` etc.).
|
| 272 |
+
|
| 273 |
+
* If argument is a slice object, returns the matching slice.
|
| 274 |
+
"""
|
| 275 |
+
cdef tree.xmlNode* c_self_node
|
| 276 |
+
cdef tree.xmlNode* c_parent
|
| 277 |
+
cdef tree.xmlNode* c_node
|
| 278 |
+
cdef Py_ssize_t c_index
|
| 279 |
+
if python._isString(key):
|
| 280 |
+
return _lookupChildOrRaise(self, key)
|
| 281 |
+
elif isinstance(key, slice):
|
| 282 |
+
return list(self)[key]
|
| 283 |
+
# normal item access
|
| 284 |
+
c_index = key # raises TypeError if necessary
|
| 285 |
+
c_self_node = self._c_node
|
| 286 |
+
c_parent = c_self_node.parent
|
| 287 |
+
if c_parent is NULL:
|
| 288 |
+
if c_index == 0 or c_index == -1:
|
| 289 |
+
return self
|
| 290 |
+
raise IndexError, unicode(key)
|
| 291 |
+
if c_index < 0:
|
| 292 |
+
c_node = c_parent.last
|
| 293 |
+
else:
|
| 294 |
+
c_node = c_parent.children
|
| 295 |
+
c_node = _findFollowingSibling(
|
| 296 |
+
c_node, tree._getNs(c_self_node), c_self_node.name, c_index)
|
| 297 |
+
if c_node is NULL:
|
| 298 |
+
raise IndexError, unicode(key)
|
| 299 |
+
return elementFactory(self._doc, c_node)
|
| 300 |
+
|
| 301 |
+
def __setitem__(self, key, value):
|
| 302 |
+
"""Set the value of a sibling, counting from the first child of the
|
| 303 |
+
parent. Implements key assignment, item assignment and slice
|
| 304 |
+
assignment.
|
| 305 |
+
|
| 306 |
+
* If argument is an integer, sets the sibling at that position.
|
| 307 |
+
|
| 308 |
+
* If argument is a string, does the same as setattr(). This is used
|
| 309 |
+
to provide namespaces for element lookup.
|
| 310 |
+
|
| 311 |
+
* If argument is a sequence (list, tuple, etc.), assign the contained
|
| 312 |
+
items to the siblings.
|
| 313 |
+
"""
|
| 314 |
+
cdef _Element element
|
| 315 |
+
cdef tree.xmlNode* c_node
|
| 316 |
+
if python._isString(key):
|
| 317 |
+
key = _buildChildTag(self, key)
|
| 318 |
+
element = _lookupChild(self, key)
|
| 319 |
+
if element is None:
|
| 320 |
+
_appendValue(self, key, value)
|
| 321 |
+
else:
|
| 322 |
+
_replaceElement(element, value)
|
| 323 |
+
return
|
| 324 |
+
|
| 325 |
+
if self._c_node.parent is NULL:
|
| 326 |
+
# the 'root[i] = ...' case
|
| 327 |
+
raise TypeError, "assignment to root element is invalid"
|
| 328 |
+
|
| 329 |
+
if isinstance(key, slice):
|
| 330 |
+
# slice assignment
|
| 331 |
+
_setSlice(key, self, value)
|
| 332 |
+
else:
|
| 333 |
+
# normal index assignment
|
| 334 |
+
if key < 0:
|
| 335 |
+
c_node = self._c_node.parent.last
|
| 336 |
+
else:
|
| 337 |
+
c_node = self._c_node.parent.children
|
| 338 |
+
c_node = _findFollowingSibling(
|
| 339 |
+
c_node, tree._getNs(self._c_node), self._c_node.name, key)
|
| 340 |
+
if c_node is NULL:
|
| 341 |
+
raise IndexError, unicode(key)
|
| 342 |
+
element = elementFactory(self._doc, c_node)
|
| 343 |
+
_replaceElement(element, value)
|
| 344 |
+
|
| 345 |
+
def __delitem__(self, key):
|
| 346 |
+
parent = self.getparent()
|
| 347 |
+
if parent is None:
|
| 348 |
+
raise TypeError, "deleting items not supported by root element"
|
| 349 |
+
if isinstance(key, slice):
|
| 350 |
+
# slice deletion
|
| 351 |
+
del_items = list(self)[key]
|
| 352 |
+
remove = parent.remove
|
| 353 |
+
for el in del_items:
|
| 354 |
+
remove(el)
|
| 355 |
+
else:
|
| 356 |
+
# normal index deletion
|
| 357 |
+
sibling = self.__getitem__(key)
|
| 358 |
+
parent.remove(sibling)
|
| 359 |
+
|
| 360 |
+
def descendantpaths(self, prefix=None):
    """descendantpaths(self, prefix=None)

    Returns a list of object path expressions for all descendants.

    A ``prefix`` that is neither None nor a string is treated as a
    sequence of path segments and joined with '.' before use.
    """
    if prefix is None or python._isString(prefix):
        path_prefix = prefix
    else:
        path_prefix = '.'.join(prefix)
    return _build_descendant_paths(self._c_node, path_prefix)
|
| 368 |
+
|
| 369 |
+
|
| 370 |
+
cdef inline bint _tagMatches(tree.xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name):
    # Tell whether ``c_node`` carries the tag (c_href, c_name).
    # The names are compared as pointers, not character by character.
    # A NULL ``c_href`` matches any namespace; a node without namespace
    # only matches an empty ``c_href`` string.
    if c_node.name == c_name:
        if c_href == NULL:
            return 1
        c_node_href = tree._getNs(c_node)
        if c_node_href != NULL:
            return tree.xmlStrcmp(c_node_href, c_href) == 0
        return c_href[0] == c'\0'
    return 0
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
cdef Py_ssize_t _countSiblings(tree.xmlNode* c_start_node):
    # Count the element nodes on the sibling chain of ``c_start_node`` that
    # match its tag (see _tagMatches), including the start node itself.
    cdef tree.xmlNode* c_sibling
    cdef Py_ssize_t n_matches
    c_name = c_start_node.name
    c_ns = tree._getNs(c_start_node)
    n_matches = 1  # the start node

    # walk towards the end of the sibling chain ...
    c_sibling = c_start_node.next
    while c_sibling is not NULL:
        if c_sibling.type == tree.XML_ELEMENT_NODE and _tagMatches(c_sibling, c_ns, c_name):
            n_matches += 1
        c_sibling = c_sibling.next

    # ... and then towards its beginning
    c_sibling = c_start_node.prev
    while c_sibling is not NULL:
        if c_sibling.type == tree.XML_ELEMENT_NODE and _tagMatches(c_sibling, c_ns, c_name):
            n_matches += 1
        c_sibling = c_sibling.prev
    return n_matches
|
| 400 |
+
|
| 401 |
+
cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node,
                                         const_xmlChar* href, const_xmlChar* name,
                                         Py_ssize_t index):
    # Starting at ``c_node`` (inclusive), return the matching element at
    # position ``index`` among the elements with tag (href, name), or NULL
    # if the chain ends first.  A non-negative index walks forward with
    # cetree.nextElement(); a negative index walks backward with
    # cetree.previousElement(), -1 being the first match in that direction.
    cdef tree.xmlNode* (*advance)(tree.xmlNode*)
    cdef Py_ssize_t to_skip
    if index < 0:
        to_skip = -1 - index
        advance = cetree.previousElement
    else:
        to_skip = index
        advance = cetree.nextElement
    while c_node is not NULL:
        if c_node.type == tree.XML_ELEMENT_NODE and _tagMatches(c_node, href, name):
            if to_skip == 0:
                return c_node
            to_skip -= 1
        c_node = advance(c_node)
    return NULL
|
| 418 |
+
|
| 419 |
+
cdef object _lookupChild(_Element parent, tag):
    # Return the first child of ``parent`` matching ``tag`` as an element
    # object, or None if there is no such child.
    cdef tree.xmlNode* c_found
    cdef tree.xmlNode* c_parent
    c_parent = parent._c_node
    ns, name = cetree.getNsTagWithEmptyNs(tag)
    c_name_len = len(<bytes> name)
    if c_name_len > limits.INT_MAX:
        # too long for the int-sized length argument of xmlDictExists()
        return None
    c_name = tree.xmlDictExists(
        c_parent.doc.dict, _xcstr(name), <int> c_name_len)
    if c_name is NULL:
        return None  # not in the hash map => not in the tree
    if ns is not None:
        c_href = _xcstr(ns)
    else:
        # either inherit ns from parent or use empty (i.e. no) namespace
        c_href = tree._getNs(c_parent) or <const_xmlChar*>''
    c_found = _findFollowingSibling(c_parent.children, c_href, c_name, 0)
    if c_found is not NULL:
        return elementFactory(parent._doc, c_found)
    return None
|
| 440 |
+
|
| 441 |
+
cdef object _lookupChildOrRaise(_Element parent, tag):
    # Like _lookupChild(), but a missing child is reported as an
    # AttributeError that names the fully qualified child tag.
    child = _lookupChild(parent, tag)
    if child is not None:
        return child
    raise AttributeError("no such child: " + _buildChildTag(parent, tag))
|
| 446 |
+
|
| 447 |
+
cdef object _buildChildTag(_Element parent, tag):
    # Build the namespaced name for a child ``tag`` of ``parent``.  A tag
    # without an explicit namespace takes the namespace of the parent node.
    cdef const_xmlChar* c_href
    ns, local_name = cetree.getNsTag(tag)
    c_local_name = _xcstr(local_name)
    if ns is None:
        c_href = tree._getNs(parent._c_node)
    else:
        c_href = _xcstr(ns)
    return cetree.namespacedNameFromNsName(c_href, c_local_name)
|
| 452 |
+
|
| 453 |
+
cdef _replaceElement(_Element element, value):
    # Replace ``element`` in its parent by a new element built from
    # ``value``.  The new element always keeps the tag of ``element``.
    cdef _Element replacement
    if isinstance(value, (list, tuple)):
        # sequences are handed over to slice assignment on the element
        element[:] = value
        return
    if isinstance(value, _Element):
        # deep copy the new element into the target document
        replacement = cetree.deepcopyNodeToDocument(
            element._doc, (<_Element>value)._c_node)
        replacement.tag = element.tag
    else:
        # any other value becomes the content of a fresh element
        replacement = element.makeelement(element.tag)
        _setElementValue(replacement, value)
    element.getparent().replace(element, replacement)
|
| 467 |
+
|
| 468 |
+
cdef _appendValue(_Element parent, tag, value):
    # Append ``value`` to ``parent`` as new child element(s) named ``tag``.
    # Lists and tuples are unpacked recursively, one child per item.
    cdef _Element child
    if isinstance(value, (list, tuple)):
        for entry in value:
            _appendValue(parent, tag, entry)
        return
    if isinstance(value, _Element):
        # deep copy the new element into the parent's document and rename it
        child = cetree.deepcopyNodeToDocument(
            parent._doc, (<_Element>value)._c_node)
        child.tag = tag
    else:
        child = cetree.makeElement(
            tag, parent._doc, None, None, None, None, None)
        _setElementValue(child, value)
    cetree.appendChildToElement(parent, child)
|
| 484 |
+
|
| 485 |
+
cdef _setElementValue(_Element element, value):
    # Store ``value`` as the content of ``element``:
    #  * an element value replaces ``element`` itself (see _replaceElement)
    #  * None sets the XML Schema instance 'nil' attribute to "true"
    #  * anything else clears 'nil', is stored as node text and gets the
    #    pytype attribute set if a PyType is registered under its type name
    #    (the attribute is removed otherwise)
    if isinstance(value, _Element):
        _replaceElement(element, value)
        return
    if value is None:
        cetree.setAttributeValue(
            element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true")
    else:
        cetree.delAttributeFromNsName(
            element._c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil")
        is_text = python._isString(value)
        if is_text:
            pytype_name = "str"
        else:
            pytype_name = _typename(value)
        py_type = <PyType>_PYTYPE_DICT.get(pytype_name)
        if not is_text:
            # strings are stored as they are, everything else is serialised
            if py_type is None:
                value = unicode(value)
            else:
                value = py_type.stringify(value)
        if py_type is None:
            cetree.delAttributeFromNsName(
                element._c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
        else:
            cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name)
    cetree.setNodeText(element._c_node, value)
|
| 511 |
+
|
| 512 |
+
cdef _setSlice(sliceobject, _Element target, items):
    # Slice assignment on the items of ``target``: replace the elements
    # selected by ``sliceobject`` with new elements built from ``items``.
    # Every new element gets the tag of ``target``.
    cdef _Element parent
    cdef tree.xmlNode* c_node
    cdef Py_ssize_t c_step, pos

    # collect existing slice (a missing step counts as 1)
    step = (<slice>sliceobject).step
    if step is not None:
        c_step = step
    else:
        c_step = 1
    if c_step == 0:
        raise ValueError("Invalid slice")
    cdef list del_items = target[sliceobject]

    # collect new values
    tag = target.tag
    new_items = []
    for item in items:
        if not isinstance(item, _Element):
            new_element = cetree.makeElement(
                tag, target._doc, None, None, None, None, None)
            _setElementValue(new_element, item)
        else:
            # deep copy the new element
            new_element = cetree.deepcopyNodeToDocument(
                target._doc, (<_Element>item)._c_node)
            new_element.tag = tag
        new_items.append(new_element)

    n_old = len(del_items)
    n_new = len(new_items)

    # sanity check - raise what a list would raise
    if c_step != 1 and n_old != n_new:
        raise ValueError(
            f"attempt to assign sequence of size {len(new_items)} to extended slice of size {len(del_items)}")

    # replace existing items pairwise, as far as both lists reach
    parent = target.getparent()
    replace = parent.replace
    for old_item, new_item in zip(del_items, new_items):
        replace(old_item, new_item)
    pos = min(n_old, n_new)

    # remove leftover items
    if pos < n_old:
        remove = parent.remove
        for old_item in del_items[pos:]:
            remove(old_item)
        pos = n_old

    # append remaining new items
    if pos < n_new:
        # the sanity check above guarantees (step == 1)
        if pos > 0:
            # continue right behind the last element that was put in place
            anchor = new_items[pos-1]
        else:
            # Nothing was replaced: locate the sibling in front of the slice.
            # NOTE(review): a slice start of None (e.g. ``[:0]``) is compared
            # and decremented as it is here, which looks like it raises a
            # TypeError - confirm the intended behaviour before changing it.
            start = (<slice>sliceobject).start
            if start > 0:
                c_node = parent._c_node.children
            else:
                c_node = parent._c_node.last
            c_node = _findFollowingSibling(
                c_node, tree._getNs(target._c_node), target._c_node.name,
                start - 1)
            if c_node is NULL:
                # no such sibling: add everything at the end of the parent
                for new_item in new_items[pos:]:
                    cetree.appendChildToElement(parent, new_item)
                return
            anchor = cetree.elementFactory(parent._doc, c_node)
        # chain the remaining elements one behind the other
        for new_item in new_items[pos:]:
            anchor.addnext(new_item)
            anchor = new_item
|
| 582 |
+
|
| 583 |
+
################################################################################
|
| 584 |
+
# Data type support in subclasses
|
| 585 |
+
|
| 586 |
+
cdef class ObjectifiedDataElement(ObjectifiedElement):
    """Base class of all data type Elements.

    Subclasses are expected to override the 'pyval' property and, where a
    different string form is needed, the __str__ method.
    """
    @property
    def pyval(self):
        # the raw text content, as returned by textOf()
        return textOf(self._c_node)

    def __str__(self):
        text = textOf(self._c_node)
        return text if text else ''

    def __repr__(self):
        text = textOf(self._c_node)
        return text if text else ''

    def _setText(self, s):
        """Set the text content of the underlying node.

        For use in subclasses only. Don't use unless you know what you are
        doing.
        """
        cetree.setNodeText(self._c_node, s)
|
| 605 |
+
|
| 606 |
+
|
| 607 |
+
cdef class NumberElement(ObjectifiedDataElement):
    """Base class of the numeric data elements.

    The Python value is produced by calling the parser function stored in
    ``_parse_value`` on the text content (see ``_parseNumber()``).  All
    operators work on that parsed value; operands are unwrapped with
    ``_numericValueOf()`` first, so other data elements can take part.
    """
    # callable that turns the text content into the Python value
    cdef object _parse_value

    def _setValueParser(self, function):
        """Set the function that parses the Python value from a string.

        Do not use this unless you know what you are doing.
        """
        self._parse_value = function

    @property
    def pyval(self):
        return _parseNumber(self)

    # conversions and representations

    def __int__(self):
        return int(_parseNumber(self))

    def __float__(self):
        return float(_parseNumber(self))

    def __complex__(self):
        return complex(_parseNumber(self))

    def __str__(self):
        return unicode(_parseNumber(self))

    def __repr__(self):
        return repr(_parseNumber(self))

    def __oct__(self):
        return oct(_parseNumber(self))

    def __hex__(self):
        return hex(_parseNumber(self))

    # comparison and hashing follow the parsed value

    def __richcmp__(self, other, int op):
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        return hash(_parseNumber(self))

    # arithmetic operators

    def __add__(self, other):
        return _numericValueOf(self) + _numericValueOf(other)

    def __radd__(self, other):
        return _numericValueOf(other) + _numericValueOf(self)

    def __sub__(self, other):
        return _numericValueOf(self) - _numericValueOf(other)

    def __rsub__(self, other):
        return _numericValueOf(other) - _numericValueOf(self)

    def __mul__(self, other):
        return _numericValueOf(self) * _numericValueOf(other)

    def __rmul__(self, other):
        return _numericValueOf(other) * _numericValueOf(self)

    def __div__(self, other):
        return _numericValueOf(self) / _numericValueOf(other)

    def __rdiv__(self, other):
        return _numericValueOf(other) / _numericValueOf(self)

    def __truediv__(self, other):
        return _numericValueOf(self) / _numericValueOf(other)

    def __rtruediv__(self, other):
        return _numericValueOf(other) / _numericValueOf(self)

    def __floordiv__(self, other):
        return _numericValueOf(self) // _numericValueOf(other)

    def __rfloordiv__(self, other):
        return _numericValueOf(other) // _numericValueOf(self)

    def __mod__(self, other):
        return _numericValueOf(self) % _numericValueOf(other)

    def __rmod__(self, other):
        return _numericValueOf(other) % _numericValueOf(self)

    def __divmod__(self, other):
        return divmod(_numericValueOf(self), _numericValueOf(other))

    def __rdivmod__(self, other):
        return divmod(_numericValueOf(other), _numericValueOf(self))

    def __pow__(self, other, modulo):
        if modulo is None:
            return _numericValueOf(self) ** _numericValueOf(other)
        else:
            # The modulus is unwrapped like the other operands so that a data
            # element can be used there, too.  Plain numbers pass through
            # _numericValueOf() unchanged.
            return pow(_numericValueOf(self), _numericValueOf(other),
                       _numericValueOf(modulo))

    def __rpow__(self, other, modulo):
        if modulo is None:
            return _numericValueOf(other) ** _numericValueOf(self)
        else:
            # see __pow__() for the handling of the modulus
            return pow(_numericValueOf(other), _numericValueOf(self),
                       _numericValueOf(modulo))

    # unary operators and truth value

    def __neg__(self):
        return - _numericValueOf(self)

    def __pos__(self):
        return + _numericValueOf(self)

    def __abs__(self):
        return abs( _numericValueOf(self) )

    def __bool__(self):
        return bool(_numericValueOf(self))

    def __invert__(self):
        return ~ _numericValueOf(self)

    # bit operators

    def __lshift__(self, other):
        return _numericValueOf(self) << _numericValueOf(other)

    def __rlshift__(self, other):
        return _numericValueOf(other) << _numericValueOf(self)

    def __rshift__(self, other):
        return _numericValueOf(self) >> _numericValueOf(other)

    def __rrshift__(self, other):
        return _numericValueOf(other) >> _numericValueOf(self)

    def __and__(self, other):
        return _numericValueOf(self) & _numericValueOf(other)

    def __rand__(self, other):
        return _numericValueOf(other) & _numericValueOf(self)

    def __or__(self, other):
        return _numericValueOf(self) | _numericValueOf(other)

    def __ror__(self, other):
        return _numericValueOf(other) | _numericValueOf(self)

    def __xor__(self, other):
        return _numericValueOf(self) ^ _numericValueOf(other)

    def __rxor__(self, other):
        return _numericValueOf(other) ^ _numericValueOf(self)
|
| 752 |
+
|
| 753 |
+
|
| 754 |
+
cdef class IntElement(NumberElement):
    # Number element whose text content is parsed with int().
    def _init(self):
        self._parse_value = int

    def __index__(self):
        number = _parseNumber(self)
        return int(number)
|
| 760 |
+
|
| 761 |
+
|
| 762 |
+
cdef class FloatElement(NumberElement):
    # Number element whose text content is parsed with float().
    def _init(self):
        self._parse_value = float
|
| 765 |
+
|
| 766 |
+
|
| 767 |
+
cdef class StringElement(ObjectifiedDataElement):
    """String data class.

    Note that this class does *not* support the sequence protocol of strings:
    len(), iter(), str_attr[0], str_attr[0:1], etc. are *not* supported.
    Instead, use the .text attribute to get a 'real' string.
    """
    @property
    def pyval(self):
        # missing text content is reported as the empty string
        text = textOf(self._c_node)
        return text if text else ''

    def __repr__(self):
        text = textOf(self._c_node)
        return repr(text if text else '')

    def strlen(self):
        # length of the text content, 0 if there is none
        text = textOf(self._c_node)
        return 0 if text is None else len(text)

    def __bool__(self):
        return bool(textOf(self._c_node))

    def __richcmp__(self, other, int op):
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        text = textOf(self._c_node)
        return hash(text if text else '')

    def __add__(self, other):
        left = _strValueOf(self)
        right = _strValueOf(other)
        return left + right

    def __radd__(self, other):
        right = _strValueOf(self)
        left = _strValueOf(other)
        return left + right

    def __mul__(self, other):
        # either operand may be the string element
        if isinstance(self, StringElement):
            return (textOf((<StringElement>self)._c_node) or '') * _numericValueOf(other)
        if isinstance(other, StringElement):
            return _numericValueOf(self) * (textOf((<StringElement>other)._c_node) or '')
        return NotImplemented

    def __rmul__(self, other):
        return _numericValueOf(other) * (textOf((<StringElement>self)._c_node) or '')

    def __mod__(self, other):
        # string formatting with the text content as the format string
        return (_strValueOf(self) or '') % other

    def __int__(self):
        return int(textOf(self._c_node))

    def __float__(self):
        return float(textOf(self._c_node))

    def __complex__(self):
        return complex(textOf(self._c_node))
|
| 829 |
+
|
| 830 |
+
|
| 831 |
+
cdef class NoneElement(ObjectifiedDataElement):
    # Data element that behaves like Python's None: it is false, hashes and
    # compares like None and has None as its Python value.
    @property
    def pyval(self):
        return None

    def __str__(self):
        return "None"

    def __repr__(self):
        return "None"

    def __bool__(self):
        return False

    def __hash__(self):
        return hash(None)

    def __richcmp__(self, other, int op):
        # substitute None for whichever side is the NoneElement
        if self is None or other is None:
            left = right = None
        elif isinstance(self, NoneElement):
            left, right = None, other
        else:
            left, right = self, None
        return python.PyObject_RichCompare(left, right, op)
|
| 855 |
+
|
| 856 |
+
|
| 857 |
+
cdef class BoolElement(IntElement):
    """Boolean type base on string values: 'true' or 'false'.

    Note that this inherits from IntElement to mimic the behaviour of
    Python's bool type.
    """
    def _init(self):
        self._parse_value = _parseBool  # wraps as Python callable

    @property
    def pyval(self):
        return _parseBool(textOf(self._c_node))

    def __bool__(self):
        return _parseBool(textOf(self._c_node))

    def __int__(self):
        # adding to 0 turns the flag into an integer
        return 0 + _parseBool(textOf(self._c_node))

    def __float__(self):
        # adding to 0.0 turns the flag into a float
        return 0.0 + _parseBool(textOf(self._c_node))

    def __str__(self):
        return unicode(_parseBool(textOf(self._c_node)))

    def __repr__(self):
        return repr(_parseBool(textOf(self._c_node)))

    def __richcmp__(self, other, int op):
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        return hash(_parseBool(textOf(self._c_node)))
|
| 890 |
+
|
| 891 |
+
|
| 892 |
+
cdef _checkBool(s):
    # Type check for boolean values: raises ValueError unless ``s`` is one
    # of the strings accepted by __parseBoolAsInt().  None is rejected.
    cdef int parsed
    if s is None:
        parsed = -1
    else:
        parsed = __parseBoolAsInt(s)
    if parsed == -1:
        raise ValueError
|
| 898 |
+
|
| 899 |
+
|
| 900 |
+
cdef bint _parseBool(s) except -1:
    # Parse a boolean string.  None counts as False; anything that
    # __parseBoolAsInt() does not recognise raises ValueError.
    cdef int flag
    if s is not None:
        flag = __parseBoolAsInt(s)
        if flag != -1:
            return flag
        raise ValueError(f"Invalid boolean value: '{s}'")
    return False
|
| 908 |
+
|
| 909 |
+
|
| 910 |
+
cdef inline int __parseBoolAsInt(text) except -2:
    # Map 'false'/'0' to 0 and 'true'/'1' to 1; any other value gives -1.
    cdef int result = -1
    if text == 'false':
        result = 0
    elif text == 'true':
        result = 1
    elif text == '0':
        result = 0
    elif text == '1':
        result = 1
    return result
|
| 920 |
+
|
| 921 |
+
|
| 922 |
+
cdef object _parseNumber(NumberElement element):
    # Run the element's value parser on its text content.
    text = textOf(element._c_node)
    return element._parse_value(text)
|
| 924 |
+
|
| 925 |
+
|
| 926 |
+
# States of the number syntax check in _checkNumber().
cdef enum NumberParserState:
    NPS_SPACE_PRE  = 0   # initial state / leading whitespace
    NPS_SIGN       = 1   # sign in front of the number
    NPS_DIGITS     = 2   # digits before the decimal point
    NPS_POINT_LEAD = 3   # decimal point without digits in front of it
    NPS_POINT      = 4   # decimal point after digits
    NPS_FRACTION   = 5   # digits after the decimal point
    NPS_EXP        = 6   # exponent marker
    NPS_EXP_SIGN   = 7   # sign of the exponent
    NPS_DIGITS_EXP = 8   # digits of the exponent
    NPS_SPACE_TAIL = 9   # trailing whitespace
    NPS_INF1       = 20  # 'I' of INF
    NPS_INF2       = 21  # 'N' of INF
    NPS_INF3       = 22  # 'F' of INF
    NPS_NAN1       = 23  # first 'N' of NaN
    NPS_NAN2       = 24  # 'a' of NaN
    NPS_NAN3       = 25  # last 'N' of NaN
    NPS_ERROR      = 99  # invalid input
|
| 944 |
+
|
| 945 |
+
|
| 946 |
+
# Fused string type: lets _checkNumber() be specialised for both byte
# strings and unicode strings.
ctypedef fused bytes_unicode:
    bytes
    unicode
|
| 949 |
+
|
| 950 |
+
|
| 951 |
+
cdef _checkNumber(bytes_unicode s, bint allow_float):
    # Check that ``s`` has the syntax of a number and raise ValueError if not.
    #
    # The string is scanned character by character with a small state
    # machine (see NumberParserState).  Whitespace is accepted around the
    # value.  With ``allow_float`` false, only an optionally signed run of
    # digits is accepted; otherwise a decimal point, an exponent and the
    # special values INF and NaN (in any letter case) are accepted as well.
    cdef Py_UCS4 c
    cdef NumberParserState state = NPS_SPACE_PRE

    for c in s:
        if c in '0123456789':
            if state in (NPS_DIGITS, NPS_FRACTION, NPS_DIGITS_EXP):
                pass
            elif state in (NPS_SPACE_PRE, NPS_SIGN):
                state = NPS_DIGITS
            elif state in (NPS_POINT_LEAD, NPS_POINT):
                state = NPS_FRACTION
            elif state in (NPS_EXP, NPS_EXP_SIGN):
                state = NPS_DIGITS_EXP
            else:
                state = NPS_ERROR
        else:
            if c == '.':
                if state in (NPS_SPACE_PRE, NPS_SIGN):
                    state = NPS_POINT_LEAD
                elif state == NPS_DIGITS:
                    state = NPS_POINT
                else:
                    state = NPS_ERROR
                if not allow_float:
                    state = NPS_ERROR
            elif c in '-+':
                if state == NPS_SPACE_PRE:
                    state = NPS_SIGN
                elif state == NPS_EXP:
                    state = NPS_EXP_SIGN
                else:
                    state = NPS_ERROR
            # Both 'E' and 'e' are valid exponent markers in XML Schema
            # doubles/floats and for Python's float().
            elif c in 'eE':
                if state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION):
                    state = NPS_EXP
                else:
                    state = NPS_ERROR
                if not allow_float:
                    state = NPS_ERROR
            # Allow INF and NaN. XMLSchema requires case, we don't, like Python.
            elif c in 'iI':
                state = NPS_INF1 if allow_float and state in (NPS_SPACE_PRE, NPS_SIGN) else NPS_ERROR
            elif c in 'fF':
                state = NPS_INF3 if state == NPS_INF2 else NPS_ERROR
            elif c in 'aA':
                state = NPS_NAN2 if state == NPS_NAN1 else NPS_ERROR
            elif c in 'nN':
                # Python also allows [+-]NaN, so let's accept that.
                if state in (NPS_SPACE_PRE, NPS_SIGN):
                    state = NPS_NAN1 if allow_float else NPS_ERROR
                elif state == NPS_NAN2:
                    state = NPS_NAN3
                elif state == NPS_INF1:
                    state = NPS_INF2
                else:
                    state = NPS_ERROR
            # Allow spaces around text values.
            else:
                if c.isspace() if (bytes_unicode is unicode) else c in b'\x09\x0a\x0b\x0c\x0d\x20':
                    if state in (NPS_SPACE_PRE, NPS_SPACE_TAIL):
                        pass
                    elif state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3):
                        state = NPS_SPACE_TAIL
                    else:
                        state = NPS_ERROR
                else:
                    state = NPS_ERROR

        # no way back from an error, stop scanning
        if state == NPS_ERROR:
            break

    # Only states that mark a complete number are acceptable at the end.
    if state not in (NPS_DIGITS, NPS_FRACTION, NPS_POINT, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3, NPS_SPACE_TAIL):
        raise ValueError
|
| 1025 |
+
|
| 1026 |
+
|
| 1027 |
+
cdef _checkInt(s):
    # Type check for integers: _checkNumber() with floats disallowed.
    return _checkNumber(<unicode>s, False)
|
| 1029 |
+
|
| 1030 |
+
|
| 1031 |
+
cdef _checkFloat(s):
    # Type check for floats: _checkNumber() with floats allowed.
    return _checkNumber(<unicode>s, True)
|
| 1033 |
+
|
| 1034 |
+
|
| 1035 |
+
cdef object _strValueOf(obj):
    # String value of an operand: strings are returned as they are,
    # elements give their text content ('' if empty), None gives '' and
    # everything else is converted with unicode().
    if python._isString(obj):
        result = obj
    elif isinstance(obj, _Element):
        result = textOf((<_Element>obj)._c_node) or ''
    elif obj is None:
        result = ''
    else:
        result = unicode(obj)
    return result
|
| 1043 |
+
|
| 1044 |
+
|
| 1045 |
+
cdef object _numericValueOf(obj):
    # Value of an operand for arithmetic: the parsed number of a
    # NumberElement, the 'pyval' of any other object that has one, or the
    # object itself.
    if isinstance(obj, NumberElement):
        return _parseNumber(<NumberElement>obj)
    try:
        # not always numeric, but Python will raise the right exception
        value = obj.pyval
    except AttributeError:
        value = obj
    return value
|
| 1054 |
+
|
| 1055 |
+
|
| 1056 |
+
cdef _richcmpPyvals(left, right, int op):
    # Rich comparison in which each side is replaced by its 'pyval'
    # attribute if it has one.
    return python.PyObject_RichCompare(
        getattr(left, 'pyval', left),
        getattr(right, 'pyval', right),
        op)
|
| 1060 |
+
|
| 1061 |
+
|
| 1062 |
+
################################################################################
|
| 1063 |
+
# Python type registry
|
| 1064 |
+
|
| 1065 |
+
cdef class PyType:
    """PyType(self, name, type_check, type_class, stringify=None)
    User defined type.

    Named type that contains a type check function, a type class that
    inherits from ObjectifiedDataElement and an optional "stringification"
    function.  The type check must take a string as argument and raise
    ValueError or TypeError if it cannot handle the string value.  It may be
    None in which case it is not considered for type guessing.  For registered
    named types, the 'stringify' function (or unicode() if None) is used to
    convert a Python object with type name 'name' to the string representation
    stored in the XML tree.

    Example::

        PyType('int', int, MyIntClass).register()

    Note that the order in which types are registered matters.  The first
    matching type will be used.
    """
    cdef readonly object name
    cdef readonly object type_check
    cdef readonly object stringify
    cdef object _type
    cdef list _schema_types

    def __init__(self, name, type_check, type_class, stringify=None):
        # the name is kept as a unicode string, byte strings must be ASCII
        if isinstance(name, bytes):
            name = (<bytes>name).decode('ascii')
        elif not isinstance(name, unicode):
            raise TypeError("Type name must be a string")
        if not (type_check is None or callable(type_check)):
            raise TypeError("Type check function must be callable (or None)")
        # only the tree type is exempt from the data class requirement
        if name != TREE_PYTYPE_NAME and \
                not issubclass(type_class, ObjectifiedDataElement):
            raise TypeError(
                "Data classes must inherit from ObjectifiedDataElement")
        self.name = name
        self._type = type_class
        self.type_check = type_check
        self.stringify = unicode if stringify is None else stringify
        self._schema_types = []

    def __repr__(self):
        type_name = self._type.__name__
        return "PyType(%s, %s)" % (self.name, type_name)

    def register(self, before=None, after=None):
        """register(self, before=None, after=None)

        Register the type.

        The additional keyword arguments 'before' and 'after' accept a
        sequence of type names that must appear before/after the new type in
        the type list.  If any of them is not currently known, it is simply
        ignored.  Raises ValueError if the dependencies cannot be fulfilled.
        """
        if self.name == TREE_PYTYPE_NAME:
            raise ValueError("Cannot register tree type")
        if self.type_check is not None:
            # drop a previous registration of the same check function
            for item in _TYPE_CHECKS:
                if item[0] is self.type_check:
                    _TYPE_CHECKS.remove(item)
                    break
            entry = (self.type_check, self)
            # insertion window: at or behind 'first_pos', in front of 'last_pos'
            first_pos = 0
            last_pos = -1
            if before or after:
                if before is None:
                    before = ()
                if after is None:
                    after = ()
                for i, (_, registered) in enumerate(_TYPE_CHECKS):
                    if last_pos == -1 and registered.name in before:
                        last_pos = i
                    if registered.name in after:
                        first_pos = i+1
            if last_pos == -1:
                # no 'before' constraint matched: the end of the list is fine
                _TYPE_CHECKS.append(entry)
            elif first_pos <= last_pos:
                _TYPE_CHECKS.insert(last_pos, entry)
            else:
                raise ValueError("inconsistent before/after dependencies")

        _PYTYPE_DICT[self.name] = self
        for xs_type in self._schema_types:
            _SCHEMA_TYPE_DICT[xs_type] = self

    def unregister(self):
        "unregister(self)"
        # only entries that still refer to this very object are removed
        if _PYTYPE_DICT.get(self.name) is self:
            del _PYTYPE_DICT[self.name]
        stale = [xs_type for xs_type, pytype in _SCHEMA_TYPE_DICT.items()
                 if pytype is self]
        for xs_type in stale:
            del _SCHEMA_TYPE_DICT[xs_type]
        if self.type_check is not None:
            try:
                _TYPE_CHECKS.remove( (self.type_check, self) )
            except ValueError:
                # was not in the list of type checks
                pass

    property xmlSchemaTypes:
        """The list of XML Schema datatypes this Python type maps to.

        Note that this must be set before registering the type!
        """
        def __get__(self):
            return self._schema_types
        def __set__(self, types):
            self._schema_types = [unicode(xs_type) for xs_type in types]
|
| 1176 |
+
|
| 1177 |
+
|
| 1178 |
+
# Registries filled by PyType.register():
cdef:
    dict _PYTYPE_DICT = {}        # type name -> PyType
    dict _SCHEMA_TYPE_DICT = {}   # XML Schema type name -> PyType
    list _TYPE_CHECKS = []        # ordered (type_check, PyType) tuples
|
| 1181 |
+
|
| 1182 |
+
cdef unicode _xml_bool(value):
    # Serialise a truth value as "true" or "false".
    if value:
        return "true"
    return "false"
|
| 1184 |
+
|
| 1185 |
+
cdef unicode _xml_float(value):
    # Serialise a float: infinities become "INF"/"-INF", not-a-number
    # becomes "NaN", everything else uses repr().
    if _float_is_inf(value):
        return "INF" if value > 0 else "-INF"
    if _float_is_nan(value):
        return "NaN"
    return unicode(repr(value))
|
| 1193 |
+
|
| 1194 |
+
cdef _pytypename(obj):
    # All string objects share the type name "str", other objects use
    # _typename().
    if python._isString(obj):
        return "str"
    return _typename(obj)
|
| 1196 |
+
|
| 1197 |
+
def pytypename(obj):
    """pytypename(obj)

    Find the name of the corresponding PyType for a Python object.
    String objects map to "str", everything else to its type name.
    """
    if python._isString(obj):
        return "str"
    return _typename(obj)
|
| 1203 |
+
|
| 1204 |
+
cdef _registerPyTypes():
    # Register the built-in types.  The statement order is kept as is:
    # types with a check function are tried in list order when guessing.
    int_type = PyType('int', _checkInt, IntElement) # wraps functions for Python
    int_type.xmlSchemaTypes = (
        "integer", "int", "short", "byte", "unsignedShort",
        "unsignedByte", "nonPositiveInteger",
        "negativeInteger", "long", "nonNegativeInteger",
        "unsignedLong", "unsignedInt", "positiveInteger",
    )
    int_type.register()

    # 'long' type just for backwards compatibility
    long_type = PyType('long', None, IntElement)
    long_type.register()

    float_type = PyType('float', _checkFloat, FloatElement, _xml_float) # wraps functions for Python
    float_type.xmlSchemaTypes = ("double", "float")
    float_type.register()

    bool_type = PyType('bool', _checkBool, BoolElement, _xml_bool) # wraps functions for Python
    bool_type.xmlSchemaTypes = ("boolean",)
    bool_type.register()

    str_type = PyType('str', None, StringElement)
    str_type.xmlSchemaTypes = (
        "string", "normalizedString", "token", "language",
        "Name", "NCName", "ID", "IDREF", "ENTITY",
        "NMTOKEN",
    )
    str_type.register()

    # since lxml 2.0
    none_type = PyType('NoneType', None, NoneElement)
    none_type.register()

    # backwards compatibility
    legacy_none_type = PyType('none', None, NoneElement)
    legacy_none_type.register()
|
| 1237 |
+
|
| 1238 |
+
# non-registered PyType for inner tree elements
# (register() is never called on it, so it does not appear in the registries)
cdef PyType TREE_PYTYPE = PyType(TREE_PYTYPE_NAME, None, ObjectifiedElement)

# populate the registries with the built-in types when the module is set up
_registerPyTypes()
|
| 1242 |
+
|
| 1243 |
+
def getRegisteredTypes():
    """getRegisteredTypes()

    Returns a list of the currently registered PyType objects.

    To add a new type, retrieve this list and call unregister() for all
    entries.  Then add the new type at a suitable position (possibly replacing
    an existing one) and call register() for all entries.

    This is necessary if the new type interferes with the type check functions
    of existing ones (normally only int/float/bool) and must be tried before
    other types.  To add a type that is not yet parsable by the current type
    check functions, you can simply register() it, which will append it to the
    end of the type list.
    """
    cdef list types = []
    cdef set known = set()
    # Types with a type check come first, in the order in which the checks
    # are tried; the remaining types registered by name follow.
    cdef list candidates = [entry[1] for entry in _TYPE_CHECKS]
    candidates.extend(_PYTYPE_DICT.values())
    for pytype in candidates:
        name = pytype.name
        # report each type name only once, keeping its first occurrence
        if name not in known:
            known.add(name)
            types.append(pytype)
    return types
|
| 1271 |
+
|
| 1272 |
+
cdef PyType _guessPyType(value, PyType defaulttype):
    # Return the first registered PyType whose check accepts 'value',
    # 'defaulttype' if no check accepts it, or None for a None value.
    if value is None:
        return None
    for accepts, candidate in _TYPE_CHECKS:
        try:
            accepts(value)
        except IGNORABLE_ERRORS:
            # could not be parsed as the specified type => try the next one
            continue
        return <PyType>candidate
    return defaulttype
|
| 1283 |
+
|
| 1284 |
+
cdef object _guessElementClass(tree.xmlNode* c_node):
    # Pick an element class from the text content of the node: None when
    # there is no text, StringElement for empty text, otherwise the class
    # of the first registered type whose check accepts the text.
    text = textOf(c_node)
    if text is None:
        return None
    if text == '':
        return StringElement

    for accepts, candidate in _TYPE_CHECKS:
        try:
            accepts(text)
        except IGNORABLE_ERRORS:
            continue
        return (<PyType>candidate)._type
    return None
|
| 1298 |
+
|
| 1299 |
+
################################################################################
|
| 1300 |
+
# adapted ElementMaker supports registered PyTypes
|
| 1301 |
+
|
| 1302 |
+
@cython.final
@cython.internal
cdef class _ObjectifyElementMakerCaller:
    # Callable that builds one element with a fixed tag.  Instances are
    # created and configured by ElementMaker._build_element_maker().
    cdef object _tag              # tag name of the elements to create
    cdef object _nsmap            # namespace mapping passed to the factory
    cdef object _element_factory  # custom factory callable, or None
    cdef bint _annotate           # whether to add a pytype attribute

    def __call__(self, *children, **attrib):
        """__call__(self, *children, **attrib)

        Create the element and fill it from the positional arguments:

        * None, if it is the only child, sets the xsi:nil attribute to "true"
        * strings are added as text
        * elements are appended as children
        * other element maker callers add an empty subelement with their tag
        * dicts provide attributes; keyword arguments take precedence
        * any other object is added as text, converted by the PyType that is
          registered under its type name, or by unicode() if there is none
        """
        cdef _ObjectifyElementMakerCaller elementMaker
        cdef _Element element
        cdef _Element childElement
        cdef bint has_children
        cdef bint has_string_value
        if self._element_factory is None:
            element = _makeElement(self._tag, None, attrib, self._nsmap)
        else:
            element = self._element_factory(self._tag, attrib, self._nsmap)

        # type name of the last non-string data value that was added
        pytype_name = None
        has_children = False
        has_string_value = False
        for child in children:
            if child is None:
                if len(children) == 1:
                    cetree.setAttributeValue(
                        element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true")
            elif python._isString(child):
                _add_text(element, child)
                has_string_value = True
            elif isinstance(child, _Element):
                cetree.appendChildToElement(element, <_Element>child)
                has_children = True
            elif isinstance(child, _ObjectifyElementMakerCaller):
                # an uncalled maker such as "E.br" => add an empty element
                elementMaker = <_ObjectifyElementMakerCaller>child
                if elementMaker._element_factory is None:
                    cetree.makeSubElement(element, elementMaker._tag,
                                          None, None, None, None)
                else:
                    childElement = elementMaker._element_factory(
                        elementMaker._tag)
                    cetree.appendChildToElement(element, childElement)
                has_children = True
            elif isinstance(child, dict):
                for name, value in child.items():
                    # keyword arguments in attrib take precedence
                    if name in attrib:
                        continue
                    pytype = _PYTYPE_DICT.get(_typename(value))
                    if pytype is not None:
                        value = (<PyType>pytype).stringify(value)
                    elif not python._isString(value):
                        value = unicode(value)
                    cetree.setAttributeValue(element, name, value)
            else:
                if pytype_name is not None:
                    # concatenation always makes the result a string
                    has_string_value = True
                pytype_name = _typename(child)
                pytype = _PYTYPE_DICT.get(_typename(child))
                if pytype is not None:
                    _add_text(element, (<PyType>pytype).stringify(child))
                else:
                    # no registered type => fall back to plain text
                    has_string_value = True
                    child = unicode(child)
                    _add_text(element, child)

        # only elements without element children get a pytype annotation
        if self._annotate and not has_children:
            if has_string_value:
                cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, "str")
            elif pytype_name is not None:
                cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name)

        return element
|
| 1377 |
+
|
| 1378 |
+
cdef _add_text(_Element elem, text):
    # Append text to the tree under construction.  If a child node is found,
    # the text extends that child's tail; otherwise it extends the text of
    # the element itself.
    cdef tree.xmlNode* c_last_child = cetree.findChildBackwards(elem._c_node, 0)
    if c_last_child is NULL:
        existing = cetree.textOf(elem._c_node)
        if existing is not None:
            text = existing + text
        cetree.setNodeText(elem._c_node, text)
        return

    existing = cetree.tailOf(c_last_child)
    if existing is not None:
        text = existing + text
    cetree.setTailText(c_last_child, text)
|
| 1393 |
+
|
| 1394 |
+
cdef class ElementMaker:
    """ElementMaker(self, namespace=None, nsmap=None, annotate=True, makeelement=None)

    An ElementMaker that can be used for constructing trees.

    Example::

      >>> M = ElementMaker(annotate=False)
      >>> attributes = {'class': 'par'}
      >>> html = M.html( M.body( M.p('hello', attributes, M.br, 'objectify', style="font-weight: bold") ) )

      >>> from lxml.etree import tostring
      >>> print(tostring(html, method='html').decode('ascii'))
      <html><body><p style="font-weight: bold" class="par">hello<br>objectify</p></body></html>

    To create tags that are not valid Python identifiers, call the factory
    directly and pass the tag name as first argument::

      >>> root = M('tricky-tag', 'some text')
      >>> print(root.tag)
      tricky-tag
      >>> print(root.text)
      some text

    Note that this module has a predefined ElementMaker instance called ``E``.
    """
    cdef object _makeelement
    cdef object _namespace
    cdef object _nsmap
    cdef bint _annotate
    cdef dict _cache

    def __init__(self, *, namespace=None, nsmap=None, annotate=True,
                 makeelement=None):
        if nsmap is None:
            nsmap = _DEFAULT_NSMAP if annotate else {}
        self._nsmap = nsmap
        if namespace is None:
            self._namespace = None
        else:
            # keep the namespace in "{uri}" form, ready to prefix tag names
            self._namespace = "{%s}" % namespace
        self._annotate = annotate
        if makeelement is not None and not callable(makeelement):
            raise TypeError(
                f"argument of 'makeelement' parameter must be callable, got {type(makeelement)}")
        self._makeelement = makeelement
        self._cache = {}

    @cython.final
    cdef _build_element_maker(self, tag, bint caching):
        # Create the per-tag caller object and optionally remember it.
        cdef _ObjectifyElementMakerCaller maker
        maker = _ObjectifyElementMakerCaller.__new__(_ObjectifyElementMakerCaller)
        needs_namespace = self._namespace is not None and tag[0] != "{"
        maker._tag = (self._namespace + tag) if needs_namespace else tag
        maker._nsmap = self._nsmap
        maker._annotate = self._annotate
        maker._element_factory = self._makeelement
        if caching:
            # keep the cache bounded by starting over once it grows too large
            if len(self._cache) > 200:
                self._cache.clear()
            self._cache[tag] = maker
        return maker

    def __getattr__(self, tag):
        cached_maker = self._cache.get(tag)
        if cached_maker is not None:
            return cached_maker
        return self._build_element_maker(tag, caching=True)

    def __call__(self, tag, *args, **kwargs):
        maker = self._cache.get(tag)
        if maker is None:
            maker = self._build_element_maker(
                tag, caching=not is_special_method(tag))
        return maker(*args, **kwargs)
|
| 1470 |
+
|
| 1471 |
+
################################################################################
|
| 1472 |
+
# Recursive element dumping
|
| 1473 |
+
|
| 1474 |
+
cdef bint __RECURSIVE_STR = False  # default: off

def enable_recursive_str(on=True):
    """enable_recursive_str(on=True)

    Switch the recursive tree representation for str(element) on or off.
    The representation is the one generated by objectify.dump(element).
    """
    global __RECURSIVE_STR
    __RECURSIVE_STR = on
|
| 1484 |
+
|
| 1485 |
+
def dump(_Element element not None):
    """dump(_Element element not None)

    Return a recursively generated string representation of an element.
    """
    cdef int top_level = 0
    return _dump(element, top_level)
|
| 1491 |
+
|
| 1492 |
+
cdef object _dump(_Element element, int indent):
    # Build the text dump of 'element' and, recursively, of its children.
    # Each element produces a "tag = value [type name]" line followed by one
    # "* name = value" line per attribute, in sorted order.
    #
    # NOTE(review): the indentation unit and the attribute prefix below each
    # contain a single space in this copy of the code.  The copy has lost its
    # leading whitespace, so runs of spaces inside these literals may have
    # been collapsed as well - confirm against the upstream lxml source.
    indentstr = " " * indent
    if isinstance(element, ObjectifiedDataElement):
        value = repr(element)
    else:
        value = textOf(element._c_node)
        if value is not None:
            # whitespace-only text is shown like missing text
            if not value.strip():
                value = None
            else:
                value = repr(value)
    result = f"{indentstr}{element.tag} = {value} [{_typename(element)}]\n"
    xsi_ns = "{%s}" % XML_SCHEMA_INSTANCE_NS
    pytype_ns = "{%s}" % PYTYPE_NAMESPACE
    for name, value in sorted(cetree.iterattributes(element, 3)):
        if '{' in name:
            if name == PYTYPE_ATTRIBUTE:
                # the pytype annotation of tree elements is not shown
                if value == TREE_PYTYPE_NAME:
                    continue
                else:
                    name = name.replace(pytype_ns, 'py:')
            name = name.replace(xsi_ns, 'xsi:')
        result += f"{indentstr} * {name} = {value!r}\n"

    indent += 1
    for child in element.iterchildren():
        result += _dump(child, indent)
    # indent was 0 on entry for the outermost call only
    if indent == 1:
        return result[:-1] # strip last '\n'
    else:
        return result
|
| 1523 |
+
|
| 1524 |
+
|
| 1525 |
+
################################################################################
|
| 1526 |
+
# Pickle support for objectified ElementTree
|
| 1527 |
+
|
| 1528 |
+
def __unpickleElementTree(data):
    # Rebuild an ElementTree from its serialised XML using fromstring().
    root = fromstring(data)
    return etree.ElementTree(root)
|
| 1530 |
+
|
| 1531 |
+
cdef _setupPickle(elementTreeReduceFunction):
    # Register the given reduce function for etree._ElementTree objects,
    # with __unpickleElementTree as the matching constructor.
    import copyreg
    copyreg.pickle(
        etree._ElementTree,
        elementTreeReduceFunction,
        __unpickleElementTree,
    )
|
| 1535 |
+
|
| 1536 |
+
def pickleReduceElementTree(obj):
    # Reduce a tree to (constructor, arguments): the tree is serialised
    # and rebuilt from that serialisation on unpickling.
    serialised = etree.tostring(obj)
    return __unpickleElementTree, (serialised,)
|
| 1538 |
+
|
| 1539 |
+
# install the pickle support when the module is set up ...
_setupPickle(pickleReduceElementTree)
# ... and drop the helper's name from the module namespace afterwards
del pickleReduceElementTree
|
| 1541 |
+
|
| 1542 |
+
################################################################################
|
| 1543 |
+
# Element class lookup
|
| 1544 |
+
|
| 1545 |
+
cdef class ObjectifyElementClassLookup(ElementClassLookup):
    """ObjectifyElementClassLookup(self, tree_class=None, empty_data_class=None)
    Element class lookup method that uses the objectify classes.
    """
    cdef object empty_data_class
    cdef object tree_class

    def __init__(self, tree_class=None, empty_data_class=None):
        """Lookup mechanism for objectify.

        The default Element classes can be replaced by passing subclasses of
        ObjectifiedElement and ObjectifiedDataElement as keyword arguments.
        'tree_class' defines inner tree classes (defaults to
        ObjectifiedElement), 'empty_data_class' defines the default class for
        empty data elements (defaults to StringElement).
        """
        self._lookup_function = _lookupElementClass
        self.tree_class = (
            ObjectifiedElement if tree_class is None else tree_class)
        self.empty_data_class = (
            StringElement if empty_data_class is None else empty_data_class)
|
| 1567 |
+
|
| 1568 |
+
cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node):
    # Choose the element class for a node.  The steps are tried in order:
    # children, xsi:nil, pytype hint, xsi:type hint, text content, position.
    cdef ObjectifyElementClassLookup lookup = <ObjectifyElementClassLookup>state

    # an element with children is never a data element
    if cetree.hasChild(c_node):
        return lookup.tree_class

    # xsi:nil="true" => NoneElement
    nil_value = cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil")
    if "true" == nil_value:
        return NoneElement

    # Python type hint
    pytype_hint = cetree.attributeValueFromNsName(
        c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
    if pytype_hint is not None:
        if pytype_hint == TREE_PYTYPE_NAME:
            return lookup.tree_class
        py_type = <PyType>_PYTYPE_DICT.get(pytype_hint)
        if py_type is not None:
            return py_type._type
        # unknown hint => fall through to the remaining steps

    # XML Schema type hint, looked up with and then without its prefix
    xsi_hint = cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
    if xsi_hint is not None:
        schema_type = <PyType>_SCHEMA_TYPE_DICT.get(xsi_hint)
        if schema_type is None and ':' in xsi_hint:
            prefix, xsi_hint = xsi_hint.split(':', 1)
            schema_type = <PyType>_SCHEMA_TYPE_DICT.get(xsi_hint)
        if schema_type is not None:
            return schema_type._type

    # no usable hint => decide by the text content
    guessed_class = _guessElementClass(c_node)
    if guessed_class is not None:
        return guessed_class

    # a root node defaults to the tree class
    if c_node.parent is NULL or not tree._isElement(c_node.parent):
        return lookup.tree_class

    return lookup.empty_data_class
|
| 1613 |
+
|
| 1614 |
+
|
| 1615 |
+
################################################################################
|
| 1616 |
+
# Type annotations
|
| 1617 |
+
|
| 1618 |
+
cdef PyType _check_type(tree.xmlNode* c_node, PyType pytype):
    # Return 'pytype' if its type check accepts the text of the node,
    # None otherwise (or if no type was given).
    if pytype is None:
        return None
    text = textOf(c_node)
    try:
        pytype.type_check(text)
    except IGNORABLE_ERRORS:
        # could not be parsed as the specified type => ignore
        return None
    return pytype
|
| 1629 |
+
|
| 1630 |
+
def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False,
               empty_pytype=None):
    """pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, empty_pytype=None)

    Recursively annotates the elements of an XML tree with 'pytype'
    attributes.

    If the 'ignore_old' keyword argument is True (default: False), current
    'pytype' attributes will be ignored and replaced.  Otherwise, they will be
    checked and only replaced if they no longer fit the current text value.

    Setting the keyword argument ``ignore_xsi`` to True makes the function
    additionally ignore existing ``xsi:type`` annotations.  The default is to
    use them as a type hint.

    The default annotation of empty elements can be set with the
    ``empty_pytype`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'str', for example, to make string values the default.
    """
    cdef _Element element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # annotate_xsi=0, annotate_pytype=1; 'ignore_old' refers to old pytypes
    _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype)
|
| 1652 |
+
|
| 1653 |
+
def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False,
                empty_type=None):
    """xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None)

    Recursively annotates the elements of an XML tree with 'xsi:type'
    attributes.

    If the 'ignore_old' keyword argument is True (default: False), current
    'xsi:type' attributes will be ignored and replaced.  Otherwise, they will
    be checked and only replaced if they no longer fit the current text value.

    Note that the mapping from Python types to XSI types is usually ambiguous.
    Currently, only the first XSI type name in the corresponding PyType
    definition will be used for annotation.  Thus, you should consider naming
    the widest type first if you define additional types.

    Setting the keyword argument ``ignore_pytype`` to True makes the function
    additionally ignore existing ``pytype`` annotations.  The default is to
    use them as a type hint.

    The default annotation of empty elements can be set with the
    ``empty_type`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'string', for example, to make string values the default.
    """
    cdef _Element element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # annotate_xsi=1, annotate_pytype=0; 'ignore_old' refers to old xsi:types
    _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None)
|
| 1680 |
+
|
| 1681 |
+
def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False,
             empty_pytype=None, empty_type=None, annotate_xsi=0,
             annotate_pytype=1):
    """annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1)

    Recursively annotates the elements of an XML tree with 'xsi:type'
    and/or 'py:pytype' attributes.

    If the 'ignore_old' keyword argument is True (the default), current
    'py:pytype' attributes will be ignored for the type annotation. Set to False
    if you want reuse existing 'py:pytype' information (iff appropriate for the
    element text value).

    If the 'ignore_xsi' keyword argument is False (the default), existing
    'xsi:type' attributes will be used for the type annotation, if they fit the
    element text values.

    Note that the mapping from Python types to XSI types is usually ambiguous.
    Currently, only the first XSI type name in the corresponding PyType
    definition will be used for annotation.  Thus, you should consider naming
    the widest type first if you define additional types.

    The default 'py:pytype' annotation of empty elements can be set with the
    ``empty_pytype`` keyword argument. Pass 'str', for example, to make
    string values the default.

    The default 'xsi:type' annotation of empty elements can be set with the
    ``empty_type`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'string', for example, to make string values the default.

    The keyword arguments 'annotate_xsi' (default: 0) and 'annotate_pytype'
    (default: 1) control which kind(s) of annotation to use.
    """
    cdef _Element root = cetree.rootNodeOrRaise(element_or_tree)
    _annotate(
        root, annotate_xsi, annotate_pytype,
        ignore_xsi, ignore_old,
        empty_type, empty_pytype)
|
| 1718 |
+
|
| 1719 |
+
|
| 1720 |
+
cdef _annotate(_Element element, bint annotate_xsi, bint annotate_pytype,
               bint ignore_xsi, bint ignore_pytype,
               empty_type_name, empty_pytype_name):
    # Shared implementation of annotate(), pyannotate() and xsiannotate():
    # resolve the default type for empty elements once, then annotate every
    # element node visited by the tree walk that starts at 'element'.
    cdef _Document doc
    cdef tree.xmlNode* c_node
    cdef PyType empty_pytype, StrType, NoneType

    # nothing to do if neither kind of annotation was requested
    if not annotate_xsi and not annotate_pytype:
        return

    # An XML Schema type name takes precedence over a pytype name.
    # Byte string names are decoded as ASCII before the lookup.
    if empty_type_name is not None:
        if isinstance(empty_type_name, bytes):
            empty_type_name = (<bytes>empty_type_name).decode("ascii")
        empty_pytype = <PyType>_SCHEMA_TYPE_DICT.get(empty_type_name)
    elif empty_pytype_name is not None:
        if isinstance(empty_pytype_name, bytes):
            empty_pytype_name = (<bytes>empty_pytype_name).decode("ascii")
        empty_pytype = <PyType>_PYTYPE_DICT.get(empty_pytype_name)
    else:
        empty_pytype = None

    # looked up once here and passed on to every _annotate_element() call
    StrType = <PyType>_PYTYPE_DICT.get('str')
    NoneType = <PyType>_PYTYPE_DICT.get('NoneType')

    doc = element._doc
    c_node = element._c_node
    # the BEGIN/END pair brackets the statements that run for each node
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        _annotate_element(c_node, doc, annotate_xsi, annotate_pytype,
                          ignore_xsi, ignore_pytype,
                          empty_type_name, empty_pytype, StrType, NoneType)
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
|
| 1752 |
+
|
| 1753 |
+
cdef int _annotate_element(tree.xmlNode* c_node, _Document doc,
                           bint annotate_xsi, bint annotate_pytype,
                           bint ignore_xsi, bint ignore_pytype,
                           empty_type_name, PyType empty_pytype,
                           PyType StrType, PyType NoneType) except -1:
    # Determine the type of a single element and write it back as 'xsi:type'
    # and/or pytype attribute.  The type is taken from the first source that
    # yields one: xsi:nil, the existing xsi:type, the existing pytype, a guess
    # from the text content, and finally the defaults for empty elements.
    # Returns 0; -1 is reserved for signalling an exception.
    cdef tree.xmlNs* c_ns
    cdef PyType pytype = None
    typename = None
    istree = 0

    # if element is defined as xsi:nil, represent it as None
    if cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil") == "true":
        pytype = NoneType

    if pytype is None and not ignore_xsi:
        # check that old xsi type value is valid
        typename = cetree.attributeValueFromNsName(
            c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
        if typename is not None:
            pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename)
            if pytype is None and ':' in typename:
                # retry the lookup without the namespace prefix
                prefix, typename = typename.split(':', 1)
                pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename)
            if pytype is not None and pytype is not StrType:
                # StrType does not have a typecheck but is the default
                # anyway, so just accept it if given as type
                # information
                pytype = _check_type(c_node, pytype)
                if pytype is None:
                    typename = None

    if pytype is None and not ignore_pytype:
        # check that old pytype value is valid
        old_pytypename = cetree.attributeValueFromNsName(
            c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
        if old_pytypename is not None:
            if old_pytypename == TREE_PYTYPE_NAME:
                if not cetree.hasChild(c_node):
                    # only case where we should keep it,
                    # everything else is clear enough
                    pytype = TREE_PYTYPE
            else:
                if old_pytypename == 'none':
                    # transition from lxml 1.x
                    old_pytypename = "NoneType"
                pytype = <PyType>_PYTYPE_DICT.get(old_pytypename)
                if pytype is not None and pytype is not StrType:
                    # StrType does not have a typecheck but is the
                    # default anyway, so just accept it if given as
                    # type information
                    pytype = _check_type(c_node, pytype)

    if pytype is None:
        # try to guess type
        if not cetree.hasChild(c_node):
            # element has no children => data class
            pytype = _guessPyType(textOf(c_node), StrType)
        else:
            istree = 1

    if pytype is None:
        # use default type for empty elements
        if cetree.hasText(c_node):
            pytype = StrType
        else:
            pytype = empty_pytype
            if typename is None:
                typename = empty_type_name

    # make the xsi type name agree with the chosen type
    if pytype is not None:
        if typename is None:
            if not istree:
                if pytype._schema_types:
                    # pytype->xsi:type is a 1:n mapping
                    # simply take the first
                    typename = pytype._schema_types[0]
        elif typename not in pytype._schema_types:
            typename = pytype._schema_types[0]

    if annotate_xsi:
        if typename is None or istree:
            # no type name, or a tree element => remove the attribute
            cetree.delAttributeFromNsName(
                c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
        else:
            # update or create attribute
            typename_utf8 = cetree.utf8(typename)
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _XML_SCHEMA_NS, <unsigned char*>'xsd')
            if c_ns is not NULL:
                # make the prefix of the value match the prefix of c_ns
                if b':' in typename_utf8:
                    prefix, name = typename_utf8.split(b':', 1)
                    if c_ns.prefix is NULL or c_ns.prefix[0] == c'\0':
                        typename_utf8 = name
                    elif tree.xmlStrcmp(_xcstr(prefix), c_ns.prefix) != 0:
                        typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + name
                elif c_ns.prefix is not NULL and c_ns.prefix[0] != c'\0':
                    typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + typename_utf8
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi')
            tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"type", _xcstr(typename_utf8))

    if annotate_pytype:
        if pytype is None:
            # delete attribute if it exists
            cetree.delAttributeFromNsName(
                c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
        else:
            # update or create attribute
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _PYTYPE_NAMESPACE, <unsigned char*>'py')
            pytype_name = cetree.utf8(pytype.name)
            tree.xmlSetNsProp(c_node, c_ns, _PYTYPE_ATTRIBUTE_NAME,
                              _xcstr(pytype_name))
            if pytype is NoneType:
                # a None value is additionally marked with xsi:nil="true"
                c_ns = cetree.findOrBuildNodeNsPrefix(
                    doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi')
                tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"nil", <unsigned char*>"true")

    return 0
|
| 1873 |
+
|
| 1874 |
+
# module-level references to the etree functions used by deannotate()
cdef object _strip_attributes = etree.strip_attributes
cdef object _cleanup_namespaces = etree.cleanup_namespaces
|
| 1876 |
+
|
| 1877 |
+
def deannotate(element_or_tree, *, bint pytype=True, bint xsi=True,
               bint xsi_nil=False, bint cleanup_namespaces=False):
    """deannotate(element_or_tree, pytype=True, xsi=True, xsi_nil=False, cleanup_namespaces=False)

    Recursively de-annotate the elements of an XML tree by removing 'py:pytype'
    and/or 'xsi:type' attributes and/or 'xsi:nil' attributes.

    If the 'pytype' keyword argument is True (the default), 'py:pytype'
    attributes will be removed. If the 'xsi' keyword argument is True (the
    default), 'xsi:type' attributes will be removed.
    If the 'xsi_nil' keyword argument is True (default: False), 'xsi:nil'
    attributes will be removed.

    Note that this does not touch the namespace declarations by
    default.  If you want to remove unused namespace declarations from
    the tree, pass the option ``cleanup_namespaces=True``.
    """
    # pair each flag with the attribute it controls and keep the enabled ones
    cdef list attribute_names = [
        attribute_name
        for enabled, attribute_name in (
            (pytype, PYTYPE_ATTRIBUTE),
            (xsi, XML_SCHEMA_INSTANCE_TYPE_ATTR),
            (xsi_nil, XML_SCHEMA_INSTANCE_NIL_ATTR),
        )
        if enabled
    ]

    _strip_attributes(element_or_tree, *attribute_names)
    if cleanup_namespaces:
        _cleanup_namespaces(element_or_tree)
|
| 1906 |
+
|
| 1907 |
+
################################################################################
|
| 1908 |
+
# Module level parser setup
|
| 1909 |
+
|
| 1910 |
+
# The original default parser: an XML parser that removes blank text and
# looks up element classes through ObjectifyElementClassLookup.
cdef object __DEFAULT_PARSER
__DEFAULT_PARSER = etree.XMLParser(remove_blank_text=True)
__DEFAULT_PARSER.set_element_class_lookup( ObjectifyElementClassLookup() )

# The parser currently in use; replaced by set_default_parser().
cdef object objectify_parser
objectify_parser = __DEFAULT_PARSER
|
| 1916 |
+
|
| 1917 |
+
def set_default_parser(new_parser = None):
    """set_default_parser(new_parser = None)

    Install ``new_parser`` as the parser used by objectify's Element()
    and fromstring() functions.

    ``new_parser`` must be an etree.XMLParser instance; anything else
    raises TypeError.  Passing None (or nothing) restores the original
    default parser.
    """
    global objectify_parser
    # Reject unsupported objects up front so that the assignment below only
    # ever sees None or a real XMLParser.
    if new_parser is not None and not isinstance(new_parser, etree.XMLParser):
        raise TypeError("parser must inherit from lxml.etree.XMLParser")
    objectify_parser = __DEFAULT_PARSER if new_parser is None else new_parser
|
| 1934 |
+
|
| 1935 |
+
def makeparser(**kw):
    """makeparser(remove_blank_text=True, **kw)

    Build a fresh XML parser configured for objectify trees.

    All keyword arguments are forwarded to ``etree.XMLParser()``.  Unlike
    a plain XMLParser, blank text is removed by default; pass
    ``remove_blank_text`` explicitly to override that.
    """
    # Only fill in the default when the caller did not decide themselves.
    kw.setdefault('remove_blank_text', True)
    objectify_xml_parser = etree.XMLParser(**kw)
    objectify_xml_parser.set_element_class_lookup(ObjectifyElementClassLookup())
    return objectify_xml_parser
|
| 1950 |
+
|
| 1951 |
+
cdef _Element _makeElement(tag, text, attrib, nsmap):
    # Create an element through the currently active objectify parser.
    # The two positional None arguments (after 'tag' and after 'text') are
    # passed through to cetree.makeElement() unchanged.
    return cetree.makeElement(
        tag, None, objectify_parser, text, None, attrib, nsmap)
|
| 1953 |
+
|
| 1954 |
+
################################################################################
|
| 1955 |
+
# Module level factory functions
|
| 1956 |
+
|
| 1957 |
+
# Private handle on etree.fromstring, used by fromstring() and XML() below.
cdef object _fromstring = etree.fromstring

# Re-exported unchanged from lxml.etree.
SubElement = etree.SubElement
|
| 1961 |
+
|
| 1962 |
+
def fromstring(xml, parser=None, *, base_url=None):
    """fromstring(xml, parser=None, base_url=None)

    Objectify counterpart of lxml.etree.fromstring(): parses ``xml`` with
    the objectify parser unless another parser is given as second argument.

    The ``base_url`` keyword argument sets the original base URL of the
    document, which supports relative paths when looking up external
    entities (DTD, XInclude, ...).
    """
    chosen_parser = objectify_parser if parser is None else parser
    return _fromstring(xml, chosen_parser, base_url=base_url)
|
| 1977 |
+
|
| 1978 |
+
def XML(xml, parser=None, *, base_url=None):
    """XML(xml, parser=None, base_url=None)

    Objectify counterpart of the lxml.etree XML() literal factory: parses
    ``xml`` with the objectify parser unless another parser is given as
    second argument.

    The ``base_url`` keyword argument sets the original base URL of the
    document, which supports relative paths when looking up external
    entities (DTD, XInclude, ...).
    """
    chosen_parser = objectify_parser if parser is None else parser
    return _fromstring(xml, chosen_parser, base_url=base_url)
|
| 1993 |
+
|
| 1994 |
+
# Private handle on etree.parse, used by parse() below.
cdef object _parse = etree.parse

def parse(f, parser=None, *, base_url=None):
    """parse(f, parser=None, base_url=None)

    Parse a file or file-like object with the objectify parser, or with
    the parser given as second argument.

    The ``base_url`` keyword sets a URL for the document when parsing from
    a file-like object.  It is needed to resolve external entities
    (DTD, XInclude, ...) that use relative paths.
    """
    chosen_parser = objectify_parser if parser is None else parser
    return _parse(f, chosen_parser, base_url=base_url)
|
| 2011 |
+
|
| 2012 |
+
# Prefix -> namespace mapping applied by Element() and DataElement() when
# the caller does not supply an nsmap of their own.
cdef dict _DEFAULT_NSMAP = dict(
    py=PYTYPE_NAMESPACE,
    xsi=XML_SCHEMA_INSTANCE_NS,
    xsd=XML_SCHEMA_NS,
)

E = ElementMaker()
|
| 2019 |
+
|
| 2020 |
+
def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes):
    """Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes)

    Objectify specific version of the lxml.etree Element() factory that
    always creates a structural (tree) element.

    Attributes may be given as a mapping (``attrib``), as keyword
    arguments, or both; keyword arguments win over entries in ``attrib``.
    The mapping passed as ``attrib`` is never modified.

    ``_pytype`` sets the 'py:pytype' attribute of the new element and
    defaults to the tree pytype name.  ``nsmap`` defaults to the standard
    py/xsi/xsd prefix mapping.

    NOTE: requires parser based element class lookup activated in lxml.etree!
    """
    if attrib is not None:
        # Always merge into a private copy: the py:pytype entry is written
        # into the mapping below and must not leak into the caller's object.
        attrib = dict(attrib)
        attrib.update(_attributes)
        _attributes = attrib
    if _pytype is None:
        _pytype = TREE_PYTYPE_NAME
    if nsmap is None:
        nsmap = _DEFAULT_NSMAP
    _attributes[PYTYPE_ATTRIBUTE] = _pytype
    return _makeElement(_tag, None, _attributes, nsmap)
|
| 2039 |
+
|
| 2040 |
+
def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None,
                **_attributes):
    """DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes)

    Create a new element from a Python value and XML attributes taken from
    keyword arguments or a dictionary passed as second argument.  The
    mapping passed as ``attrib`` is never modified.

    Automatically adds a 'pytype' attribute for the Python type of the value,
    if the type can be identified.  If '_pytype' or '_xsi' are among the
    keyword arguments, they will be used instead.

    If the _value argument is an ObjectifiedDataElement instance, its py:pytype,
    xsi:type and other attributes and nsmap are reused unless they are redefined
    in attrib and/or keyword arguments.

    Raises ValueError if an xsi type is requested but the effective nsmap
    does not map the relevant prefix (or any prefix) to the XSD namespace.
    """
    if nsmap is None:
        nsmap = _DEFAULT_NSMAP
    if attrib is not None and attrib:
        # Always merge into a private copy: xsi:type, xsi:nil and py:pytype
        # entries are written into the mapping further down and must not
        # leak into the caller's object.
        attrib = dict(attrib)
        attrib.update(_attributes)
        _attributes = attrib
    if isinstance(_value, ObjectifiedElement):
        if _pytype is None:
            if _xsi is None and not _attributes and nsmap is _DEFAULT_NSMAP:
                # special case: no change!
                return _value.__copy__()
    if isinstance(_value, ObjectifiedDataElement):
        # reuse existing nsmap unless redefined in nsmap parameter
        temp = _value.nsmap
        if temp is not None and temp:
            temp = dict(temp)
            temp.update(nsmap)
            nsmap = temp
        # reuse existing attributes unless redefined in attrib/_attributes
        temp = _value.attrib
        if temp is not None and temp:
            temp = dict(temp)
            temp.update(_attributes)
            _attributes = temp
        # reuse existing xsi:type or py:pytype attributes, unless provided as
        # arguments
        if _xsi is None and _pytype is None:
            _xsi = _attributes.get(XML_SCHEMA_INSTANCE_TYPE_ATTR)
            _pytype = _attributes.get(PYTYPE_ATTRIBUTE)

    if _xsi is not None:
        if ':' in _xsi:
            # explicit prefix: it must resolve to the XSD namespace
            prefix, name = _xsi.split(':', 1)
            ns = nsmap.get(prefix)
            if ns != XML_SCHEMA_NS:
                raise ValueError("XSD types require the XSD namespace")
        elif nsmap is _DEFAULT_NSMAP:
            # the default nsmap binds the XSD namespace to 'xsd'
            name = _xsi
            _xsi = 'xsd:' + _xsi
        else:
            # custom nsmap: find whichever prefix is bound to the XSD namespace
            name = _xsi
            for prefix, ns in nsmap.items():
                if ns == XML_SCHEMA_NS:
                    if prefix is not None and prefix:
                        _xsi = prefix + ':' + _xsi
                    break
            else:
                raise ValueError("XSD types require the XSD namespace")
        _attributes[XML_SCHEMA_INSTANCE_TYPE_ATTR] = _xsi
        if _pytype is None:
            # allow using unregistered or even wrong xsi:type names
            py_type = <PyType>_SCHEMA_TYPE_DICT.get(_xsi)
            if py_type is None:
                py_type = <PyType>_SCHEMA_TYPE_DICT.get(name)
            if py_type is not None:
                _pytype = py_type.name

    if _pytype is None:
        _pytype = _pytypename(_value)

    # Convert the value into the text content of the new element.
    if _value is None and _pytype != "str":
        _pytype = _pytype or "NoneType"
        strval = None
    elif python._isString(_value):
        strval = _value
    elif isinstance(_value, bool):
        if _value:
            strval = "true"
        else:
            strval = "false"
    else:
        py_type = <PyType>_PYTYPE_DICT.get(_pytype)
        stringify = unicode if py_type is None else py_type.stringify
        strval = stringify(_value)

    if _pytype is not None:
        if _pytype == "NoneType" or _pytype == "none":
            strval = None
            _attributes[XML_SCHEMA_INSTANCE_NIL_ATTR] = "true"
        else:
            # check if type information from arguments is valid
            py_type = <PyType>_PYTYPE_DICT.get(_pytype)
            if py_type is not None:
                if py_type.type_check is not None:
                    py_type.type_check(strval)
                _attributes[PYTYPE_ATTRIBUTE] = _pytype

    return _makeElement("value", strval, _attributes, nsmap)
|
| 2144 |
+
|
| 2145 |
+
|
| 2146 |
+
################################################################################
|
| 2147 |
+
# ObjectPath
|
| 2148 |
+
|
| 2149 |
+
include "objectpath.pxi"
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/parser.pxi
ADDED
|
@@ -0,0 +1,2071 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Parsers for XML and HTML
|
| 2 |
+
|
| 3 |
+
from lxml.includes cimport xmlparser
|
| 4 |
+
from lxml.includes cimport htmlparser
|
| 5 |
+
|
| 6 |
+
cdef object _GenericAlias
try:
    from types import GenericAlias as _GenericAlias
except ImportError:
    # Python 3.8 has no types.GenericAlias.  The stand-in is only needed as
    # the return value of "__class_getitem__", so a descriptive string will do.
    def _GenericAlias(cls, item):
        return "%s[%s]" % (cls.__name__, item.__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class ParseError(LxmlSyntaxError):
    """Syntax error while parsing an XML document.

    For compatibility with ElementTree 1.3 and later.

    The error location is stored as ``lineno`` and a zero-based ``offset``;
    ``position`` exposes the same location as a ``(line, column)`` pair
    whose column is one greater than ``offset``.
    """
    def __init__(self, message, code, line, column, filename=None):
        super(_ParseError, self).__init__(message)
        # Compute the offset before touching any attribute, so that a bad
        # 'column' value fails before the instance is partially set up.
        zero_based_column = column - 1
        self.lineno = line
        self.offset = zero_based_column
        self.code = code
        self.filename = filename

    @property
    def position(self):
        return self.lineno, self.offset + 1

    @position.setter
    def position(self, new_pos):
        line, column = new_pos
        self.lineno = line
        self.offset = column - 1

# Module-private alias; ParseError.__init__ refers to the class through it.
cdef object _ParseError = ParseError
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class XMLSyntaxError(ParseError):
    """Syntax error while parsing an XML document."""
|
| 41 |
+
|
| 42 |
+
cdef class ParserError(LxmlError):
    """Internal lxml parser error."""
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@cython.final
@cython.internal
cdef class _ParserDictionaryContext:
    # Global parser context to share the string dictionary.
    #
    # This class is a delegate singleton!
    #
    # It creates _ParserDictionaryContext objects for each thread to keep thread state,
    # but those must never be used directly.  Always stick to using the static
    # __GLOBAL_PARSER_CONTEXT as defined below the class.
    #

    # libxml2 string dictionary held by this context; released in __dealloc__
    cdef tree.xmlDict* _c_dict
    # default parser of this context, created lazily by getDefaultParser()
    cdef _BaseParser _default_parser
    # stack of implied parser contexts (push/pop/findImpliedContext below)
    cdef list _implied_parser_contexts

    def __cinit__(self):
        self._implied_parser_contexts = []

    def __dealloc__(self):
        if self._c_dict is not NULL:
            xmlparser.xmlDictFree(self._c_dict)

    cdef int initMainParserContext(self) except -1:
        """Put the global context into the thread dictionary of the main
        thread.  To be called once and only in the main thread."""
        thread_dict = python.PyThreadState_GetDict()
        if thread_dict is not NULL:
            (<dict>thread_dict)["_ParserDictionaryContext"] = self

    cdef _ParserDictionaryContext _findThreadParserContext(self):
        "Find (or create) the _ParserDictionaryContext object for the current thread"
        cdef _ParserDictionaryContext context
        thread_dict = python.PyThreadState_GetDict()
        if thread_dict is NULL:
            # no thread state dictionary available => fall back to this object
            return self
        d = <dict>thread_dict
        result = python.PyDict_GetItem(d, "_ParserDictionaryContext")
        if result is not NULL:
            return <object>result
        # first use in this thread: create a context and register it in the
        # thread dictionary under the same key
        context = <_ParserDictionaryContext>_ParserDictionaryContext.__new__(_ParserDictionaryContext)
        d["_ParserDictionaryContext"] = context
        return context

    cdef int setDefaultParser(self, _BaseParser parser) except -1:
        "Set the default parser for the current thread"
        cdef _ParserDictionaryContext context
        context = self._findThreadParserContext()
        context._default_parser = parser

    cdef _BaseParser getDefaultParser(self):
        "Return (or create) the default parser of the current thread"
        cdef _ParserDictionaryContext context
        context = self._findThreadParserContext()
        if context._default_parser is None:
            # this object's own default is a copy of __DEFAULT_XML_PARSER ...
            if self._default_parser is None:
                self._default_parser = __DEFAULT_XML_PARSER._copy()
            # ... and a distinct per-thread context gets a copy of that
            if context is not self:
                context._default_parser = self._default_parser._copy()
        return context._default_parser

    cdef tree.xmlDict* _getThreadDict(self, tree.xmlDict* default):
        "Return the thread-local dict or create a new one if necessary."
        cdef _ParserDictionaryContext context
        context = self._findThreadParserContext()
        if context._c_dict is NULL:
            # thread dict not yet set up => use default or create a new one
            if default is not NULL:
                # adopt the caller's dict and take a reference on it
                context._c_dict = default
                xmlparser.xmlDictReference(default)
                return default
            if self._c_dict is NULL:
                self._c_dict = xmlparser.xmlDictCreate()
            if context is not self:
                # per-thread contexts get a sub-dictionary of this object's dict
                context._c_dict = xmlparser.xmlDictCreateSub(self._c_dict)
        return context._c_dict

    cdef int initThreadDictRef(self, tree.xmlDict** c_dict_ref) except -1:
        # Make *c_dict_ref point to the thread's dictionary.
        c_dict = c_dict_ref[0]
        c_thread_dict = self._getThreadDict(c_dict)
        if c_dict is c_thread_dict:
            # already the thread dict (or just adopted as such): nothing to do
            return 0
        if c_dict is not NULL:
            # drop the dict that was referenced before
            xmlparser.xmlDictFree(c_dict)
        # store the thread dict and take a reference on behalf of the owner
        c_dict_ref[0] = c_thread_dict
        xmlparser.xmlDictReference(c_thread_dict)

    cdef int initParserDict(self, xmlparser.xmlParserCtxt* pctxt) except -1:
        "Assure we always use the same string dictionary."
        self.initThreadDictRef(&pctxt.dict)
        pctxt.dictNames = 1

    cdef int initXPathParserDict(self, xpath.xmlXPathContext* pctxt) except -1:
        "Assure we always use the same string dictionary."
        self.initThreadDictRef(&pctxt.dict)

    cdef int initDocDict(self, xmlDoc* result) except -1:
        "Store dict of last object parsed if no shared dict yet"
        # XXX We also free the result dict here if there already was one.
        # This case should only occur for new documents with empty dicts,
        # otherwise we'd free data that's in use => segfault
        self.initThreadDictRef(&result.dict)

    cdef _ParserContext findImpliedContext(self):
        """Return any current implied xml parser context for the current
        thread.  This is used when the resolver functions are called
        with an xmlParserCtxt that was generated from within libxml2
        (i.e. without a _ParserContext) - which happens when parsing
        schema and xinclude external references."""
        cdef _ParserDictionaryContext context
        cdef _ParserContext implied_context

        # see if we have a current implied parser
        context = self._findThreadParserContext()
        if context._implied_parser_contexts:
            # the most recently pushed context is the current one
            implied_context = context._implied_parser_contexts[-1]
            return implied_context
        return None

    cdef int pushImpliedContextFromParser(self, _BaseParser parser) except -1:
        "Push a new implied context object taken from the parser."
        if parser is not None:
            self.pushImpliedContext(parser._getParserContext())
        else:
            # push a None entry so that a later popImpliedContext() has
            # something to remove
            self.pushImpliedContext(None)

    cdef int pushImpliedContext(self, _ParserContext parser_context) except -1:
        "Push a new implied context object."
        cdef _ParserDictionaryContext context
        context = self._findThreadParserContext()
        context._implied_parser_contexts.append(parser_context)

    cdef int popImpliedContext(self) except -1:
        "Pop the current implied context object."
        cdef _ParserDictionaryContext context
        context = self._findThreadParserContext()
        context._implied_parser_contexts.pop()

# The delegate singleton; registered for the importing thread right away.
cdef _ParserDictionaryContext __GLOBAL_PARSER_CONTEXT = _ParserDictionaryContext()
__GLOBAL_PARSER_CONTEXT.initMainParserContext()
|
| 187 |
+
|
| 188 |
+
############################################################
|
| 189 |
+
## support for Python unicode I/O
|
| 190 |
+
############################################################
|
| 191 |
+
|
| 192 |
+
# name of Python Py_UNICODE encoding as known to libxml2
cdef const_char* _PY_UNICODE_ENCODING = NULL

cdef int _setupPythonUnicode() except -1:
    """Sets _PY_UNICODE_ENCODING to the internal encoding name of Python unicode
    strings if libxml2 supports reading native Python unicode.  This depends
    on iconv and the local Python installation, so we simply check if we find
    a matching encoding handler.

    Always returns 0.  _PY_UNICODE_ENCODING is left at NULL when no encoding
    name can be determined or libxml2 has no handler for it.
    """
    global _PY_UNICODE_ENCODING
    cdef tree.xmlCharEncodingHandler* enchandler
    cdef const_char* enc
    # Probe string; its in-memory bytes reveal how Py_UNICODE is laid out.
    cdef Py_UNICODE *uchars = [c'<', c't', c'e', c's', c't', c'/', c'>']
    cdef const_xmlChar* buffer = <const_xmlChar*>uchars
    # apparently, libxml2 can't detect UTF-16 on some systems
    if (buffer[0] == c'<' and buffer[1] == c'\0' and
            buffer[2] == c't' and buffer[3] == c'\0'):
        enc = "UTF-16LE"
    elif (buffer[0] == c'\0' and buffer[1] == c'<' and
            buffer[2] == c'\0' and buffer[3] == c't'):
        enc = "UTF-16BE"
    else:
        # let libxml2 give it a try
        enc = _findEncodingName(buffer, sizeof(Py_UNICODE) * 7)
    if enc is NULL:
        # not my fault, it's YOUR broken system :)
        return 0
    enchandler = tree.xmlFindCharEncodingHandler(enc)
    if enchandler is not NULL:
        # The handler was only looked up to prove that it exists.
        tree.xmlCharEncCloseFunc(enchandler)
        _PY_UNICODE_ENCODING = enc
    return 0
|
| 225 |
+
|
| 226 |
+
cdef const_char* _findEncodingName(const_xmlChar* buffer, int size):
    "Work around bug in libxml2: find iconv name of encoding on our own."
    cdef tree.xmlCharEncoding detected = tree.xmlDetectCharEncoding(buffer, size)

    if detected == tree.XML_CHAR_ENCODING_UTF16LE:
        # FF FE followed by two zero bytes is reported as UTF-32LE instead
        if size >= 4 and (buffer[0] == <const_xmlChar> b'\xFF' and
                          buffer[1] == <const_xmlChar> b'\xFE' and
                          buffer[2] == 0 and buffer[3] == 0):
            return "UTF-32LE"  # according to BOM
        return "UTF-16LE"
    if detected == tree.XML_CHAR_ENCODING_UTF16BE:
        return "UTF-16BE"
    if detected == tree.XML_CHAR_ENCODING_UCS4LE:
        return "UCS-4LE"
    if detected == tree.XML_CHAR_ENCODING_UCS4BE:
        return "UCS-4BE"
    if detected == tree.XML_CHAR_ENCODING_NONE:
        return NULL
    # returns a constant char*, no need to free it
    return tree.xmlGetCharEncodingName(detected)
|
| 248 |
+
|
| 249 |
+
# "Py_UNICODE" support was removed in Python 3.12, so the probe is only
# run on older interpreters.
if 0x030C0000 > python.PY_VERSION_HEX:
    _setupPythonUnicode()
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
cdef unicode _find_PyUCS4EncodingName():
    """
    Find a suitable encoding for Py_UCS4 PyUnicode strings in libxml2.

    Returns the encoding name, with a 'LE'/'BE' suffix appended if the
    detected name carries neither, or None if no name could be found.
    """
    # Probe string that includes a non-BMP character.
    ustring = "<xml>\U0001F92A</xml>"
    cdef const xmlChar* buffer = <const xmlChar*> python.PyUnicode_DATA(ustring)
    cdef Py_ssize_t py_buffer_len = python.PyUnicode_GET_LENGTH(ustring)

    encoding_name = ''
    cdef tree.xmlCharEncoding enc = tree.xmlDetectCharEncoding(buffer, py_buffer_len)
    enchandler = tree.xmlGetCharEncodingHandler(enc)
    if enchandler is NULL:
        # no handler available: fall back to the plain name of the encoding
        c_name = tree.xmlGetCharEncodingName(enc)
        if c_name:
            encoding_name = c_name.decode('UTF-8')
    else:
        try:
            if enchandler.name:
                encoding_name = enchandler.name.decode('UTF-8')
        finally:
            tree.xmlCharEncCloseFunc(enchandler)

    # Make the byte order explicit if the name does not state it already.
    if encoding_name and not encoding_name.endswith(('LE', 'BE')):
        encoding_name += 'BE' if python.PY_BIG_ENDIAN else 'LE'
    return encoding_name or None

_pyucs4_encoding_name = _find_PyUCS4EncodingName()
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
############################################################
|
| 285 |
+
## support for file-like objects
|
| 286 |
+
############################################################
|
| 287 |
+
|
| 288 |
+
@cython.final
@cython.internal
cdef class _FileReaderContext:
    # Adapter that lets libxml2 pull its input from a Python file-like
    # object through the _readFilelikeParser() read callback.

    # the object that .read() is called on; reset to None once closed
    cdef object _filelike
    # encoding used for text chunks (None => 'utf8') and passed to the parser
    cdef object _encoding
    # encoded URL object; keeps the memory behind _c_url alive
    cdef object _url
    # the chunk most recently read from the file-like object
    cdef object _bytes
    # receives exceptions that cannot be propagated through C callbacks
    cdef _ExceptionContext _exc_context
    # offset of the next unread byte in _bytes; -1 after end of input
    cdef Py_ssize_t _bytes_read
    cdef char* _c_url
    cdef bint _close_file_after_read

    def __cinit__(self, filelike, exc_context not None, url, encoding=None, bint close_file=False):
        self._filelike = filelike
        self._exc_context = exc_context
        self._encoding = encoding
        self._close_file_after_read = close_file
        if url is not None:
            url = _encodeFilename(url)
            self._c_url = _cstr(url)
        self._url = url
        self._bytes_read = 0
        self._bytes = b''

    cdef _close_file(self):
        """
        Close the file-like object if closing was requested on creation.

        The reference to the file is dropped before its close() method is
        called.  Objects without a 'close' attribute are silently accepted.
        """
        filelike = self._filelike
        if filelike is None or not self._close_file_after_read:
            return
        self._filelike = None
        close = getattr(filelike, 'close', None)
        if close is not None:
            close()

    cdef xmlparser.xmlParserInputBuffer* _createParserInputBuffer(self) noexcept:
        """
        Allocate a libxml2 input buffer that reads through this context.
        Returns NULL if the allocation failed.
        """
        cdef xmlparser.xmlParserInputBuffer* c_input_buffer
        c_input_buffer = xmlparser.xmlAllocParserInputBuffer(0)
        if c_input_buffer is NULL:
            return NULL
        c_input_buffer.context = <python.PyObject*> self
        c_input_buffer.readcallback = _readFilelikeParser
        return c_input_buffer

    cdef xmlparser.xmlParserInput* _createParserInput(
            self, xmlparser.xmlParserCtxt* ctxt) noexcept:
        """
        Create a parser input stream for 'ctxt' that reads through this
        context.  Returns NULL if the input buffer could not be allocated.
        """
        cdef xmlparser.xmlParserInputBuffer* c_input_buffer = self._createParserInputBuffer()
        if c_input_buffer is NULL:
            return NULL
        return xmlparser.xmlNewIOInputStream(ctxt, c_input_buffer, 0)

    cdef tree.xmlDtd* _readDtd(self) noexcept:
        """
        Parse a DTD from the file-like object.  Returns NULL if the input
        buffer could not be allocated.
        """
        cdef xmlparser.xmlParserInputBuffer* c_input_buffer = self._createParserInputBuffer()
        if c_input_buffer is NULL:
            return NULL
        with nogil:
            return xmlparser.xmlIOParseDTD(NULL, c_input_buffer, 0)

    cdef xmlDoc* _readDoc(self, xmlparser.xmlParserCtxt* ctxt, int options) noexcept:
        """
        Parse a complete XML or HTML document (depending on 'ctxt.html')
        from the file-like object and close the file afterwards.

        Exceptions raised while closing are stored in the exception context
        instead of being propagated.
        """
        cdef xmlDoc* c_doc
        cdef void* c_reader = <python.PyObject*> self
        cdef char* c_encoding = NULL
        if self._encoding is not None:
            c_encoding = _cstr(self._encoding)

        saved_options = ctxt.options
        with nogil:
            if not ctxt.html:
                c_doc = xmlparser.xmlCtxtReadIO(
                    ctxt, _readFilelikeParser, NULL, c_reader,
                    self._c_url, c_encoding, options)
            else:
                c_doc = htmlparser.htmlCtxtReadIO(
                    ctxt, _readFilelikeParser, NULL, c_reader,
                    self._c_url, c_encoding, options)
                if c_doc is not NULL and _fixHtmlDictNames(ctxt.dict, c_doc) < 0:
                    tree.xmlFreeDoc(c_doc)
                    c_doc = NULL
        ctxt.options = saved_options  # work around libxml2 problem

        try:
            self._close_file()
        except:
            self._exc_context._store_raised()
        finally:
            return c_doc  # swallow any exceptions

    cdef int copyToBuffer(self, char* c_buffer, int c_requested) noexcept:
        """
        Copy up to 'c_requested' bytes of input into 'c_buffer'.

        Data left over from the previous read is used first; further chunks
        are read from the file-like object until the request is satisfied
        or an empty chunk signals the end of the input.  Text chunks are
        encoded with the configured encoding ('utf8' if none was given).

        Returns the number of bytes copied, 0 once the end of the input has
        been seen, or -1 after an error (the exception is then stored in
        the exception context).
        """
        cdef int c_copied = 0
        cdef char* c_source
        cdef Py_ssize_t chunk_size, available
        if self._bytes_read < 0:
            # end of input was already reached
            return 0
        try:
            chunk_size = python.PyBytes_GET_SIZE(self._bytes)
            available = chunk_size - self._bytes_read
            while c_requested > available:
                # drain what is left of the current chunk ...
                c_source = _cstr(self._bytes) + self._bytes_read
                cstring_h.memcpy(c_buffer, c_source, available)
                c_copied += available
                c_buffer += available
                c_requested -= available

                # ... and fetch the next one
                self._bytes = self._filelike.read(c_requested)
                if isinstance(self._bytes, unicode):
                    if self._encoding is None:
                        self._bytes = (<unicode>self._bytes).encode('utf8')
                    else:
                        self._bytes = python.PyUnicode_AsEncodedString(
                            self._bytes, _cstr(self._encoding), NULL)
                elif not isinstance(self._bytes, bytes):
                    self._close_file()
                    raise TypeError(
                        "reading from file-like objects must return byte strings or unicode strings")

                available = python.PyBytes_GET_SIZE(self._bytes)
                if available == 0:
                    self._bytes_read = -1
                    self._close_file()
                    return c_copied
                self._bytes_read = 0

            if c_requested > 0:
                c_source = _cstr(self._bytes) + self._bytes_read
                cstring_h.memcpy(c_buffer, c_source, c_requested)
                c_copied += c_requested
                self._bytes_read += c_requested
        except:
            c_copied = -1
            self._exc_context._store_raised()
            try:
                self._close_file()
            except:
                self._exc_context._store_raised()
        finally:
            return c_copied  # swallow any exceptions
|
| 423 |
+
|
| 424 |
+
cdef int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size) noexcept with gil:
    """
    libxml2 read callback: 'ctxt' is the _FileReaderContext that was
    registered as callback context, the request is forwarded to it.
    """
    cdef _FileReaderContext reader = <_FileReaderContext>ctxt
    return reader.copyToBuffer(c_buffer, c_size)
|
| 426 |
+
|
| 427 |
+
|
| 428 |
+
############################################################
|
| 429 |
+
## support for custom document loaders
|
| 430 |
+
############################################################
|
| 431 |
+
|
| 432 |
+
cdef xmlparser.xmlParserInput* _local_resolver(const_char* c_url, const_char* c_pubid,
                                               xmlparser.xmlParserCtxt* c_context) noexcept with gil:
    """
    External entity loader that dispatches to the Python-level resolvers.

    The resolver context is taken from 'c_context._private' or, failing
    that, from the implied context of the global parser context.  The
    resolvers of that context are asked for a document reference, which is
    turned into a libxml2 parser input depending on its type (in-memory
    string, filename or file-like object).

    Falls back to __DEFAULT_ENTITY_LOADER (if set) when there is no
    context, when no document was resolved or when no input could be
    created.  Returns NULL after storing the exception in the context if
    resolving raised one.
    """
    cdef _ResolverContext context
    cdef xmlparser.xmlParserInput* c_input
    cdef _InputDocument doc_ref
    cdef _FileReaderContext file_context
    # if there is no _ParserContext associated with the xmlParserCtxt
    # passed, check to see if the thread state object has an implied
    # context.
    if c_context._private is not NULL:
        context = <_ResolverContext>c_context._private
    else:
        context = __GLOBAL_PARSER_CONTEXT.findImpliedContext()

    if context is None:
        # no Python-level context at all => only the default loader can help
        if __DEFAULT_ENTITY_LOADER is NULL:
            return NULL
        with nogil:
            # free the GIL as we might do serious I/O here (e.g. HTTP)
            c_input = __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context)
        return c_input

    try:
        if c_url is NULL:
            url = None
        else:
            # parsing a related document (DTD etc.) => UTF-8 encoded URL?
            url = _decodeFilename(<const_xmlChar*>c_url)
        if c_pubid is NULL:
            pubid = None
        else:
            pubid = funicode(<const_xmlChar*>c_pubid) # always UTF-8

        doc_ref = context._resolvers.resolve(url, pubid, context)
    except:
        # exceptions cannot cross the C callback boundary => store them
        context._store_raised()
        return NULL

    if doc_ref is not None:
        if doc_ref._type == PARSER_DATA_STRING:
            data = doc_ref._data_bytes
            filename = doc_ref._filename
            # normalise the filename to a non-empty bytes object or None
            if not filename:
                filename = None
            elif not isinstance(filename, bytes):
                # most likely a text URL
                filename = filename.encode('utf8')
                if not isinstance(filename, bytes):
                    filename = None

            if tree.LIBXML_VERSION >= 21400:
                # NOTE(review): the filename is duplicated with xmlStrdup()
                # before being passed on - confirm against the libxml2 docs
                # whether xmlNewInputFromMemory() takes ownership of it or
                # makes its own copy (in which case this copy would leak).
                c_filename = <char *>tree.xmlStrdup(_xcstr(filename)) if filename is not None else NULL
                c_input = xmlparser.xmlNewInputFromMemory(
                    c_filename, _xcstr(data), <size_t> python.PyBytes_GET_SIZE(data), 0)
            else:
                # set up the input stream by hand, pointing into 'data'
                c_input = xmlparser.xmlNewInputStream(c_context)
                if c_input is not NULL:
                    if filename is not None:
                        c_input.filename = <char *>tree.xmlStrdup(_xcstr(filename))
                    c_input.base = _xcstr(data)
                    c_input.length = python.PyBytes_GET_SIZE(data)
                    c_input.cur = c_input.base
                    c_input.end = c_input.base + c_input.length
        elif doc_ref._type == PARSER_DATA_FILENAME:
            data = None
            c_filename = _cstr(doc_ref._filename)
            with nogil:
                # free the GIL as we might do serious I/O here
                c_input = xmlparser.xmlNewInputFromFile(
                    c_context, c_filename)
        elif doc_ref._type == PARSER_DATA_FILE:
            file_context = _FileReaderContext(doc_ref._file, context, url,
                                              None, doc_ref._close_file)
            c_input = file_context._createParserInput(c_context)
            data = file_context
        else:
            data = None
            c_input = NULL

        if data is not None:
            # the parser input refers to 'data' => store a reference to it
            # in the context
            context._storage.add(data)
        if c_input is not NULL:
            return c_input

    if __DEFAULT_ENTITY_LOADER is NULL:
        return NULL

    with nogil:
        # free the GIL as we might do serious I/O here (e.g. HTTP)
        c_input = __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context)
    return c_input
|
| 523 |
+
|
| 524 |
+
# The external entity loader that libxml2 reports at the time this module
# code runs.  _local_resolver() falls back to it when the Python-level
# resolvers do not provide an input.
cdef xmlparser.xmlExternalEntityLoader __DEFAULT_ENTITY_LOADER
__DEFAULT_ENTITY_LOADER = xmlparser.xmlGetExternalEntityLoader()
|
| 526 |
+
|
| 527 |
+
|
| 528 |
+
cdef xmlparser.xmlExternalEntityLoader _register_document_loader() noexcept nogil:
    """
    Install _local_resolver() as libxml2's external entity loader and
    return the loader that was active before.
    """
    cdef xmlparser.xmlExternalEntityLoader c_previous
    c_previous = xmlparser.xmlGetExternalEntityLoader()
    xmlparser.xmlSetExternalEntityLoader(<xmlparser.xmlExternalEntityLoader>_local_resolver)
    return c_previous
|
| 532 |
+
|
| 533 |
+
cdef void _reset_document_loader(xmlparser.xmlExternalEntityLoader old) noexcept nogil:
    """
    Make 'old' libxml2's external entity loader again, e.g. the value that
    _register_document_loader() returned.
    """
    xmlparser.xmlSetExternalEntityLoader(old)
|
| 535 |
+
|
| 536 |
+
|
| 537 |
+
############################################################
|
| 538 |
+
## Parsers
|
| 539 |
+
############################################################
|
| 540 |
+
|
| 541 |
+
@cython.no_gc_clear  # May have to call "self._validator.disconnect()" on dealloc.
@cython.internal
cdef class _ParserContext(_ResolverContext):
    # Links a libxml2 parser context to the lxml-level parser state: error
    # log, optional schema validator, parser lock and result document.

    # collects the errors that libxml2 reports during a parser run
    cdef _ErrorLog _error_log
    # SAX-level schema validator, None if no schema validation is configured
    cdef _ParserSchemaValidationContext _validator
    # the libxml2 parser context, freed in __dealloc__()
    cdef xmlparser.xmlParserCtxt* _c_ctxt
    # entity loader that was active before prepare(), NULL if not replaced
    cdef xmlparser.xmlExternalEntityLoader _orig_loader
    # acquired in prepare(), released in cleanup()
    cdef python.PyThread_type_lock _lock
    cdef _Document _doc
    cdef bint _collect_ids

    def __cinit__(self):
        self._collect_ids = True
        if config.ENABLE_THREADING:
            self._lock = python.PyThread_allocate_lock()
        self._error_log = _ErrorLog()

    def __dealloc__(self):
        if config.ENABLE_THREADING and self._lock is not NULL:
            python.PyThread_free_lock(self._lock)
            self._lock = NULL
        if self._c_ctxt is not NULL:
            if <void*>self._validator is not NULL and self._validator is not None:
                # If the parser was not closed correctly (e.g. interrupted iterparse()),
                # and the schema validator wasn't freed and cleaned up yet, the libxml2 SAX
                # validator plug might still be in place, which will make xmlFreeParserCtxt()
                # crash when trying to xmlFree() a static SAX handler.
                # Thus, make sure we disconnect the handler interceptor here at the latest.
                self._validator.disconnect()
            xmlparser.xmlFreeParserCtxt(self._c_ctxt)

    cdef _ParserContext _copy(self):
        """
        Create a new context of the same class with the same ID collection
        setting, a copy of the validator (if any) and a copy of the
        resolver registry.  No libxml2 parser context is attached to it.
        """
        cdef _ParserContext context
        context = self.__class__()
        context._collect_ids = self._collect_ids
        # "_validator" is None when no schema is configured (every other
        # user in this class checks for that), so only copy a real one.
        if self._validator is not None:
            context._validator = self._validator.copy()
        _initParserContext(context, self._resolvers._copy(), NULL)
        return context

    cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
        """
        Connects the libxml2-level context to the lxml-level parser context.
        """
        self._c_ctxt = c_ctxt
        c_ctxt._private = <void*>self

    cdef void _resetParserContext(self) noexcept:
        """
        Reset the libxml2 parser context (if any) so that it can be reused.
        """
        if self._c_ctxt is not NULL:
            if self._c_ctxt.html:
                htmlparser.htmlCtxtReset(self._c_ctxt)
                self._c_ctxt.disableSAX = 0 # work around bug in libxml2
            else:
                xmlparser.xmlClearParserCtxt(self._c_ctxt)
                # work around bug in libxml2 [2.9.10 .. 2.9.14]:
                # https://gitlab.gnome.org/GNOME/libxml2/-/issues/378
                self._c_ctxt.nsNr = 0

    cdef int prepare(self, bint set_document_loader=True) except -1:
        """
        Set the context up for a parser run: acquire the parser lock, clear
        the error log, hook up error reporting, optionally install the
        document loader and connect the validator.

        Raises ParserError if the lock could not be acquired.
        """
        cdef int result
        if config.ENABLE_THREADING and self._lock is not NULL:
            with nogil:
                result = python.PyThread_acquire_lock(
                    self._lock, python.WAIT_LOCK)
            if result == 0:
                raise ParserError, "parser locking failed"
        self._error_log.clear()
        self._doc = None
        # Connect the lxml error log with libxml2's error handling.  In the case of parsing
        # HTML, ctxt->sax is not set to null, so this always works.  The libxml2 function
        # that does this is htmlInitParserCtxt in HTMLparser.c.  For HTML (and possibly XML
        # too), libxml2's SAX's serror is set to be the place where errors are sent when
        # schannel is set to ctxt->sax->serror in xmlCtxtErrMemory in libxml2's
        # parserInternals.c.
        # Need a cast here because older libxml2 releases do not use 'const' in the functype.
        self._c_ctxt.sax.serror = <xmlerror.xmlStructuredErrorFunc> _receiveParserError
        self._orig_loader = _register_document_loader() if set_document_loader else NULL
        if self._validator is not None:
            self._validator.connect(self._c_ctxt, self._error_log)
        return 0

    cdef int cleanup(self) except -1:
        """
        Undo prepare(): restore the entity loader, disconnect the validator,
        reset the parser context and always release the parser lock.
        """
        if self._orig_loader is not NULL:
            _reset_document_loader(self._orig_loader)
        try:
            if self._validator is not None:
                self._validator.disconnect()
            self._resetParserContext()
            self.clear()
            self._doc = None
            self._c_ctxt.sax.serror = NULL
        finally:
            if config.ENABLE_THREADING and self._lock is not NULL:
                python.PyThread_release_lock(self._lock)
        return 0

    cdef object _handleParseResult(self, _BaseParser parser,
                                   xmlDoc* result, filename):
        """
        Check the parse result and return the document object for it:
        self._doc if it already wraps the resulting C document, otherwise
        a newly created document.
        """
        c_doc = self._handleParseResultDoc(parser, result, filename)
        if self._doc is not None and self._doc._c_doc is c_doc:
            return self._doc
        else:
            return _documentFactory(c_doc, parser)

    cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser,
                                       xmlDoc* result, filename) except NULL:
        """
        Check the parse result at the C level, using the parser's 'recover'
        option.  A rejected document is only freed if self._doc is None.
        """
        recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER
        return _handleParseResult(self, self._c_ctxt, result,
                                  filename, recover,
                                  free_doc=self._doc is None)
|
| 650 |
+
|
| 651 |
+
cdef _initParserContext(_ParserContext context,
                        _ResolverRegistry resolvers,
                        xmlparser.xmlParserCtxt* c_ctxt):
    """
    Initialise the resolver part of 'context' and, unless 'c_ctxt' is
    NULL, connect it to that libxml2 parser context.
    """
    _initResolverContext(context, resolvers)
    if c_ctxt is NULL:
        return
    context._initParserContext(c_ctxt)
|
| 657 |
+
|
| 658 |
+
cdef void _forwardParserError(xmlparser.xmlParserCtxt* _parser_context, const xmlerror.xmlError* error) noexcept with gil:
    """
    Pass an error reported by libxml2 on to the error log of the lxml
    parser context that is attached to the libxml2 parser context.
    """
    cdef _ParserContext context = <_ParserContext>_parser_context._private
    context._error_log._receive(error)
|
| 663 |
+
|
| 664 |
+
cdef void _receiveParserError(void* c_context, const xmlerror.xmlError* error) noexcept nogil:
    """
    Structured error callback installed on the parser's SAX handler.

    Does nothing unless __DEBUG is set.  Errors are sent to the parser
    context's error log if the libxml2 context carries one in '_private',
    and to the generic _forwardError() otherwise.
    """
    cdef xmlparser.xmlParserCtxt* c_ctxt = <xmlparser.xmlParserCtxt*>c_context
    if not __DEBUG:
        return
    if c_ctxt is NULL or c_ctxt._private is NULL:
        _forwardError(NULL, error)
    else:
        _forwardParserError(c_ctxt, error)
|
| 670 |
+
|
| 671 |
+
cdef unicode _decodeParserErrorMessage(const char* c_message):
    """
    Decode a (non-NULL) libxml2 error message: UTF-8 first, Latin-1 as a
    fallback that cannot fail.
    """
    try:
        return c_message.decode('utf-8')
    except UnicodeDecodeError:
        # the filename may be in there => play it safe
        return c_message.decode('iso8859-1')


cdef int _raiseParseError(xmlparser.xmlParserCtxt* ctxt, filename,
                          _ErrorLog error_log) except -1:
    """
    Raise the exception that describes the last error of the parser context.

    Always raises, in this order of preference:

    - IOError if a filename is known and the last error is an I/O error
    - the parse exception built from 'error_log' if it is non-empty
    - XMLSyntaxError built from the last libxml2 error message
    - XMLSyntaxError with XML_ERR_INTERNAL_ERROR if nothing else is known
    """
    if filename is not None and \
           ctxt.lastError.domain == xmlerror.XML_FROM_IO:
        if isinstance(filename, bytes):
            filename = _decodeFilenameWithLength(
                <bytes>filename, len(<bytes>filename))
        if ctxt.lastError.message is not NULL:
            message = _decodeParserErrorMessage(ctxt.lastError.message)
            message = f"Error reading file '{filename}': {message.strip()}"
        else:
            message = f"Error reading '{filename}'"
        raise IOError, message
    elif error_log:
        raise error_log._buildParseException(
            XMLSyntaxError, "Document is not well formed")
    elif ctxt.lastError.message is not NULL:
        # Decode the C string before using it: formatting the raw bytes
        # object would put its repr (b'...') into the exception message.
        message = _decodeParserErrorMessage(ctxt.lastError.message).strip()
        code = ctxt.lastError.code
        line = ctxt.lastError.line
        column = ctxt.lastError.int2
        if ctxt.lastError.line > 0:
            message = f"line {line}: {message}"
        raise XMLSyntaxError(message, code, line, column, filename)
    else:
        raise XMLSyntaxError(None, xmlerror.XML_ERR_INTERNAL_ERROR, 0, 0,
                             filename)
|
| 702 |
+
|
| 703 |
+
cdef xmlDoc* _handleParseResult(_ParserContext context,
                                xmlparser.xmlParserCtxt* c_ctxt,
                                xmlDoc* result, filename,
                                bint recover, bint free_doc) except NULL:
    """
    Decide whether the parsed document 'result' can be handed to the user.

    Returns 'result' after filling in a missing URL and encoding, or raises
    an exception if parsing failed, if the document is rejected as not
    well-formed or invalid, or if an exception was stored in the context
    during parsing.  A rejected document is only freed if 'free_doc' is
    true.

    NOTE(review): 'context' is checked against None in some places but
    dereferenced unconditionally in others ('context._validator') -
    presumably callers never pass None; verify against the callers.
    """
    # The C-level argument xmlDoc* result is passed in as NULL if the parser was not able
    # to parse the document.
    cdef bint well_formed
    if result is not NULL:
        __GLOBAL_PARSER_CONTEXT.initDocDict(result)

    # a document left in the parser context that is not the result is freed
    if c_ctxt.myDoc is not NULL:
        if c_ctxt.myDoc is not result:
            __GLOBAL_PARSER_CONTEXT.initDocDict(c_ctxt.myDoc)
            tree.xmlFreeDoc(c_ctxt.myDoc)
        c_ctxt.myDoc = NULL

    if result is not NULL:
        # "wellFormed" in libxml2 is 0 if the parser found fatal errors. It still returns a
        # parse result document if 'recover=True'. Here, we determine if we can present
        # the document to the user or consider it incorrect or broken enough to raise an error.
        if (context._validator is not None and
                not context._validator.isvalid()):
            well_formed = 0  # actually not 'valid', but anyway ...
        elif (not c_ctxt.wellFormed and not c_ctxt.html and
                c_ctxt.charset == tree.XML_CHAR_ENCODING_8859_1 and
                [1 for error in context._error_log
                 if error.type == ErrorTypes.ERR_INVALID_CHAR]):
            # An encoding error occurred and libxml2 switched from UTF-8
            # input to (undecoded) Latin-1, at some arbitrary point in the
            # document.  Better raise an error than allowing for a broken
            # tree with mixed encodings. This is fixed in libxml2 2.12.
            well_formed = 0
        elif recover or (c_ctxt.wellFormed and
                         c_ctxt.lastError.level < xmlerror.XML_ERR_ERROR):
            well_formed = 1
        elif not c_ctxt.replaceEntities and not c_ctxt.validate \
                 and context is not None:
            # in this mode, we ignore errors about undefined entities
            for error in context._error_log.filter_from_errors():
                if error.type != ErrorTypes.WAR_UNDECLARED_ENTITY and \
                       error.type != ErrorTypes.ERR_UNDECLARED_ENTITY:
                    well_formed = 0
                    break
            else:
                # no other kind of error was found in the log
                well_formed = 1
        else:
            well_formed = 0

        if not well_formed:
            if free_doc:
                tree.xmlFreeDoc(result)
            result = NULL

    # an exception stored during parsing takes precedence over the result
    if context is not None and context._has_raised():
        if result is not NULL:
            if free_doc:
                tree.xmlFreeDoc(result)
            result = NULL
        context._raise_if_stored()

    if result is NULL:
        if context is not None:
            _raiseParseError(c_ctxt, filename, context._error_log)
        else:
            _raiseParseError(c_ctxt, filename, None)
    else:
        if result.URL is NULL and filename is not None:
            result.URL = tree.xmlStrdup(_xcstr(filename))
        if result.encoding is NULL:
            result.encoding = tree.xmlStrdup(<unsigned char*>"UTF-8")

    if context._validator is not None and \
           context._validator._add_default_attributes:
        # we currently need to do this here as libxml2 does not
        # support inserting default attributes during parse-time
        # validation
        context._validator.inject_default_attributes(result)

    return result
|
| 782 |
+
|
| 783 |
+
cdef int _fixHtmlDictNames(tree.xmlDict* c_dict, xmlDoc* c_doc) noexcept nogil:
    """
    Move the names of all elements in the document (and of their
    attributes) to the dict.  Returns -1 if a dict lookup failed, 0
    otherwise (also for a NULL document).
    """
    cdef xmlNode* c_node
    if c_doc is NULL:
        return 0
    c_node = c_doc.children
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(<xmlNode*>c_doc, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE and _fixHtmlDictNodeNames(c_dict, c_node) < 0:
        return -1
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
    return 0
|
| 794 |
+
|
| 795 |
+
cdef int _fixHtmlDictSubtreeNames(tree.xmlDict* c_dict, xmlDoc* c_doc,
                                  xmlNode* c_start_node) noexcept nogil:
    """
    Move names to the dict, iterating in document order, starting at
    c_start_node. This is used in incremental parsing after each chunk.

    Without a start node, the whole document is processed.  Returns -1 if
    a dict lookup failed, 0 otherwise (also for a NULL document).
    """
    cdef xmlNode* c_node
    if c_doc is NULL:
        return 0
    if c_start_node is NULL:
        return _fixHtmlDictNames(c_dict, c_doc)
    c_node = c_start_node
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(<xmlNode*>c_doc, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE and _fixHtmlDictNodeNames(c_dict, c_node) < 0:
        return -1
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
    return 0
|
| 813 |
+
|
| 814 |
+
cdef inline int _fixHtmlDictNodeNames(tree.xmlDict* c_dict,
                                      xmlNode* c_node) noexcept nogil:
    """
    Replace the name of the node and the names of its attributes by the
    corresponding dict strings, freeing names that did not already come
    from the dict lookup.  Returns -1 if a dict lookup failed, else 0.
    """
    cdef xmlNode* c_prop
    c_dict_name = tree.xmlDictLookup(c_dict, c_node.name, -1)
    if c_dict_name is NULL:
        return -1
    if c_node.name is not c_dict_name:
        tree.xmlFree(<char*>c_node.name)
        c_node.name = c_dict_name

    # same procedure for each attribute of the node
    c_prop = <xmlNode*>c_node.properties
    while c_prop is not NULL:
        c_dict_name = tree.xmlDictLookup(c_dict, c_prop.name, -1)
        if c_dict_name is NULL:
            return -1
        if c_prop.name is not c_dict_name:
            tree.xmlFree(<char*>c_prop.name)
            c_prop.name = c_dict_name
        c_prop = c_prop.next
    return 0
|
| 833 |
+
|
| 834 |
+
|
| 835 |
+
@cython.internal
cdef class _BaseParser:
    # element class lookup scheme, see set_element_class_lookup()
    cdef ElementClassLookup _class_lookup
    # custom resolver registry, exposed as the "resolvers" property
    cdef _ResolverRegistry _resolvers
    # lazily created by _getParserContext()
    cdef _ParserContext _parser_context
    # lazily created by _getPushParserContext()
    cdef _ParserContext _push_parser_context
    # libxml2 parser option flags
    cdef int _parse_options
    # selects the HTML variants of the libxml2 parser contexts
    cdef bint _for_html
    # the following three switch off SAX callbacks in _configureSaxContext()
    cdef bint _remove_comments
    cdef bint _remove_pis
    cdef bint _strip_cdata
    # passed on to the parser contexts
    cdef bint _collect_ids
    # if false, _configureSaxContext() installs _getInternalEntityOnly
    cdef bint _resolve_external_entities
    # if set, the parser contexts get a SAX validator from this schema
    cdef XMLSchema _schema
    # encoded base URL, see _setBaseURL()
    cdef bytes _filename
    cdef readonly object target
    # encoding name that __init__() checked against libxml2, or None
    cdef object _default_encoding
    cdef tuple _events_to_collect  # (event_types, tag)
|
| 853 |
+
|
| 854 |
+
def __init__(self, int parse_options, bint for_html, XMLSchema schema,
             remove_comments, remove_pis, strip_cdata, collect_ids,
             target, encoding, bint resolve_external_entities=True):
    # Store the parser configuration and validate the default encoding.
    #
    # Raises TypeError if the class is instantiated directly (it is neither
    # an XMLParser nor an HTMLParser) and LookupError if libxml2 does not
    # know the requested encoding.
    cdef tree.xmlCharEncodingHandler* enchandler
    if not isinstance(self, (XMLParser, HTMLParser)):
        raise TypeError, "This class cannot be instantiated"

    if not collect_ids and tree.LIBXML_VERSION >= 21500:
        parse_options |= xmlparser.XML_PARSE_SKIP_IDS

    self._parse_options = parse_options
    self.target = target
    self._for_html = for_html
    self._remove_comments = remove_comments
    self._remove_pis = remove_pis
    self._strip_cdata = strip_cdata
    self._collect_ids = collect_ids
    self._resolve_external_entities = resolve_external_entities
    self._schema = schema

    self._resolvers = _ResolverRegistry()

    if encoding is None:
        self._default_encoding = None
    else:
        encoding = _utf8(encoding)
        enchandler = tree.xmlFindCharEncodingHandler(_cstr(encoding))
        if enchandler is NULL:
            # "encoding" is a byte string at this point => decode it so
            # that the message does not show a bytes repr (b'...')
            encoding_name = encoding.decode('utf8', 'replace')
            raise LookupError, f"unknown encoding: '{encoding_name}'"
        # the handler was only looked up to check that the encoding exists
        tree.xmlCharEncCloseFunc(enchandler)
        self._default_encoding = encoding
|
| 886 |
+
|
| 887 |
+
cdef _setBaseURL(self, base_url):
    """
    Store the encoded form of 'base_url' as the parser's filename.
    """
    self._filename = _encodeFilename(base_url)
|
| 889 |
+
|
| 890 |
+
cdef _collectEvents(self, event_types, tag):
    """
    Remember which parse events to collect, optionally restricted by 'tag'.

    The event types are de-duplicated and validated; None means that no
    events are collected.
    """
    if event_types is not None:
        event_types = tuple(set(event_types))
        _buildParseEventFilter(event_types)  # purely for validation
    else:
        event_types = ()
    self._events_to_collect = (event_types, tag)
|
| 897 |
+
|
| 898 |
+
cdef _ParserContext _getParserContext(self):
    """
    Return the parser context of this parser.  It is created and
    configured on first use and then reused.
    """
    cdef xmlparser.xmlParserCtxt* c_ctxt
    cdef _ParserContext context
    if self._parser_context is not None:
        return self._parser_context

    context = self._createContext(self.target, None)
    self._parser_context = context
    context._collect_ids = self._collect_ids
    if self._schema is not None:
        context._validator = self._schema._newSaxValidator(
            self._parse_options & xmlparser.XML_PARSE_DTDATTR)
    c_ctxt = self._newParserCtxt()
    _initParserContext(context, self._resolvers, c_ctxt)
    self._configureSaxContext(c_ctxt)
    return context
|
| 911 |
+
|
| 912 |
+
cdef _ParserContext _getPushParserContext(self):
    """
    Return the push parser context of this parser.  It is created and
    configured (including the events to collect) on first use and then
    reused.
    """
    cdef xmlparser.xmlParserCtxt* c_ctxt
    cdef _ParserContext context
    if self._push_parser_context is not None:
        return self._push_parser_context

    context = self._createContext(self.target, self._events_to_collect)
    self._push_parser_context = context
    context._collect_ids = self._collect_ids
    if self._schema is not None:
        context._validator = self._schema._newSaxValidator(
            self._parse_options & xmlparser.XML_PARSE_DTDATTR)
    c_ctxt = self._newPushParserCtxt()
    _initParserContext(context, self._resolvers, c_ctxt)
    self._configureSaxContext(c_ctxt)
    return context
|
| 927 |
+
|
| 928 |
+
cdef _ParserContext _createContext(self, target, events_to_collect):
    """
    Create the lxml-level parser context that fits the configuration:
    a target parser context if a target is given, a SAX parser context
    if only events are collected and a plain parser context otherwise.
    """
    cdef _SaxParserContext sax_context
    if target is None and not events_to_collect:
        # nothing special to configure
        return _ParserContext()

    if target is None:
        sax_context = _SaxParserContext(self)
    else:
        sax_context = _TargetParserContext(self)
        (<_TargetParserContext>sax_context)._setTarget(target)

    if events_to_collect:
        event_types, tag = events_to_collect
        sax_context._setEventFilter(event_types, tag)
    return sax_context
|
| 945 |
+
|
| 946 |
+
@cython.final
cdef int _configureSaxContext(self, xmlparser.xmlParserCtxt* pctxt) except -1:
    """
    Apply the parser options that are implemented by replacing or
    removing SAX callbacks of the parser context.
    """
    if self._remove_comments:
        pctxt.sax.comment = NULL
    if self._remove_pis:
        pctxt.sax.processingInstruction = NULL
    if self._strip_cdata:
        # without the CDATA callback, CDATA sections end up as plain text
        pctxt.sax.cdataBlock = NULL
    if not self._resolve_external_entities:
        pctxt.sax.getEntity = _getInternalEntityOnly
    return 0
|
| 957 |
+
|
| 958 |
+
cdef int _registerHtmlErrorHandler(self, xmlparser.xmlParserCtxt* c_ctxt) except -1:
    """
    Upgrade the SAX1 handler of an HTML parser context to SAX2 and
    install the structured error callback on it.

    Does nothing if there is no SAX handler, if it is not initialised or
    if it already is a SAX2 handler.  Raises MemoryError if the handler
    copy cannot be allocated.
    """
    cdef xmlparser.xmlSAXHandler* c_sax = c_ctxt.sax
    if c_sax is NULL or not c_sax.initialized or c_sax.initialized == xmlparser.XML_SAX2_MAGIC:
        return 0

    # need to extend SAX1 context to SAX2 to get proper error reports
    if <xmlparser.xmlSAXHandlerV1*>c_sax is &htmlparser.htmlDefaultSAXHandler:
        # never modify the shared default handler => work on a private copy
        c_sax = <xmlparser.xmlSAXHandler*> tree.xmlMalloc(sizeof(xmlparser.xmlSAXHandler))
        if c_sax is NULL:
            raise MemoryError()
        cstring_h.memcpy(c_sax, &htmlparser.htmlDefaultSAXHandler,
                         sizeof(htmlparser.htmlDefaultSAXHandler))
        c_ctxt.sax = c_sax
    c_sax.initialized = xmlparser.XML_SAX2_MAGIC
    # Need a cast here because older libxml2 releases do not use 'const' in the functype.
    c_sax.serror = <xmlerror.xmlStructuredErrorFunc> _receiveParserError
    c_sax.startElementNs = NULL
    c_sax.endElementNs = NULL
    c_sax._private = NULL
    return 0
|
| 976 |
+
|
| 977 |
+
cdef xmlparser.xmlParserCtxt* _newParserCtxt(self) except NULL:
    """
    Create a libxml2 parser context for HTML or XML, depending on the
    parser type, and hook up the document initialisation callback.

    Raises MemoryError if libxml2 could not create the context.
    """
    cdef xmlparser.xmlParserCtxt* c_new_ctxt
    if not self._for_html:
        c_new_ctxt = xmlparser.xmlNewParserCtxt()
    else:
        c_new_ctxt = htmlparser.htmlCreateMemoryParserCtxt('dummy', 5)
        if c_new_ctxt is not NULL:
            self._registerHtmlErrorHandler(c_new_ctxt)
    if c_new_ctxt is NULL:
        raise MemoryError
    c_new_ctxt.sax.startDocument = _initSaxDocument
    return c_new_ctxt
|
| 992 |
+
|
| 993 |
+
cdef xmlparser.xmlParserCtxt* _newPushParserCtxt(self) except NULL:
    """
    Create a libxml2 push parser context for HTML or XML, depending on
    the parser type, apply the parse options to it and hook up the
    document initialisation callback.

    Raises MemoryError if libxml2 could not create the context.
    """
    cdef xmlparser.xmlParserCtxt* c_new_ctxt
    cdef char* c_filename = NULL
    if self._filename is not None:
        c_filename = _cstr(self._filename)

    if not self._for_html:
        c_new_ctxt = xmlparser.xmlCreatePushParserCtxt(
            NULL, NULL, NULL, 0, c_filename)
        if c_new_ctxt is not NULL:
            xmlparser.xmlCtxtUseOptions(c_new_ctxt, self._parse_options)
    else:
        c_new_ctxt = htmlparser.htmlCreatePushParserCtxt(
            NULL, NULL, NULL, 0, c_filename, tree.XML_CHAR_ENCODING_NONE)
        if c_new_ctxt is not NULL:
            self._registerHtmlErrorHandler(c_new_ctxt)
            htmlparser.htmlCtxtUseOptions(c_new_ctxt, self._parse_options)
    if c_new_ctxt is NULL:
        raise MemoryError()
    c_new_ctxt.sax.startDocument = _initSaxDocument
    return c_new_ctxt
|
| 1011 |
+
|
| 1012 |
+
@property
def error_log(self):
    """The error log of the last parser run.

    Returns the result of ``copy()`` on the error log held by the
    parser context.
    """
    cdef _ParserContext context = self._getParserContext()
    return context._error_log.copy()
|
| 1019 |
+
|
| 1020 |
+
@property
def resolvers(self):
    """The custom resolver registry of this parser."""
    registry = self._resolvers
    return registry
|
| 1024 |
+
|
| 1025 |
+
@property
def version(self):
    """The version of the underlying XML parser."""
    # LIBXML_VERSION supplies the three numbers for the format string.
    version_string = "libxml2 %d.%d.%d" % LIBXML_VERSION
    return version_string
|
| 1029 |
+
|
| 1030 |
+
def set_element_class_lookup(self, ElementClassLookup lookup = None):
    """set_element_class_lookup(self, lookup = None)

    Set a lookup scheme for element classes generated from this parser.

    Reset it by passing None or nothing.
    """
    # Stored as-is; None (the default) resets the lookup.
    self._class_lookup = lookup
|
| 1038 |
+
|
| 1039 |
+
cdef _BaseParser _copy(self):
    "Create a new parser with the same configuration."
    # Instantiate the same (sub)class without arguments, then overwrite
    # its configuration with the settings of this parser.
    cdef _BaseParser clone = self.__class__()

    # parse behaviour
    clone._parse_options = self._parse_options
    clone._for_html = self._for_html
    clone._default_encoding = self._default_encoding
    clone._filename = self._filename

    # content filtering
    clone._remove_comments = self._remove_comments
    clone._remove_pis = self._remove_pis
    clone._strip_cdata = self._strip_cdata

    # attached objects are shared with the copy, not duplicated
    clone._resolvers = self._resolvers
    clone.target = self.target
    clone._class_lookup = self._class_lookup
    clone._schema = self._schema
    clone._events_to_collect = self._events_to_collect
    return clone
|
| 1056 |
+
|
| 1057 |
+
def copy(self):
    """copy(self)

    Create a new parser with the same configuration.
    """
    # Public wrapper around the internal _copy() implementation.
    duplicate = self._copy()
    return duplicate
|
| 1063 |
+
|
| 1064 |
+
def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
    """makeelement(self, _tag, attrib=None, nsmap=None, **_extra)

    Creates a new element associated with this parser.
    """
    # This parser is passed along to _makeElement(); extra keyword
    # arguments are forwarded as a dict.
    element = _makeElement(
        _tag, NULL, None, self, None, None, attrib, nsmap, _extra)
    return element
|
| 1071 |
+
|
| 1072 |
+
# internal parser methods
|
| 1073 |
+
|
| 1074 |
+
cdef xmlDoc* _parseUnicodeDoc(self, utext, char* c_filename) except NULL:
    """Parse unicode document, share dictionary if possible.

    The internal buffer of the Python string is passed to libxml2 directly,
    together with an encoding name chosen from the string's storage kind.
    The parse result is handed to ``context._handleParseResultDoc()`` and
    that call's return value is returned.
    """
    cdef _ParserContext context
    cdef xmlDoc* result
    cdef xmlparser.xmlParserCtxt* pctxt
    cdef Py_ssize_t py_buffer_len
    cdef int buffer_len, c_kind
    cdef const_char* c_text
    cdef const_char* c_encoding = _PY_UNICODE_ENCODING
    if python.PyUnicode_IS_READY(utext):
        # PEP-393 string
        c_text = <const_char*>python.PyUnicode_DATA(utext)
        py_buffer_len = python.PyUnicode_GET_LENGTH(utext)
        c_kind = python.PyUnicode_KIND(utext)
        if c_kind == 1:
            # one byte per character: the length already equals the byte count
            if python.PyUnicode_MAX_CHAR_VALUE(utext) <= 127:
                c_encoding = 'UTF-8'
            else:
                c_encoding = 'ISO-8859-1'
        elif c_kind == 2:
            # two bytes per character: convert the length to a byte count
            py_buffer_len *= 2
            if python.PY_BIG_ENDIAN:
                c_encoding = 'UTF-16BE'  # actually UCS-2
            else:
                c_encoding = 'UTF-16LE'  # actually UCS-2
        elif c_kind == 4:
            # four bytes per character: convert the length to a byte count
            py_buffer_len *= 4
            if python.PY_BIG_ENDIAN:
                c_encoding = 'UTF-32BE'  # actually UCS-4
            else:
                c_encoding = 'UTF-32LE'  # actually UCS-4
        else:
            assert False, f"Illegal Unicode kind {c_kind}"
    else:
        # old Py_UNICODE string
        py_buffer_len = python.PyUnicode_GET_DATA_SIZE(utext)
        c_text = python.PyUnicode_AS_DATA(utext)
    # The byte count is narrowed to a C int below, so it must fit into one.
    assert 0 <= py_buffer_len <= limits.INT_MAX
    buffer_len = py_buffer_len

    context = self._getParserContext()
    context.prepare()
    try:
        pctxt = context._c_ctxt
        __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)
        # remember the context options so they can be restored after parsing
        orig_options = pctxt.options
        with nogil:
            if self._for_html:
                result = htmlparser.htmlCtxtReadMemory(
                    pctxt, c_text, buffer_len, c_filename, c_encoding,
                    self._parse_options)
                if result is not NULL:
                    # a failed name fix-up discards the parsed document
                    if _fixHtmlDictNames(pctxt.dict, result) < 0:
                        tree.xmlFreeDoc(result)
                        result = NULL
            else:
                result = xmlparser.xmlCtxtReadMemory(
                    pctxt, c_text, buffer_len, c_filename, c_encoding,
                    self._parse_options)
        pctxt.options = orig_options # work around libxml2 problem

        return context._handleParseResultDoc(self, result, None)
    finally:
        # always runs, whether parsing succeeded or raised
        context.cleanup()
|
| 1139 |
+
|
| 1140 |
+
cdef xmlDoc* _parseDoc(self, const char* c_text, int c_len, char* c_filename) except NULL:
    """Parse document, share dictionary if possible.

    Parses ``c_len`` bytes starting at ``c_text``.  Without a default
    encoding on the parser, UTF-32 input is detected here (by BOM or by
    ``xmlDetectCharEncoding()``) and its encoding name is passed on
    explicitly; otherwise the encoding is left to libxml2 (NULL).
    The parse result is handed to ``context._handleParseResultDoc()`` and
    that call's return value is returned.
    """
    cdef _ParserContext context
    cdef xmlDoc* result
    cdef xmlparser.xmlParserCtxt* pctxt
    cdef char* c_encoding
    cdef tree.xmlCharEncoding enc
    context = self._getParserContext()
    context.prepare()
    try:
        pctxt = context._c_ctxt
        __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)

        if self._default_encoding is None:
            c_encoding = NULL
            # libxml2 (at least 2.9.3) does not recognise UTF-32 BOMs
            # NOTE: limit to problematic cases because it changes character offsets
            if c_len >= 4 and (c_text[0] == b'\xFF' and c_text[1] == b'\xFE' and
                    c_text[2] == 0 and c_text[3] == 0):
                # UTF-32LE BOM: name the encoding and skip the four BOM bytes
                c_encoding = "UTF-32LE"
                c_text += 4
                c_len -= 4
            elif c_len >= 4 and (c_text[0] == 0 and c_text[1] == 0 and
                    c_text[2] == b'\xFE' and c_text[3] == b'\xFF'):
                # UTF-32BE BOM: name the encoding and skip the four BOM bytes
                c_encoding = "UTF-32BE"
                c_text += 4
                c_len -= 4
            else:
                # no BOM => try to determine encoding
                enc = tree.xmlDetectCharEncoding(<const_xmlChar*>c_text, c_len)
                if enc == tree.XML_CHAR_ENCODING_UCS4LE:
                    c_encoding = 'UTF-32LE'
                elif enc == tree.XML_CHAR_ENCODING_UCS4BE:
                    c_encoding = 'UTF-32BE'
        else:
            # an explicitly configured encoding is passed through unchanged
            c_encoding = _cstr(self._default_encoding)

        # remember the context options so they can be restored after parsing
        orig_options = pctxt.options
        with nogil:
            if self._for_html:
                result = htmlparser.htmlCtxtReadMemory(
                    pctxt, c_text, c_len, c_filename,
                    c_encoding, self._parse_options)
                if result is not NULL:
                    # a failed name fix-up discards the parsed document
                    if _fixHtmlDictNames(pctxt.dict, result) < 0:
                        tree.xmlFreeDoc(result)
                        result = NULL
            else:
                result = xmlparser.xmlCtxtReadMemory(
                    pctxt, c_text, c_len, c_filename,
                    c_encoding, self._parse_options)
        pctxt.options = orig_options # work around libxml2 problem

        return context._handleParseResultDoc(self, result, None)
    finally:
        # always runs, whether parsing succeeded or raised
        context.cleanup()
|
| 1197 |
+
|
| 1198 |
+
cdef xmlDoc* _parseDocFromFile(self, char* c_filename) except NULL:
    """Parse the document that libxml2 reads from ``c_filename``.

    The parser's default encoding, if set, is passed to libxml2; otherwise
    NULL is passed.  The parse result is handed to
    ``context._handleParseResultDoc()`` and that call's return value is
    returned.
    """
    cdef _ParserContext context
    cdef xmlparser.xmlParserCtxt* pctxt
    cdef char* c_encoding
    cdef xmlDoc* result = NULL

    context = self._getParserContext()
    context.prepare()
    try:
        pctxt = context._c_ctxt
        __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)

        c_encoding = NULL
        if self._default_encoding is not None:
            c_encoding = _cstr(self._default_encoding)

        # saved here, restored right after the nogil section
        orig_options = pctxt.options
        with nogil:
            if not self._for_html:
                result = xmlparser.xmlCtxtReadFile(
                    pctxt, c_filename, c_encoding, self._parse_options)
            else:
                result = htmlparser.htmlCtxtReadFile(
                    pctxt, c_filename, c_encoding, self._parse_options)
                # a failed name fix-up discards the parsed document
                if result is not NULL and _fixHtmlDictNames(pctxt.dict, result) < 0:
                    tree.xmlFreeDoc(result)
                    result = NULL
        pctxt.options = orig_options # work around libxml2 problem

        return context._handleParseResultDoc(self, result, c_filename)
    finally:
        context.cleanup()
|
| 1233 |
+
|
| 1234 |
+
cdef xmlDoc* _parseDocFromFilelike(self, filelike, filename,
                                   encoding) except NULL:
    """Parse a document by reading it from a file-like object.

    ``filename`` is normalised to None when it is falsy.  ``encoding``
    falls back to the parser's default encoding when it is falsy.  Reading
    is delegated to a ``_FileReaderContext``; its result is handed to
    ``context._handleParseResultDoc()`` and that call's return value is
    returned.
    """
    cdef _ParserContext context
    cdef _FileReaderContext file_context
    cdef xmlDoc* result
    cdef xmlparser.xmlParserCtxt* pctxt
    if not filename:
        filename = None

    context = self._getParserContext()
    context.prepare()
    try:
        pctxt = context._c_ctxt
        __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)
        file_context = _FileReaderContext(
            filelike, context, filename,
            encoding or self._default_encoding)
        result = file_context._readDoc(pctxt, self._parse_options)

        return context._handleParseResultDoc(
            self, result, filename)
    finally:
        # always runs, whether parsing succeeded or raised
        context.cleanup()
|
| 1258 |
+
|
| 1259 |
+
|
| 1260 |
+
cdef tree.xmlEntity* _getInternalEntityOnly(void* ctxt, const_xmlChar* name) noexcept nogil:
    """
    Callback function to intercept the entity resolution when external entity loading is disabled.

    Entities that are not one of the three external entity types are
    returned as looked up.  For external entities, a fatal structured
    error is reported (if a structured error handler is installed), the
    parser context is marked as not well-formed, and NULL is returned.
    """
    cdef tree.xmlEntity* entity = xmlparser.xmlSAX2GetEntity(ctxt, name)
    if not entity:
        return NULL
    if entity.etype not in (
            tree.xmlEntityType.XML_EXTERNAL_GENERAL_PARSED_ENTITY,
            tree.xmlEntityType.XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
            tree.xmlEntityType.XML_EXTERNAL_PARAMETER_ENTITY):
        return entity

    # Reject all external entities and fail the parsing instead. There is currently
    # no way in libxml2 to just prevent the entity resolution in this case.
    cdef xmlerror.xmlError c_error
    cdef xmlerror.xmlStructuredErrorFunc err_func
    cdef xmlparser.xmlParserInput* parser_input
    cdef void* err_context

    c_ctxt = <xmlparser.xmlParserCtxt *> ctxt
    err_func = xmlerror.xmlStructuredError
    if err_func:
        parser_input = c_ctxt.input
        # Copied from xmlVErrParser() in libxml2: get current input from stack.
        if parser_input and parser_input.filename is NULL and c_ctxt.inputNr > 1:
            parser_input = c_ctxt.inputTab[c_ctxt.inputNr - 2]

        # 'parser_input' may be NULL here, so every field read from it is
        # guarded, including the file name.
        c_error = xmlerror.xmlError(
            domain=xmlerror.xmlErrorDomain.XML_FROM_PARSER,
            code=xmlerror.xmlParserErrors.XML_ERR_EXT_ENTITY_STANDALONE,
            level=xmlerror.xmlErrorLevel.XML_ERR_FATAL,
            message=b"External entity resolution is disabled for security reasons "
                    b"when resolving '&%s;'. Use 'XMLParser(resolve_entities=True)' "
                    b"if you consider it safe to enable it.",
            file=parser_input.filename if parser_input else NULL,
            node=entity,
            str1=<char*> name,
            str2=NULL,
            str3=NULL,
            line=parser_input.line if parser_input else 0,
            int1=0,
            int2=parser_input.col if parser_input else 0,
        )
        err_context = xmlerror.xmlStructuredErrorContext
        err_func(err_context, &c_error)

    c_ctxt.wellFormed = 0
    # The entity was looked up and does not need to be freed.
    return NULL
|
| 1310 |
+
|
| 1311 |
+
|
| 1312 |
+
cdef void _initSaxDocument(void* ctxt) noexcept with gil:
    # SAX "start document" callback, installed on the parser contexts as
    # 'sax.startDocument'.  Runs libxml2's default handler first, then lets
    # the new document share the parser's name dictionary and sets up (or
    # disables) the document's XML ID hash table.
    xmlparser.xmlSAX2StartDocument(ctxt)
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    c_doc = c_ctxt.myDoc

    # set up document dict
    if c_doc and c_ctxt.dict and not c_doc.dict:
        # I have no idea why libxml2 disables this - we need it
        c_ctxt.dictNames = 1
        c_doc.dict = c_ctxt.dict
        # the document now holds its own reference to the dict
        xmlparser.xmlDictReference(c_ctxt.dict)

    # set up XML ID hash table
    if c_ctxt._private:
        # '_private' is cast to the _ParserContext it is expected to hold
        context = <_ParserContext>c_ctxt._private
        if context._collect_ids:
            # keep the global parser dict from filling up with XML IDs
            if c_doc and not c_doc.ids:
                # memory errors are not fatal here
                c_dict = xmlparser.xmlDictCreate()
                if c_dict:
                    c_doc.ids = tree.xmlHashCreateDict(0, c_dict)
                    # drop our own reference to the dict again
                    xmlparser.xmlDictFree(c_dict)
                else:
                    # no separate dict available: plain hash table instead
                    c_doc.ids = tree.xmlHashCreate(0)
        else:
            # ID collection is switched off for this parse
            c_ctxt.loadsubset |= xmlparser.XML_SKIP_IDS
            if c_doc and c_doc.ids and not tree.xmlHashSize(c_doc.ids):
                # already initialised but empty => clear
                tree.xmlHashFree(c_doc.ids, NULL)
                c_doc.ids = NULL
|
| 1343 |
+
|
| 1344 |
+
|
| 1345 |
+
############################################################
|
| 1346 |
+
## ET feed parser
|
| 1347 |
+
############################################################
|
| 1348 |
+
|
| 1349 |
+
cdef class _FeedParser(_BaseParser):
    # Adds the incremental feed()/close() interface on top of _BaseParser.

    # True between the first feed() call and the end of the parser run
    # (close(), or a feed() call that ends the run with an error).
    cdef bint _feed_parser_running

    @property
    def feed_error_log(self):
        """The error log of the last (or current) run of the feed parser.

        Note that this is local to the feed parser and thus is
        different from what the ``error_log`` property returns.
        """
        return self._getPushParserContext()._error_log.copy()

    cpdef feed(self, data):
        """feed(self, data)

        Feeds data to the parser.  The argument should be an 8-bit string
        buffer containing encoded data, although Unicode is supported as long
        as both string types are not mixed.

        This is the main entry point to the consumer interface of a
        parser.  The parser will parse as much of the XML stream as it
        can on each call.  To finish parsing or to reset the parser,
        call the ``close()`` method.  Both methods may raise
        ParseError if errors occur in the input data.  If an error is
        raised, there is no longer a need to call ``close()``.

        The feed parser interface is independent of the normal parser
        usage.  You can use the same parser as a feed parser and in
        the ``parse()`` function concurrently.

        Raises TypeError if ``data`` is neither a byte string nor a
        Unicode string.
        """
        cdef _ParserContext context
        cdef bytes bstring
        cdef xmlparser.xmlParserCtxt* pctxt
        cdef Py_ssize_t py_buffer_len, ustart
        cdef const_char* char_data
        cdef const_char* c_encoding
        cdef int buffer_len
        cdef int error
        cdef bint recover = self._parse_options & xmlparser.XML_PARSE_RECOVER

        if isinstance(data, bytes):
            if self._default_encoding is None:
                c_encoding = NULL
            else:
                c_encoding = self._default_encoding
            char_data = _cstr(data)
            py_buffer_len = python.PyBytes_GET_SIZE(data)
            ustart = 0
        elif isinstance(data, unicode):
            # Unicode input is re-encoded to UTF-8 chunk by chunk below;
            # a NULL 'char_data' marks this mode.
            c_encoding = b"UTF-8"
            char_data = NULL
            py_buffer_len = len(<unicode> data)
            ustart = 0
        else:
            raise TypeError, "Parsing requires string data"

        context = self._getPushParserContext()
        pctxt = context._c_ctxt
        error = 0
        if not self._feed_parser_running:
            # first chunk of a new parser run: reset the push parser
            context.prepare(set_document_loader=False)
            self._feed_parser_running = 1
            c_filename = (_cstr(self._filename)
                          if self._filename is not None else NULL)

            # We have to give *mlCtxtResetPush() enough input to figure
            # out the character encoding (at least four bytes),
            # however if we give it all we got, we'll have nothing for
            # *mlParseChunk() and things go wrong.
            buffer_len = 0
            if char_data is not NULL:
                buffer_len = 4 if py_buffer_len > 4 else <int>py_buffer_len
            orig_loader = _register_document_loader()
            if self._for_html:
                error = _htmlCtxtResetPush(
                    pctxt, char_data, buffer_len, c_filename, c_encoding,
                    self._parse_options)
            else:
                xmlparser.xmlCtxtUseOptions(pctxt, self._parse_options)
                error = xmlparser.xmlCtxtResetPush(
                    pctxt, char_data, buffer_len, c_filename, c_encoding)
            _reset_document_loader(orig_loader)
            # skip the bytes that were already handed to the reset call
            py_buffer_len -= buffer_len
            char_data += buffer_len
            if error:
                raise MemoryError()
            __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)

        #print pctxt.charset, 'NONE' if c_encoding is NULL else c_encoding

        fixup_error = 0
        # In recover mode, parsing continues past reported errors.
        while py_buffer_len > 0 and (error == 0 or recover):
            if char_data is NULL:
                # Unicode parsing by converting chunks to UTF-8
                buffer_len = 2**19  # len(bytes) <= 4 * (2**19) == 2 MiB
                bstring = (<unicode> data)[ustart : ustart+buffer_len].encode('UTF-8')
                ustart += buffer_len
                py_buffer_len -= buffer_len  # may end up < 0
                error, fixup_error = _parse_data_chunk(pctxt, <const char*> bstring, <int> len(bstring))
            else:
                # Direct byte string parsing.
                buffer_len = <int>py_buffer_len if py_buffer_len <= limits.INT_MAX else limits.INT_MAX
                error, fixup_error = _parse_data_chunk(pctxt, char_data, buffer_len)
                py_buffer_len -= buffer_len
                char_data += buffer_len

            if fixup_error:
                # a failed name fix-up is recorded as a MemoryError
                context.store_exception(MemoryError())

            if context._has_raised():
                # propagate Python exceptions immediately
                recover = 0
                error = 1
                break

            if error and not pctxt.replaceEntities and not pctxt.validate:
                # in this mode, we ignore errors about undefined entities
                for entry in context._error_log.filter_from_errors():
                    if entry.type != ErrorTypes.WAR_UNDECLARED_ENTITY and \
                           entry.type != ErrorTypes.ERR_UNDECLARED_ENTITY:
                        break
                else:
                    # only undeclared-entity entries were found
                    error = 0

        if not pctxt.wellFormed and xmlparser.xmlCtxtIsStopped(pctxt) and context._has_raised():
            # propagate Python exceptions immediately
            recover = 0
            error = 1

        if fixup_error or not recover and (error or not pctxt.wellFormed):
            # The parser run ends here; the result handler is called with
            # the current document.
            self._feed_parser_running = 0
            try:
                context._handleParseResult(self, pctxt.myDoc, None)
            finally:
                context.cleanup()

    cpdef close(self):
        """close(self)

        Terminates feeding data to this parser.  This tells the parser to
        process any remaining data in the feed buffer, and then returns the
        root Element of the tree that was parsed.

        This method must be called after passing the last chunk of data into
        the ``feed()`` method.  It should only be called when using the feed
        parser interface, all other usage is undefined.

        Raises XMLSyntaxError("no element found") if no feed parser run is
        in progress.
        """
        if not self._feed_parser_running:
            raise XMLSyntaxError("no element found",
                                 xmlerror.XML_ERR_INTERNAL_ERROR, 0, 0,
                                 self._filename)

        context = self._getPushParserContext()
        pctxt = context._c_ctxt

        self._feed_parser_running = 0
        # An empty chunk with the 'terminate' argument set to 1 finishes
        # the document.
        if self._for_html:
            htmlparser.htmlParseChunk(pctxt, NULL, 0, 1)
        else:
            xmlparser.xmlParseChunk(pctxt, NULL, 0, 1)

        if (pctxt.recovery and not xmlparser.xmlCtxtIsStopped(pctxt) and
                isinstance(context, _SaxParserContext)):
            # apply any left-over 'end' events
            (<_SaxParserContext>context).flushEvents()

        try:
            result = context._handleParseResult(self, pctxt.myDoc, None)
        finally:
            context.cleanup()

        # A _Document result is unwrapped to its root element; anything
        # else is returned unchanged.
        if isinstance(result, _Document):
            return (<_Document>result).getroot()
        else:
            return result
|
| 1524 |
+
|
| 1525 |
+
|
| 1526 |
+
cdef (int, int) _parse_data_chunk(xmlparser.xmlParserCtxt* c_ctxt,
                                  const char* char_data, int buffer_len):
    # Feed one chunk of 'buffer_len' bytes to the push parser without
    # holding the GIL.
    #
    # Returns the pair (error, fixup_error): 'error' is the return value of
    # the libxml2 chunk parser, 'fixup_error' is the result of the HTML
    # name fix-up (always 0 for XML).
    fixup_error = 0
    with nogil:
        if c_ctxt.html:
            c_node = c_ctxt.node  # last node where the parser stopped
            # the document loader is swapped in only around the parse call
            orig_loader = _register_document_loader()
            error = htmlparser.htmlParseChunk(c_ctxt, char_data, buffer_len, 0)
            _reset_document_loader(orig_loader)
            # and now for the fun part: move node names to the dict
            if c_ctxt.myDoc:
                fixup_error = _fixHtmlDictSubtreeNames(
                    c_ctxt.dict, c_ctxt.myDoc, c_node)
                # make the document use the parser's dict, releasing any
                # different dict it held before
                if c_ctxt.myDoc.dict and c_ctxt.myDoc.dict is not c_ctxt.dict:
                    xmlparser.xmlDictFree(c_ctxt.myDoc.dict)
                    c_ctxt.myDoc.dict = c_ctxt.dict
                    xmlparser.xmlDictReference(c_ctxt.dict)
        else:
            # the document loader is swapped in only around the parse call
            orig_loader = _register_document_loader()
            error = xmlparser.xmlParseChunk(c_ctxt, char_data, buffer_len, 0)
            _reset_document_loader(orig_loader)
    return (error, fixup_error)
|
| 1548 |
+
|
| 1549 |
+
|
| 1550 |
+
cdef int _htmlCtxtResetPush(xmlparser.xmlParserCtxt* c_ctxt,
                             const_char* c_data, int buffer_len,
                             const_char* c_filename, const_char* c_encoding,
                             int parse_options) except -1:
    """Reset a push parser context for parsing a new HTML document.

    Uses the XML reset function and then switches the context to HTML
    mode with the given parse options.  Returns the error code of
    ``xmlCtxtResetPush()`` if that is non-zero, otherwise 0.
    """
    # libxml2 lacks an HTML push parser setup function
    error = xmlparser.xmlCtxtResetPush(
        c_ctxt, c_data, buffer_len, c_filename, c_encoding)
    if error:
        return error

    # fix libxml2 setup for HTML
    if tree.LIBXML_VERSION < 21400:
        c_ctxt.progressive = 1  # TODO: remove
    c_ctxt.html = 1
    htmlparser.htmlCtxtUseOptions(c_ctxt, parse_options)

    return 0
|
| 1568 |
+
|
| 1569 |
+
|
| 1570 |
+
############################################################
|
| 1571 |
+
## XML parser
|
| 1572 |
+
############################################################
|
| 1573 |
+
|
| 1574 |
+
# Option flags that every XMLParser starts from, before the constructor
# arguments add or remove individual flags.
cdef int _XML_DEFAULT_PARSE_OPTIONS = (
    xmlparser.XML_PARSE_NOENT
    | xmlparser.XML_PARSE_NOCDATA
    | xmlparser.XML_PARSE_NONET
    | xmlparser.XML_PARSE_COMPACT
    | xmlparser.XML_PARSE_BIG_LINES
)
|
| 1582 |
+
|
| 1583 |
+
cdef class XMLParser(_FeedParser):
    """XMLParser(self, encoding=None, attribute_defaults=False, dtd_validation=False, \
                 load_dtd=False, no_network=True, decompress=False, ns_clean=False, \
                 recover=False, schema: XMLSchema =None, huge_tree=False, \
                 remove_blank_text=False, resolve_entities='internal', \
                 remove_comments=False, remove_pis=False, strip_cdata=True, \
                 collect_ids=True, target=None, compact=True)

    The XML parser.

    Parsers can be supplied as additional argument to various parse
    functions of the lxml API.  A default parser is always available
    and can be replaced by a call to the global function
    'set_default_parser'.  New parsers can be created at any time
    without a major run-time overhead.

    The keyword arguments in the constructor are mainly based on the
    libxml2 parser configuration.  A DTD will also be loaded if DTD
    validation or attribute default values are requested (unless you
    additionally provide an XMLSchema from which the default
    attributes can be read).

    Available boolean keyword arguments:

    - attribute_defaults - inject default attributes from DTD or XMLSchema
    - dtd_validation     - validate against a DTD referenced by the document
    - load_dtd           - use DTD for parsing
    - no_network         - prevent network access for related files (default: True)
    - decompress         - automatically decompress gzip input
                           (default: False, changed in lxml 6.0, disabling only affects libxml2 2.15+)
    - ns_clean           - clean up redundant namespace declarations
    - recover            - try hard to parse through broken XML
    - remove_blank_text  - discard blank text nodes that appear ignorable
    - remove_comments    - discard comments
    - remove_pis         - discard processing instructions
    - strip_cdata        - replace CDATA sections by normal text content (default: True)
    - compact            - save memory for short text content (default: True)
    - collect_ids        - use a hash table of XML IDs for fast access
                           (default: True, always True with DTD validation)
    - huge_tree          - disable security restrictions and support very deep trees
                           and very long text content

    Other keyword arguments:

    - resolve_entities - replace entities by their text value: False for keeping the
          entity references, True for resolving them, and 'internal' for resolving
          internal definitions only (no external file/URL access).
          The default used to be True and was changed to 'internal' in lxml 5.0.
    - encoding - override the document encoding (note: libiconv encoding name)
    - target   - a parser target object that will receive the parse events
    - schema   - an XMLSchema to validate against

    Note that you should avoid sharing parsers between threads.  While this is
    not harmful, it is more efficient to use separate parsers.  This does not
    apply to the default parser.
    """
    def __init__(self, *, encoding=None, attribute_defaults=False,
                 dtd_validation=False, load_dtd=False, no_network=True, decompress=False,
                 ns_clean=False, recover=False, XMLSchema schema=None,
                 huge_tree=False, remove_blank_text=False, resolve_entities='internal',
                 remove_comments=False, remove_pis=False, strip_cdata=True,
                 collect_ids=True, target=None, compact=True):
        cdef int parse_options
        cdef bint resolve_external = True
        parse_options = _XML_DEFAULT_PARSE_OPTIONS

        # Flags that are switched on by request.
        if load_dtd:
            parse_options |= xmlparser.XML_PARSE_DTDLOAD
        if dtd_validation:
            # validation implies loading the DTD
            parse_options |= (xmlparser.XML_PARSE_DTDVALID |
                              xmlparser.XML_PARSE_DTDLOAD)
        if attribute_defaults:
            parse_options |= xmlparser.XML_PARSE_DTDATTR
            if schema is None:
                # without a schema, the defaults have to come from the DTD
                parse_options |= xmlparser.XML_PARSE_DTDLOAD
        if ns_clean:
            parse_options |= xmlparser.XML_PARSE_NSCLEAN
        if recover:
            parse_options |= xmlparser.XML_PARSE_RECOVER
        if remove_blank_text:
            parse_options |= xmlparser.XML_PARSE_NOBLANKS
        if huge_tree:
            parse_options |= xmlparser.XML_PARSE_HUGE

        # Flags from the default set that are switched off by request.
        # They are cleared with '& ~flag' rather than toggled with '^', so
        # the result does not depend on the flag currently being set.
        if not no_network:
            parse_options &= ~xmlparser.XML_PARSE_NONET
        if not compact:
            parse_options &= ~xmlparser.XML_PARSE_COMPACT
        if not resolve_entities:
            parse_options &= ~xmlparser.XML_PARSE_NOENT
        elif resolve_entities == 'internal':
            # entities are resolved, but external ones are refused
            resolve_external = False
        if not strip_cdata:
            parse_options &= ~xmlparser.XML_PARSE_NOCDATA
        if decompress:
            parse_options |= xmlparser.XML_PARSE_UNZIP

        _BaseParser.__init__(self, parse_options, False, schema,
                             remove_comments, remove_pis, strip_cdata,
                             collect_ids, target, encoding, resolve_external)

    # Allow subscripting XMLParser in type annotations (PEP 560)
    def __class_getitem__(cls, item):
        return _GenericAlias(cls, item)
|
| 1685 |
+
|
| 1686 |
+
|
| 1687 |
+
cdef class XMLPullParser(XMLParser):
    """XMLPullParser(self, events=None, *, tag=None, **kwargs)

    XML parser that collects parse events in an iterator.

    The collected events are the same as for iterparse(), but the
    parser itself is non-blocking in the sense that it receives
    data chunks incrementally through its .feed() method, instead
    of reading them directly from a file(-like) object all by itself.

    By default, it collects Element end events.  To change that,
    pass any subset of the available events into the ``events``
    argument: ``'start'``, ``'end'``, ``'start-ns'``,
    ``'end-ns'``, ``'comment'``, ``'pi'``.

    To support loading external dependencies relative to the input
    source, you can pass the ``base_url``.
    """
    def __init__(self, events=None, *, tag=None, base_url=None, **kwargs):
        # All remaining keyword arguments configure the underlying XMLParser.
        XMLParser.__init__(self, **kwargs)
        self._setBaseURL(base_url)
        # Only 'end' events are collected unless the caller asks otherwise.
        self._collectEvents(('end',) if events is None else events, tag)

    def read_events(self):
        # Hand out the events iterator of the push parser context.
        sax_context = <_SaxParserContext?>self._getPushParserContext()
        return sax_context.events_iterator
|
| 1714 |
+
|
| 1715 |
+
|
| 1716 |
+
cdef class ETCompatXMLParser(XMLParser):
    """ETCompatXMLParser(self, encoding=None, attribute_defaults=False, \
                 dtd_validation=False, load_dtd=False, no_network=True, decompress=False, \
                 ns_clean=False, recover=False, schema=None, \
                 huge_tree=False, remove_blank_text=False, resolve_entities=True, \
                 remove_comments=True, remove_pis=True, strip_cdata=True, \
                 target=None, compact=True)

    An XML parser with an ElementTree compatible default setup.

    See the XMLParser class for details.

    This parser has ``remove_comments`` and ``remove_pis`` enabled by default
    and thus ignores comments and processing instructions.
    """
    def __init__(self, *, encoding=None, attribute_defaults=False,
                 dtd_validation=False, load_dtd=False, no_network=True, decompress=False,
                 ns_clean=False, recover=False, schema=None,
                 huge_tree=False, remove_blank_text=False, resolve_entities=True,
                 remove_comments=True, remove_pis=True, strip_cdata=True,
                 target=None, compact=True):
        # Every argument is forwarded unchanged; this class differs from
        # XMLParser only in the default values declared above.
        XMLParser.__init__(
            self,
            encoding=encoding,
            attribute_defaults=attribute_defaults,
            dtd_validation=dtd_validation,
            load_dtd=load_dtd,
            no_network=no_network,
            decompress=decompress,
            ns_clean=ns_clean,
            recover=recover,
            schema=schema,
            huge_tree=huge_tree,
            remove_blank_text=remove_blank_text,
            resolve_entities=resolve_entities,
            remove_comments=remove_comments,
            remove_pis=remove_pis,
            strip_cdata=strip_cdata,
            target=target,
            compact=compact,
        )
|
| 1756 |
+
|
| 1757 |
+
# Alias under the name used by the ElementTree 1.2 API.
XMLTreeBuilder = ETCompatXMLParser


# Parser instance that set_default_parser() falls back to when called
# without an argument; it is also registered as the initial default below.
cdef XMLParser __DEFAULT_XML_PARSER = XMLParser()

__GLOBAL_PARSER_CONTEXT.setDefaultParser(__DEFAULT_XML_PARSER)
|
| 1765 |
+
|
| 1766 |
+
def set_default_parser(_BaseParser parser=None):
    """set_default_parser(parser=None)

    Install ``parser`` as the default parser of the current thread.

    The default parser is what the parse functions of the lxml API use
    whenever they are called without an explicit parser.  Calling this
    function with None, or without an argument, restores the original
    default configuration.

    Note that the pre-installed default parser is not thread-safe and is
    best avoided in multi-threaded environments.  Create a separate parser
    for each thread explicitly, or use a parser pool.
    """
    __GLOBAL_PARSER_CONTEXT.setDefaultParser(
        __DEFAULT_XML_PARSER if parser is None else parser)
|
| 1781 |
+
|
| 1782 |
+
def get_default_parser():
    """get_default_parser()

    Return the default parser as reported by the global parser context.
    """
    default_parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
    return default_parser
|
| 1785 |
+
|
| 1786 |
+
############################################################
|
| 1787 |
+
## HTML parser
|
| 1788 |
+
############################################################
|
| 1789 |
+
|
| 1790 |
+
# Option mask that HTMLParser starts from before applying its keyword arguments.
cdef int _HTML_DEFAULT_PARSE_OPTIONS = (
    htmlparser.HTML_PARSE_COMPACT |
    htmlparser.HTML_PARSE_NONET |
    htmlparser.HTML_PARSE_RECOVER
)

# Sentinel used to tell "argument not passed" apart from any real value.
cdef object _UNUSED = object()
|
| 1798 |
+
|
| 1799 |
+
cdef class HTMLParser(_FeedParser):
    """HTMLParser(self, encoding=None, remove_blank_text=False, \
                   remove_comments=False, remove_pis=False, \
                   no_network=True, decompress=False, target=None, schema: XMLSchema =None, \
                   recover=True, compact=True, default_doctype=True, \
                   collect_ids=True, huge_tree=False)

    The HTML parser.

    This parser allows reading HTML into a normal XML tree.  By
    default, it can read broken (non well-formed) HTML, depending on
    the capabilities of libxml2.  Use the 'recover' option to switch
    this off.

    Available boolean keyword arguments:

    - recover            - try hard to parse through broken HTML (default: True)
    - no_network         - prevent network access for related files (default: True)
    - decompress         - automatically decompress gzip input
                           (default: False, changed in lxml 6.0, disabling only affects libxml2 2.15+)
    - remove_blank_text  - discard empty text nodes that are ignorable (i.e. not actual text content)
    - remove_comments    - discard comments
    - remove_pis         - discard processing instructions
    - compact            - save memory for short text content (default: True)
    - default_doctype    - add a default doctype even if it is not found in the HTML (default: True)
    - collect_ids        - use a hash table of XML IDs for fast access (default: True)
    - huge_tree          - disable security restrictions and support very deep trees
                           and very long text content

    Other keyword arguments:

    - encoding - override the document encoding (note: libiconv encoding name)
    - target   - a parser target object that will receive the parse events
    - schema   - an XMLSchema to validate against

    The ``strip_cdata`` argument is accepted for backwards compatibility only
    and triggers a DeprecationWarning when passed.

    Note that you should avoid sharing parsers between threads for performance
    reasons.
    """
    def __init__(self, *, encoding=None, remove_blank_text=False,
                 remove_comments=False, remove_pis=False, strip_cdata=_UNUSED,
                 no_network=True, decompress=False, target=None, XMLSchema schema=None,
                 recover=True, compact=True, default_doctype=True,
                 collect_ids=True, huge_tree=False):
        cdef int parse_options = _HTML_DEFAULT_PARSE_OPTIONS

        # Set or clear each flag explicitly instead of toggling it with XOR,
        # so the result does not depend on which flags the default mask has.
        if remove_blank_text:
            parse_options |= htmlparser.HTML_PARSE_NOBLANKS
        if not recover:
            parse_options &= ~htmlparser.HTML_PARSE_RECOVER
        if not no_network:
            parse_options &= ~htmlparser.HTML_PARSE_NONET
        if not compact:
            parse_options &= ~htmlparser.HTML_PARSE_COMPACT
        if not default_doctype:
            parse_options |= htmlparser.HTML_PARSE_NODEFDTD
        if huge_tree:
            parse_options |= xmlparser.XML_PARSE_HUGE
        if decompress:
            parse_options |= xmlparser.XML_PARSE_UNZIP

        if strip_cdata is not _UNUSED:
            import warnings
            warnings.warn(
                "The 'strip_cdata' option of HTMLParser() has never done anything and will eventually be removed.",
                DeprecationWarning)
        # 'strip_cdata' is still forwarded as received (possibly the sentinel).
        _BaseParser.__init__(self, parse_options, True, schema,
                             remove_comments, remove_pis, strip_cdata,
                             collect_ids, target, encoding)

    # Allow subscripting HTMLParser in type annotations (PEP 560)
    def __class_getitem__(cls, item):
        return _GenericAlias(cls, item)
|
| 1870 |
+
|
| 1871 |
+
|
| 1872 |
+
# Module-level HTMLParser instance created with all default options.
cdef HTMLParser __DEFAULT_HTML_PARSER = HTMLParser()
|
| 1874 |
+
|
| 1875 |
+
|
| 1876 |
+
cdef class HTMLPullParser(HTMLParser):
    """HTMLPullParser(self, events=None, *, tag=None, base_url=None, **kwargs)

    HTML parser that makes its parse events available through an iterator.

    It collects the same events as iterparse().  The parser is non-blocking
    in the sense that it does not read from a file(-like) object by itself;
    data chunks are passed in incrementally through the .feed() method.

    Element end events are collected by default.  Pass any subset of
    ``'start'``, ``'end'``, ``'start-ns'``, ``'end-ns'``, ``'comment'``,
    ``'pi'`` as the ``events`` argument to change that.

    Pass ``base_url`` to support loading external dependencies relative to
    the input source.
    """
    def __init__(self, events=None, *, tag=None, base_url=None, **kwargs):
        # All remaining keyword options configure the underlying HTML parser.
        HTMLParser.__init__(self, **kwargs)
        self._setBaseURL(base_url)
        # Without an explicit selection, only element 'end' events are collected.
        self._collectEvents(('end',) if events is None else events, tag)

    def read_events(self):
        # The events iterator lives on the push parser context of this parser.
        cdef _SaxParserContext context = <_SaxParserContext?>self._getPushParserContext()
        return context.events_iterator
|
| 1903 |
+
|
| 1904 |
+
|
| 1905 |
+
############################################################
|
| 1906 |
+
## helper functions for document creation
|
| 1907 |
+
############################################################
|
| 1908 |
+
|
| 1909 |
+
cdef xmlDoc* _parseDoc(text, filename, _BaseParser parser) except NULL:
    # Dispatch in-memory input to the helper matching its type
    # (bytes, unicode, or any other object handled as a char buffer).
    cdef char* c_filename = NULL
    if parser is None:
        parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
    if filename:
        # NOTE(review): 'filename_utf' presumably has to stay referenced while
        # 'c_filename' is in use - confirm against _cstr().
        filename_utf = _encodeFilenameUTF8(filename)
        c_filename = _cstr(filename_utf)
    if isinstance(text, bytes):
        return _parseDoc_bytes(<bytes> text, filename, c_filename, parser)
    if isinstance(text, unicode):
        return _parseDoc_unicode(<unicode> text, filename, c_filename, parser)
    return _parseDoc_charbuffer(text, filename, c_filename, parser)
|
| 1924 |
+
|
| 1925 |
+
|
| 1926 |
+
cdef xmlDoc* _parseDoc_unicode(unicode text, filename, char* c_filename, _BaseParser parser) except NULL:
    # Parse a unicode string; input whose data size exceeds INT_MAX is
    # routed through a StringIO object and the file-like parsing path.
    cdef Py_ssize_t data_size
    if not python.PyUnicode_IS_READY(text):
        # old Py_UNICODE string
        data_size = python.PyUnicode_GET_DATA_SIZE(text)
    else:
        # PEP-393 Unicode string
        data_size = python.PyUnicode_GET_LENGTH(text) * python.PyUnicode_KIND(text)
    if data_size <= limits.INT_MAX:
        return parser._parseUnicodeDoc(text, c_filename)
    return parser._parseDocFromFilelike(
        StringIO(text), filename, None)
|
| 1938 |
+
|
| 1939 |
+
|
| 1940 |
+
cdef xmlDoc* _parseDoc_bytes(bytes text, filename, char* c_filename, _BaseParser parser) except NULL:
    # Parse a bytes object; input longer than INT_MAX is routed through a
    # BytesIO object and the file-like parsing path.
    cdef Py_ssize_t byte_count = len(text)
    if byte_count <= limits.INT_MAX:
        return parser._parseDoc(text, byte_count, c_filename)
    return parser._parseDocFromFilelike(BytesIO(text), filename, None)
|
| 1945 |
+
|
| 1946 |
+
|
| 1947 |
+
cdef xmlDoc* _parseDoc_charbuffer(text, filename, char* c_filename, _BaseParser parser) except NULL:
    # Parse any object exposing the buffer protocol through a contiguous
    # 'unsigned char' view of its data.
    cdef const unsigned char[::1] view = memoryview(text).cast('B')
    cdef Py_ssize_t byte_count = len(view)
    if byte_count <= limits.INT_MAX:
        return parser._parseDoc(<const char*>&view[0], byte_count, c_filename)
    # too large for a single in-memory parse call: use the file-like path
    return parser._parseDocFromFilelike(BytesIO(text), filename, None)
|
| 1953 |
+
|
| 1954 |
+
|
| 1955 |
+
cdef xmlDoc* _parseDocFromFile(filename8, _BaseParser parser) except NULL:
    # Parse the file named by 'filename8', using the default parser when
    # no parser is given.
    cdef _BaseParser active_parser = parser
    if active_parser is None:
        active_parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
    return active_parser._parseDocFromFile(_cstr(filename8))
|
| 1959 |
+
|
| 1960 |
+
|
| 1961 |
+
cdef xmlDoc* _parseDocFromFilelike(source, filename,
                                   _BaseParser parser) except NULL:
    # Parse from a file-like object, using the default parser when no
    # parser is given.
    cdef _BaseParser active_parser = parser
    if active_parser is None:
        active_parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
    return active_parser._parseDocFromFilelike(source, filename, None)
|
| 1966 |
+
|
| 1967 |
+
|
| 1968 |
+
cdef xmlDoc* _newXMLDoc() except NULL:
    # Create an empty XML document; raises MemoryError if libxml2 returns NULL.
    cdef xmlDoc* c_doc = tree.xmlNewDoc(NULL)
    if c_doc is NULL:
        raise MemoryError()
    # fall back to UTF-8 when the new document carries no encoding
    if c_doc.encoding is NULL:
        c_doc.encoding = tree.xmlStrdup(<unsigned char*>"UTF-8")
    __GLOBAL_PARSER_CONTEXT.initDocDict(c_doc)
    return c_doc
|
| 1977 |
+
|
| 1978 |
+
cdef xmlDoc* _newHTMLDoc() except NULL:
    # Create an empty HTML document; raises MemoryError if libxml2 returns NULL.
    cdef xmlDoc* c_doc = tree.htmlNewDoc(NULL, NULL)
    if c_doc is NULL:
        raise MemoryError()
    __GLOBAL_PARSER_CONTEXT.initDocDict(c_doc)
    return c_doc
|
| 1985 |
+
|
| 1986 |
+
cdef xmlDoc* _copyDoc(xmlDoc* c_doc, int recursive) except NULL:
    # Copy a document; only the recursive copy is done without the GIL.
    cdef xmlDoc* c_copy
    if not recursive:
        c_copy = tree.xmlCopyDoc(c_doc, 0)
    else:
        with nogil:
            c_copy = tree.xmlCopyDoc(c_doc, recursive)
    if c_copy is NULL:
        raise MemoryError()
    __GLOBAL_PARSER_CONTEXT.initDocDict(c_copy)
    return c_copy
|
| 1997 |
+
|
| 1998 |
+
cdef xmlDoc* _copyDocRoot(xmlDoc* c_doc, xmlNode* c_new_root) except NULL:
    """Recursively copy the document and make c_new_root the new root node.

    The document itself is copied without its content; a recursive copy of
    'c_new_root' then becomes the root element of the new document.

    Raises MemoryError if either copy fails.
    """
    cdef xmlDoc* result
    cdef xmlNode* c_node
    result = tree.xmlCopyDoc(c_doc, 0)  # non recursive
    if result is NULL:
        # same check as in _copyDoc(): never hand NULL to initDocDict()
        raise MemoryError()
    __GLOBAL_PARSER_CONTEXT.initDocDict(result)
    with nogil:
        c_node = tree.xmlDocCopyNode(c_new_root, result, 1)  # recursive
    if c_node is NULL:
        # nobody else references the new document yet, so free it here
        tree.xmlFreeDoc(result)
        raise MemoryError()
    tree.xmlDocSetRootElement(result, c_node)
    _copyTail(c_new_root.next, c_node)
    return result
|
| 2011 |
+
|
| 2012 |
+
cdef xmlNode* _copyNodeToDoc(xmlNode* c_node, xmlDoc* c_doc) except NULL:
    "Recursively copy the element into the document. c_doc is not modified."
    cdef xmlNode* c_copy = tree.xmlDocCopyNode(c_node, c_doc, 1)  # recursive
    if c_copy is NULL:
        raise MemoryError()
    # carry over whatever follows the source node as tail of the copy
    _copyTail(c_node.next, c_copy)
    return c_copy
|
| 2020 |
+
|
| 2021 |
+
|
| 2022 |
+
############################################################
|
| 2023 |
+
## API level helper functions for _Document creation
|
| 2024 |
+
############################################################
|
| 2025 |
+
|
| 2026 |
+
cdef _Document _parseDocument(source, _BaseParser parser, base_url):
    # Parse 'source', which may be a path/URL string, an object offering
    # getvalue()/tell(), or any object with a read() method.
    cdef _Document doc
    source = _getFSPathOrObject(source)
    if _isString(source):
        # string input: parse the file directly from the filesystem
        doc = _parseDocumentFromURL(_encodeFilename(source), parser)
        if base_url is None:
            return doc
        # an explicit base URL replaces the URL recorded by the parser
        encoded_url = _encodeFilenameUTF8(base_url)
        if doc._c_doc.URL is not NULL:
            tree.xmlFree(<char*>doc._c_doc.URL)
        doc._c_doc.URL = tree.xmlStrdup(_xcstr(encoded_url))
        return doc

    url = base_url if base_url is not None else _getFilenameForFile(source)

    # StringIO-like object that has not been read from yet: parse its value
    if (hasattr(source, 'getvalue') and hasattr(source, 'tell')
            and source.tell() == 0):
        return _parseMemoryDocument(source.getvalue(), url, parser)

    # Support for file-like objects (urlgrabber.urlopen, ...)
    if hasattr(source, 'read'):
        return _parseFilelikeDocument(source, url, parser)

    raise TypeError(f"cannot parse from '{python._fqtypename(source).decode('UTF-8')}'")
|
| 2055 |
+
|
| 2056 |
+
cdef _Document _parseDocumentFromURL(url, _BaseParser parser):
    # Parse the file at 'url' and wrap the resulting C document.
    return _documentFactory(_parseDocFromFile(url, parser), parser)
|
| 2059 |
+
|
| 2060 |
+
cdef _Document _parseMemoryDocument(text, url, _BaseParser parser):
    # Parse in-memory data; unicode input must not carry an encoding
    # declaration.
    if isinstance(text, unicode) and _hasEncodingDeclaration(text):
        raise ValueError(
            "Unicode strings with encoding declaration are not supported. "
            "Please use bytes input or XML fragments without declaration.")
    return _documentFactory(_parseDoc(text, url, parser), parser)
|
| 2068 |
+
|
| 2069 |
+
cdef _Document _parseFilelikeDocument(source, url, _BaseParser parser):
    # Parse from a file-like object and wrap the resulting C document.
    return _documentFactory(_parseDocFromFilelike(source, url, parser), parser)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/parsertarget.pxi
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Parser target context (ET target interface)
|
| 2 |
+
|
| 3 |
+
cdef object inspect_getargspec
|
| 4 |
+
try:
|
| 5 |
+
from inspect import getfullargspec as inspect_getargspec
|
| 6 |
+
except ImportError:
|
| 7 |
+
from inspect import getargspec as inspect_getargspec
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class _TargetParserResult(Exception):
    """Exception used as a carrier for the result of a parser target.

    Somewhat ugly, admittedly, but raising is the easiest way to get the
    Python level parser result through the parser machinery and up to the
    API level functions.
    """
    def __init__(self, result):
        # the value to hand back to the API level caller
        self.result = result
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@cython.final
@cython.internal
cdef class _PythonSaxParserTarget(_SaxParserTarget):
    # Adapter that forwards SAX events to the callables of a Python
    # parser target object.  A callback attribute stays None when the
    # target does not provide the corresponding method.
    cdef object _target_start
    cdef object _target_end
    cdef object _target_data
    cdef object _target_start_ns
    cdef object _target_end_ns
    cdef object _target_doctype
    cdef object _target_pi
    cdef object _target_comment
    cdef bint _start_takes_nsmap

    def __cinit__(self, target):
        # Look up each supported callback on the target and enable the
        # matching SAX event for every callback that is present and not None.
        cdef int event_filter = 0
        self._start_takes_nsmap = 0

        self._target_start = getattr(target, 'start', None)
        if self._target_start is not None:
            event_filter |= SAX_EVENT_START
            try:
                # more than three positional arguments, or *args, means
                # that start() can also receive the nsmap
                arguments = inspect_getargspec(self._target_start)
                if len(arguments[0]) > 3 or arguments[1] is not None:
                    self._start_takes_nsmap = 1
            except TypeError:
                pass

        self._target_end = getattr(target, 'end', None)
        if self._target_end is not None:
            event_filter |= SAX_EVENT_END

        self._target_start_ns = getattr(target, 'start_ns', None)
        if self._target_start_ns is not None:
            event_filter |= SAX_EVENT_START_NS

        self._target_end_ns = getattr(target, 'end_ns', None)
        if self._target_end_ns is not None:
            event_filter |= SAX_EVENT_END_NS

        self._target_data = getattr(target, 'data', None)
        if self._target_data is not None:
            event_filter |= SAX_EVENT_DATA

        self._target_doctype = getattr(target, 'doctype', None)
        if self._target_doctype is not None:
            event_filter |= SAX_EVENT_DOCTYPE

        self._target_pi = getattr(target, 'pi', None)
        if self._target_pi is not None:
            event_filter |= SAX_EVENT_PI

        self._target_comment = getattr(target, 'comment', None)
        if self._target_comment is not None:
            event_filter |= SAX_EVENT_COMMENT

        self._sax_event_filter = event_filter

    cdef _handleSaxStart(self, tag, attrib, nsmap):
        # pass the nsmap only to start() callbacks that can take it
        if not self._start_takes_nsmap:
            return self._target_start(tag, attrib)
        return self._target_start(tag, attrib, nsmap)

    cdef _handleSaxEnd(self, tag):
        return self._target_end(tag)

    cdef _handleSaxStartNs(self, prefix, uri):
        return self._target_start_ns(prefix, uri)

    cdef _handleSaxEndNs(self, prefix):
        return self._target_end_ns(prefix)

    cdef int _handleSaxData(self, data) except -1:
        self._target_data(data)
        return 0

    cdef int _handleSaxDoctype(self, root_tag, public_id, system_id) except -1:
        self._target_doctype(root_tag, public_id, system_id)
        return 0

    cdef _handleSaxPi(self, target, data):
        return self._target_pi(target, data)

    cdef _handleSaxComment(self, comment):
        return self._target_comment(comment)
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
@cython.final
@cython.internal
@cython.no_gc_clear  # Required because parent class uses it - Cython bug.
cdef class _TargetParserContext(_SaxParserContext):
    """Parser context that maps SAX2 events to the ET parser target interface.
    """
    cdef object _python_target

    cdef int _setTarget(self, target) except -1:
        # Remember the user's target; anything that is not a _SaxParserTarget,
        # or that has an instance __dict__, is wrapped in the Python adapter.
        self._python_target = target
        needs_wrapper = (not isinstance(target, _SaxParserTarget)
                         or hasattr(target, '__dict__'))
        if needs_wrapper:
            target = _PythonSaxParserTarget(target)
        self._setSaxParserTarget(target)
        return 0

    cdef _ParserContext _copy(self):
        # the copy gets the same Python target installed
        cdef _TargetParserContext context = _ParserContext._copy(self)
        context._setTarget(self._python_target)
        return context

    cdef void _cleanupTargetParserContext(self, xmlDoc* result) noexcept:
        # Drop the parser's document, freeing it unless it is 'result' or
        # has a _Document proxy attached.
        if self._c_ctxt.myDoc is NULL:
            return
        if self._c_ctxt.myDoc is not result and \
                self._c_ctxt.myDoc._private is NULL:
            # no _Document proxy => orphan
            tree.xmlFreeDoc(self._c_ctxt.myDoc)
        self._c_ctxt.myDoc = NULL

    cdef object _handleParseResult(self, _BaseParser parser, xmlDoc* result,
                                   filename):
        # Return what the target's close() returns.  On any error, close()
        # is still called before the error propagates.
        cdef bint recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER
        try:
            if self._has_raised():
                self._cleanupTargetParserContext(result)
                self._raise_if_stored()
            if not self._c_ctxt.wellFormed and not recover:
                _raiseParseError(self._c_ctxt, filename, self._error_log)
        except:
            self._python_target.close()
            raise
        return self._python_target.close()

    cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser,
                                       xmlDoc* result, filename) except NULL:
        # Never returns a document: the value returned by the target's
        # close() is delivered by raising _TargetParserResult.
        cdef bint recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER
        if result is not NULL and result._private is NULL:
            # no _Document proxy => orphan
            tree.xmlFreeDoc(result)
        try:
            self._cleanupTargetParserContext(result)
            self._raise_if_stored()
            if not self._c_ctxt.wellFormed and not recover:
                _raiseParseError(self._c_ctxt, filename, self._error_log)
        except:
            self._python_target.close()
            raise
        target_result = self._python_target.close()
        raise _TargetParserResult(target_result)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/readonlytree.pxi
ADDED
|
@@ -0,0 +1,565 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# read-only tree implementation
|
| 2 |
+
|
| 3 |
+
@cython.internal
|
| 4 |
+
cdef class _ReadOnlyProxy:
|
| 5 |
+
"A read-only proxy class suitable for PIs/Comments (for internal use only!)."
|
| 6 |
+
cdef bint _free_after_use
|
| 7 |
+
cdef xmlNode* _c_node
|
| 8 |
+
cdef _ReadOnlyProxy _source_proxy
|
| 9 |
+
cdef list _dependent_proxies
|
| 10 |
+
def __cinit__(self):
    # a fresh proxy refers to no node and does not free anything on release
    self._free_after_use = 0
    self._c_node = NULL
|
| 13 |
+
|
| 14 |
+
cdef int _assertNode(self) except -1:
    """Raise ReferenceError if this proxy no longer refers to a node.
    """
    if self._c_node is NULL:
        raise ReferenceError("Proxy invalidated!")
    return 0
|
| 20 |
+
|
| 21 |
+
cdef int _raise_unsupported_type(self) except -1:
    # Always raises: reports the node type this proxy cannot handle.
    message = f"Unsupported node type: {self._c_node.type}"
    raise TypeError(message)
|
| 23 |
+
|
| 24 |
+
cdef void free_after_use(self) noexcept:
    """Set the flag that answers: should the xmlNode* be freed when
    releasing the proxy?
    """
    self._free_after_use = 1
|
| 28 |
+
|
| 29 |
+
@property
def tag(self):
    """Element tag.

    The namespaced name for elements; the ProcessingInstruction, Comment or
    Entity factory for those node types.  Other node types raise TypeError.
    """
    self._assertNode()
    if self._c_node.type == tree.XML_ELEMENT_NODE:
        return _namespacedName(self._c_node)
    if self._c_node.type == tree.XML_PI_NODE:
        return ProcessingInstruction
    if self._c_node.type == tree.XML_COMMENT_NODE:
        return Comment
    if self._c_node.type == tree.XML_ENTITY_REF_NODE:
        return Entity
    self._raise_unsupported_type()
|
| 44 |
+
|
| 45 |
+
@property
def text(self):
    """Text before the first subelement, as a string, or None if there
    was no text.

    For processing instructions and comments this is their content ('' if
    they have none); for entity references it is the '&name;' form.
    """
    self._assertNode()
    if self._c_node.type == tree.XML_ELEMENT_NODE:
        return _collectText(self._c_node.children)
    if (self._c_node.type == tree.XML_PI_NODE or
            self._c_node.type == tree.XML_COMMENT_NODE):
        return '' if self._c_node.content is NULL else funicode(self._c_node.content)
    if self._c_node.type == tree.XML_ENTITY_REF_NODE:
        return f'&{funicode(self._c_node.name)};'
    self._raise_unsupported_type()
|
| 63 |
+
|
| 64 |
+
@property
def tail(self):
    """Text that follows this element's end tag and precedes the start tag
    of the next sibling element.  Either a string, or None if there was
    no text.
    """
    self._assertNode()
    tail_text = _collectText(self._c_node.next)
    return tail_text
|
| 72 |
+
|
| 73 |
+
@property
def sourceline(self):
    """Line number as originally found by the parser, or None if unknown.
    """
    cdef long line_number
    self._assertNode()
    line_number = tree.xmlGetLineNo(self._c_node)
    # non-positive values are reported as "unknown"
    return line_number if line_number > 0 else None
|
| 84 |
+
|
| 85 |
+
def __repr__(self):
    # Build a representation that depends on the node type.  Node types
    # not handled below raise TypeError.
    self._assertNode()
    if self._c_node.type == tree.XML_ELEMENT_NODE:
        return "<Element %s at 0x%x>" % (self.tag, id(self))
    elif self._c_node.type == tree.XML_COMMENT_NODE:
        return "<!--%s-->" % self.text
    elif self._c_node.type in (tree.XML_ENTITY_REF_NODE,
                               tree.XML_ENTITY_NODE):
        # 'tag' and 'text' treat XML_ENTITY_REF_NODE as the entity case, so
        # accept it here as well; XML_ENTITY_NODE stays accepted as before.
        return "&%s;" % funicode(self._c_node.name)
    elif self._c_node.type == tree.XML_PI_NODE:
        text = self.text
        if text:
            return "<?%s %s?>" % (self.target, text)
        else:
            return "<?%s?>" % self.target
    else:
        self._raise_unsupported_type()
|
| 101 |
+
|
| 102 |
+
def __getitem__(self, x):
    """Returns the subelement at the given position or the requested
    slice.

    Every returned item is a read-only proxy that is registered with
    this proxy's source proxy.  A slice yields a (possibly empty) list;
    an integer index that matches no child raises IndexError.
    """
    cdef xmlNode* c_node = NULL
    cdef Py_ssize_t step = 0, slicelength = 0
    cdef Py_ssize_t c, i
    cdef _node_to_node_function next_element
    cdef list result
    self._assertNode()
    if isinstance(x, slice):
        # slicing
        if _isFullSlice(<slice>x):
            # A full slice is the complete child list; build it through
            # getchildren() so that only read-only proxies are handed out.
            return self.getchildren()
        _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
        if c_node is NULL:
            return []
        if step > 0:
            next_element = _nextElement
        else:
            # negative step: walk backwards using the absolute step width
            step = -step
            next_element = _previousElement
        result = []
        c = 0
        while c_node is not NULL and c < slicelength:
            # exactly one read-only proxy per selected node
            result.append(_newReadOnlyProxy(self._source_proxy, c_node))
            c = c + 1
            for i in range(step):
                c_node = next_element(c_node)
        return result
    else:
        # indexing
        c_node = _findChild(self._c_node, x)
        if c_node is NULL:
            raise IndexError("list index out of range")
        return _newReadOnlyProxy(self._source_proxy, c_node)
|
| 139 |
+
|
| 140 |
+
def __len__(self):
    """Count the direct children that ``tree._isElement()`` accepts.
    """
    cdef Py_ssize_t count = 0
    cdef xmlNode* c_child
    self._assertNode()
    c_child = self._c_node.children
    while c_child is not NULL:
        if tree._isElement(c_child):
            count += 1
        c_child = c_child.next
    return count
|
| 153 |
+
|
| 154 |
+
def __bool__(self):
    """True if ``_findChildBackwards(node, 0)`` finds a child node."""
    self._assertNode()
    return _findChildBackwards(self._c_node, 0) is not NULL
|
| 159 |
+
|
| 160 |
+
def __deepcopy__(self, memo):
    """__deepcopy__(self, memo)

    Delegates to ``__copy__()``; ``memo`` is not consulted.
    """
    duplicate = self.__copy__()
    return duplicate
|
| 163 |
+
|
| 164 |
+
cpdef __copy__(self):
    """__copy__(self)

    Copy this node into a new document via ``_copyDocRoot()`` and return
    the counterpart of this node in that document.  An invalidated proxy
    (NULL node) returns itself.
    """
    cdef xmlDoc* c_doc
    cdef xmlNode* c_candidate
    cdef _Document copied_doc
    if self._c_node is NULL:
        return self
    c_doc = _copyDocRoot(self._c_node.doc, self._c_node)  # recursive
    copied_doc = _documentFactory(c_doc, None)
    copied_root = copied_doc.getroot()
    if copied_root is not None:
        return copied_root
    # Comment/PI: no root element, so look for the first top-level node
    # of the same type as the node that was copied.
    c_candidate = c_doc.children
    while c_candidate is not NULL:
        if c_candidate.type == self._c_node.type:
            return _elementFactory(copied_doc, c_candidate)
        c_candidate = c_candidate.next
    return None
|
| 183 |
+
|
| 184 |
+
def __iter__(self):
    """Iterate over the list returned by ``getchildren()``."""
    children = self.getchildren()
    return iter(children)
|
| 186 |
+
|
| 187 |
+
def iterchildren(self, tag=None, *, reversed=False):
    """iterchildren(self, tag=None, reversed=False)

    Iterate over the children of this element.  If ``tag`` is given and
    is not ``'*'``, only children whose ``.tag`` equals it are yielded.
    ``reversed=True`` yields them in reverse order.
    """
    selected = self.getchildren()
    if tag is not None and tag != '*':
        selected = [child for child in selected if child.tag == tag]
    # note: the ``reversed`` parameter shadows the builtin of that name
    ordered = selected[::-1] if reversed else selected
    return iter(ordered)
|
| 198 |
+
|
| 199 |
+
cpdef getchildren(self):
    """Return a list with one read-only proxy per child element, in
    document order.
    """
    cdef xmlNode* c_child
    cdef list proxies
    self._assertNode()
    proxies = []
    c_child = self._c_node.children
    while c_child is not NULL:
        if tree._isElement(c_child):
            proxies.append(_newReadOnlyProxy(self._source_proxy, c_child))
        c_child = c_child.next
    return proxies
|
| 213 |
+
|
| 214 |
+
def getparent(self):
    """Return a read-only proxy for the parent, or None if there is no
    parent or ``tree._isElement()`` rejects it (e.g. for the root element).
    """
    cdef xmlNode* c_parent
    self._assertNode()
    c_parent = self._c_node.parent
    if c_parent is not NULL and tree._isElement(c_parent):
        return _newReadOnlyProxy(self._source_proxy, c_parent)
    return None
|
| 224 |
+
|
| 225 |
+
def getnext(self):
    """Return a read-only proxy for the following sibling, or None if
    ``_nextElement()`` finds none.
    """
    cdef xmlNode* c_sibling
    self._assertNode()
    c_sibling = _nextElement(self._c_node)
    if c_sibling is NULL:
        return None
    return _newReadOnlyProxy(self._source_proxy, c_sibling)
|
| 234 |
+
|
| 235 |
+
def getprevious(self):
    """Return a read-only proxy for the preceding sibling, or None if
    ``_previousElement()`` finds none.
    """
    cdef xmlNode* c_sibling
    self._assertNode()
    c_sibling = _previousElement(self._c_node)
    if c_sibling is NULL:
        return None
    return _newReadOnlyProxy(self._source_proxy, c_sibling)
|
| 244 |
+
|
| 245 |
+
|
| 246 |
+
@cython.final
@cython.internal
cdef class _ReadOnlyPIProxy(_ReadOnlyProxy):
    """Read-only proxy for processing instruction nodes (internal use only!)"""
    @property
    def target(self):
        """The PI target, i.e. the node name decoded by ``funicode()``."""
        self._assertNode()
        pi_target = funicode(self._c_node.name)
        return pi_target
|
| 254 |
+
|
| 255 |
+
@cython.final
@cython.internal
cdef class _ReadOnlyEntityProxy(_ReadOnlyProxy):
    """A read-only proxy for entity references (for internal use only!)

    All accessors check the proxy with ``_assertNode()`` before touching
    the underlying node, because invalidated proxies carry a NULL node
    pointer.
    """
    property name:
        def __get__(self):
            # Entity name, decoded from the node name.
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            # Rename the entity; raises ValueError if the name contains
            # '&' or ';'.
            self._assertNode()
            value_utf = _utf8(value)
            if '&' in value or ';' in value:
                raise ValueError(f"Invalid entity name '{value}'")
            tree.xmlNodeSetName(self._c_node, _xcstr(value_utf))

    @property
    def text(self):
        """The entity reference as it appears in markup: ``&name;``."""
        self._assertNode()
        return f'&{funicode(self._c_node.name)};'
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
@cython.internal
cdef class _ReadOnlyElementProxy(_ReadOnlyProxy):
    """The main read-only Element proxy class (for internal use only!)."""

    @property
    def attrib(self):
        """A new dict built from the attribute (name, value) items."""
        self._assertNode()
        attribute_items = _collectAttributes(self._c_node, 3)
        return dict(attribute_items)

    @property
    def prefix(self):
        """Namespace prefix or None.
        """
        self._assertNode()
        if self._c_node.ns is NULL or self._c_node.ns.prefix is NULL:
            return None
        return funicode(self._c_node.ns.prefix)

    @property
    def nsmap(self):
        """Namespace prefix->URI mapping known in the context of this
        Element.  This includes all namespace declarations of the
        parents.

        Note that changing the returned dict has no effect on the Element.
        """
        self._assertNode()
        namespace_map = _build_nsmap(self._c_node)
        return namespace_map

    def get(self, key, default=None):
        """Gets an element attribute; ``default`` is handed on to the
        lookup helper for the case that the attribute is missing.
        """
        self._assertNode()
        attribute_value = _getNodeAttributeValue(self._c_node, key, default)
        return attribute_value

    def keys(self):
        """Gets a list of attribute names.  The names are returned in an
        arbitrary order (just like for an ordinary Python dictionary).
        """
        self._assertNode()
        # mode 1 of _collectAttributes(): names only
        return _collectAttributes(self._c_node, 1)

    def values(self):
        """Gets element attribute values, as a sequence.  The values are
        returned in an arbitrary order.
        """
        self._assertNode()
        # mode 2 of _collectAttributes(): values only
        return _collectAttributes(self._c_node, 2)

    def items(self):
        """Gets element attributes as a sequence of (name, value) items,
        in an arbitrary order.
        """
        self._assertNode()
        # mode 3 of _collectAttributes(): (name, value) items
        return _collectAttributes(self._c_node, 3)
|
| 330 |
+
|
| 331 |
+
cdef _ReadOnlyProxy _newReadOnlyProxy(
        _ReadOnlyProxy source_proxy, xmlNode* c_node):
    # Create the read-only proxy class that matches the node type, attach
    # the node and register the proxy with its source proxy.
    # Raises TypeError for node types that have no proxy class.
    cdef _ReadOnlyProxy proxy
    if c_node.type == tree.XML_ELEMENT_NODE:
        proxy = _ReadOnlyElementProxy.__new__(_ReadOnlyElementProxy)
    elif c_node.type == tree.XML_PI_NODE:
        proxy = _ReadOnlyPIProxy.__new__(_ReadOnlyPIProxy)
    elif (c_node.type == tree.XML_COMMENT_NODE or
            c_node.type == tree.XML_ENTITY_REF_NODE):
        # comments and entity references share the plain base proxy
        proxy = _ReadOnlyProxy.__new__(_ReadOnlyProxy)
    else:
        raise TypeError(f"Unsupported element type: {c_node.type}")
    proxy._c_node = c_node
    _initReadOnlyProxy(proxy, source_proxy)
    return proxy
|
| 346 |
+
|
| 347 |
+
cdef inline _initReadOnlyProxy(_ReadOnlyProxy el,
                               _ReadOnlyProxy source_proxy):
    # Link a freshly created proxy to the proxy that tracks its lifetime.
    if source_proxy is not None:
        # dependent proxy: register with the existing source proxy
        el._source_proxy = source_proxy
        source_proxy._dependent_proxies.append(el)
        return
    # no source given: the proxy becomes its own source and starts the
    # list of dependent proxies with itself
    el._source_proxy = el
    el._dependent_proxies = [el]
|
| 355 |
+
|
| 356 |
+
cdef _freeReadOnlyProxies(_ReadOnlyProxy sourceProxy):
    # Invalidate every proxy registered with sourceProxy: reset its node
    # pointer to NULL and free the node if the proxy is flagged with
    # _free_after_use.  The registration list is emptied afterwards.
    cdef xmlNode* c_detached
    cdef _ReadOnlyProxy dependent
    if sourceProxy is None or sourceProxy._dependent_proxies is None:
        return
    for dependent in sourceProxy._dependent_proxies:
        # clear the pointer first, then release the node if requested
        c_detached = dependent._c_node
        dependent._c_node = NULL
        if dependent._free_after_use:
            tree.xmlFreeNode(c_detached)
    del sourceProxy._dependent_proxies[:]
|
| 369 |
+
|
| 370 |
+
# opaque wrapper around non-element nodes, e.g. the document node
|
| 371 |
+
#
|
| 372 |
+
# This class does not imply any restrictions on modifiability or
|
| 373 |
+
# read-only status of the node, so use with caution.
|
| 374 |
+
|
| 375 |
+
@cython.internal
cdef class _OpaqueNodeWrapper:
    """Holder of a bare node pointer; instances are created from Cython
    code only (via ``__new__``), never through ``__init__``.
    """
    cdef tree.xmlNode* _c_node
    def __init__(self):
        raise TypeError("This type cannot be instantiated from Python")
|
| 380 |
+
|
| 381 |
+
@cython.final
@cython.internal
cdef class _OpaqueDocumentWrapper(_OpaqueNodeWrapper):
    """Opaque wrapper for a document node that only allows appending
    copies of top-level nodes.
    """
    cdef int _assertNode(self) except -1:
        """This is our way of saying: this proxy is invalid!
        """
        assert self._c_node is not NULL, "Proxy invalidated!"
        return 0

    cpdef append(self, other_element):
        """Append a copy of an Element to the list of children.

        Raises ValueError if an element is appended while the document
        already has a root element, and TypeError for node types other
        than element, processing instruction or comment.
        """
        cdef xmlNode* c_source
        cdef xmlNode* c_copy
        cdef xmlNode* c_copy_next
        cdef tree.xmlDoc* c_doc
        self._assertNode()
        # the wrapped node is the document itself
        c_doc = <tree.xmlDoc*>self._c_node
        c_source = _roNodeOf(other_element)
        if c_source.type == tree.XML_ELEMENT_NODE:
            if tree.xmlDocGetRootElement(c_doc) is not NULL:
                raise ValueError("cannot append, document already has a root element")
        elif c_source.type not in (tree.XML_PI_NODE, tree.XML_COMMENT_NODE):
            raise TypeError(f"unsupported element type for top-level node: {c_source.type}")
        c_copy = _copyNodeToDoc(c_source, c_doc)
        # remember the copy's successor before linking the copy in
        c_copy_next = c_copy.next
        tree.xmlAddChild(self._c_node, c_copy)
        _moveTail(c_copy_next, c_copy)

    def extend(self, elements):
        """Append a copy of all Elements from a sequence to the list of
        children.
        """
        self._assertNode()
        for item in elements:
            self.append(item)
|
| 414 |
+
|
| 415 |
+
cdef _OpaqueNodeWrapper _newOpaqueAppendOnlyNodeWrapper(xmlNode* c_node):
    # Wrap c_node: (HTML) document nodes get the append-capable document
    # wrapper, every other node type the plain opaque wrapper.
    cdef _OpaqueNodeWrapper wrapper
    if (c_node.type == tree.XML_DOCUMENT_NODE or
            c_node.type == tree.XML_HTML_DOCUMENT_NODE):
        wrapper = _OpaqueDocumentWrapper.__new__(_OpaqueDocumentWrapper)
    else:
        wrapper = _OpaqueNodeWrapper.__new__(_OpaqueNodeWrapper)
    wrapper._c_node = c_node
    return wrapper
|
| 423 |
+
|
| 424 |
+
# element proxies that allow restricted modification
|
| 425 |
+
|
| 426 |
+
@cython.internal
cdef class _ModifyContentOnlyProxy(_ReadOnlyProxy):
    """A read-only proxy that allows changing the text content.
    """
    property text:
        def __get__(self):
            # Node content as text; a node without content yields ''.
            self._assertNode()
            if self._c_node.content is NULL:
                return ''
            else:
                return funicode(self._c_node.content)

        def __set__(self, value):
            # Replace the node content; None clears it (NULL is passed to
            # xmlNodeSetContent).
            cdef const_xmlChar* c_text
            self._assertNode()
            if value is None:
                c_text = <const_xmlChar*>NULL
            else:
                # keep the encoded object referenced by `value` while its
                # C string is in use
                value = _utf8(value)
                c_text = _xcstr(value)
            tree.xmlNodeSetContent(self._c_node, c_text)
|
| 447 |
+
|
| 448 |
+
@cython.final
@cython.internal
cdef class _ModifyContentOnlyPIProxy(_ModifyContentOnlyProxy):
    """Proxy for a processing instruction whose text and target may be
    changed while everything else stays read-only.
    """
    property target:
        def __get__(self):
            # PI target, decoded from the node name.
            self._assertNode()
            pi_target = funicode(self._c_node.name)
            return pi_target

        def __set__(self, value):
            # Rename the PI node to the (encoded) new target.
            self._assertNode()
            encoded_target = _utf8(value)
            tree.xmlNodeSetName(self._c_node, _xcstr(encoded_target))
|
| 464 |
+
|
| 465 |
+
@cython.final
@cython.internal
cdef class _ModifyContentOnlyEntityProxy(_ModifyContentOnlyProxy):
    """A proxy for entity references that allows changing the entity
    name (for internal use only!)
    """
    property name:
        def __get__(self):
            # Entity name, decoded from the node name.
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            # Rename the entity.  Raises ValueError if the name contains
            # '&' or ';' (same behaviour as _ReadOnlyEntityProxy.name).
            self._assertNode()
            value_utf = _utf8(value)
            # Validate the encoded form that is handed to _xcstr() below;
            # presumably a byte string - confirm against _utf8().
            if b'&' in value_utf or b';' in value_utf:
                raise ValueError(f"Invalid entity name '{value}'")
            tree.xmlNodeSetName(self._c_node, _xcstr(value_utf))
|
| 479 |
+
|
| 480 |
+
|
| 481 |
+
@cython.final
@cython.internal
cdef class _AppendOnlyElementProxy(_ReadOnlyElementProxy):
    """A read-only element that allows adding children and changing the
    text content (i.e. everything that adds to the subtree).
    """
    cpdef append(self, other_element):
        """Append a copy of an Element to the list of children.
        """
        cdef xmlNode* c_source
        cdef xmlNode* c_copy
        cdef xmlNode* c_copy_next
        self._assertNode()
        c_source = _roNodeOf(other_element)
        c_copy = _copyNodeToDoc(c_source, self._c_node.doc)
        # remember the copy's successor before linking the copy in
        c_copy_next = c_copy.next
        tree.xmlAddChild(self._c_node, c_copy)
        _moveTail(c_copy_next, c_copy)

    def extend(self, elements):
        """Append a copy of all Elements from a sequence to the list of
        children.
        """
        self._assertNode()
        for item in elements:
            self.append(item)

    property text:
        """Text before the first subelement.  This is either a string or the
        value None, if there was no text.
        """
        def __get__(self):
            self._assertNode()
            leading_text = _collectText(self._c_node.children)
            return leading_text

        def __set__(self, value):
            self._assertNode()
            new_text = value
            if isinstance(new_text, QName):
                # QName values are resolved against this element first
                new_text = _resolveQNameText(self, new_text).decode('utf8')
            _setNodeText(self._c_node, new_text)
|
| 520 |
+
|
| 521 |
+
|
| 522 |
+
cdef _ReadOnlyProxy _newAppendOnlyProxy(
        _ReadOnlyProxy source_proxy, xmlNode* c_node):
    # Create the restricted-modification proxy that matches the node type
    # (element, PI or comment) and register it with its source proxy.
    # Raises TypeError for any other node type.
    cdef _ReadOnlyProxy proxy
    if c_node.type == tree.XML_ELEMENT_NODE:
        proxy = _AppendOnlyElementProxy.__new__(_AppendOnlyElementProxy)
    elif c_node.type == tree.XML_PI_NODE:
        proxy = _ModifyContentOnlyPIProxy.__new__(_ModifyContentOnlyPIProxy)
    elif c_node.type == tree.XML_COMMENT_NODE:
        proxy = _ModifyContentOnlyProxy.__new__(_ModifyContentOnlyProxy)
    else:
        raise TypeError(f"Unsupported element type: {c_node.type}")
    proxy._c_node = c_node
    _initReadOnlyProxy(proxy, source_proxy)
    return proxy
|
| 536 |
+
|
| 537 |
+
cdef xmlNode* _roNodeOf(element) except NULL:
    # Return the node pointer held by an _Element, a read-only proxy or
    # an opaque node wrapper.  Raises TypeError for any other argument
    # type and for objects whose node pointer is NULL.
    cdef xmlNode* c_found
    if isinstance(element, _Element):
        c_found = (<_Element>element)._c_node
    elif isinstance(element, _ReadOnlyProxy):
        c_found = (<_ReadOnlyProxy>element)._c_node
    elif isinstance(element, _OpaqueNodeWrapper):
        c_found = (<_OpaqueNodeWrapper>element)._c_node
    else:
        raise TypeError(f"invalid argument type {type(element)}")

    if c_found is not NULL:
        return c_found
    raise TypeError("invalid element")
|
| 551 |
+
|
| 552 |
+
cdef xmlNode* _nonRoNodeOf(element) except NULL:
    # Return the node pointer held by an _Element, an append-only element
    # proxy or an opaque node wrapper.  Raises TypeError for any other
    # argument type and for objects whose node pointer is NULL.
    cdef xmlNode* c_found
    if isinstance(element, _Element):
        c_found = (<_Element>element)._c_node
    elif isinstance(element, _AppendOnlyElementProxy):
        c_found = (<_AppendOnlyElementProxy>element)._c_node
    elif isinstance(element, _OpaqueNodeWrapper):
        c_found = (<_OpaqueNodeWrapper>element)._c_node
    else:
        raise TypeError(f"invalid argument type {type(element)}")

    if c_found is not NULL:
        return c_found
    raise TypeError("invalid element")
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/saxparser.pxi
ADDED
|
@@ -0,0 +1,875 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SAX-like interfaces
|
| 2 |
+
|
| 3 |
+
class XMLSyntaxAssertionError(XMLSyntaxError, AssertionError):
    """
    An XMLSyntaxError that additionally inherits from AssertionError for
    ElementTree / backwards compatibility reasons.

    This class may get replaced by a plain XMLSyntaxError in a future version.
    """
    def __init__(self, message):
        # Only the message is supplied by the caller; the remaining
        # XMLSyntaxError arguments are fixed to None, 0 and 1.
        XMLSyntaxError.__init__(self, message, None, 0, 1)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# Bit flags, one per SAX event kind.  They are tested against a parser
# target's _sax_event_filter when the SAX callbacks are connected.
ctypedef enum _SaxParserEvents:
    SAX_EVENT_START = 1 << 0
    SAX_EVENT_END = 1 << 1
    SAX_EVENT_DATA = 1 << 2
    SAX_EVENT_DOCTYPE = 1 << 3
    SAX_EVENT_PI = 1 << 4
    SAX_EVENT_COMMENT = 1 << 5
    SAX_EVENT_START_NS = 1 << 6
    SAX_EVENT_END_NS = 1 << 7
|
| 23 |
+
|
| 24 |
+
# Bit flags for the parse events that can be collected.  The mask is
# built from event names by _buildParseEventFilter().
ctypedef enum _ParseEventFilter:
    PARSE_EVENT_FILTER_START = 1 << 0
    PARSE_EVENT_FILTER_END = 1 << 1
    PARSE_EVENT_FILTER_START_NS = 1 << 2
    PARSE_EVENT_FILTER_END_NS = 1 << 3
    PARSE_EVENT_FILTER_COMMENT = 1 << 4
    PARSE_EVENT_FILTER_PI = 1 << 5
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
cdef int _buildParseEventFilter(events) except -1:
    # Translate an iterable of event names into a bit mask of
    # PARSE_EVENT_FILTER_* flags.  Raises ValueError for unknown names.
    cdef int event_filter = 0
    # (name, flag) pairs, compared in this order
    known_events = (
        ('start', PARSE_EVENT_FILTER_START),
        ('end', PARSE_EVENT_FILTER_END),
        ('start-ns', PARSE_EVENT_FILTER_START_NS),
        ('end-ns', PARSE_EVENT_FILTER_END_NS),
        ('comment', PARSE_EVENT_FILTER_COMMENT),
        ('pi', PARSE_EVENT_FILTER_PI),
    )
    for event in events:
        for event_name, event_flag in known_events:
            if event == event_name:
                event_filter |= event_flag
                break
        else:
            raise ValueError(f"invalid event name '{event}'")
    return event_filter
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
cdef class _SaxParserTarget:
    """Base class for parser targets that receive SAX events.

    Every handler here is a stub that does nothing; presumably subclasses
    override the ones they need - confirm against the implementations.
    """
    # Bit mask of SAX_EVENT_* flags; it is read when the SAX callbacks are
    # connected to decide which events are intercepted for this target.
    cdef int _sax_event_filter

    # element start tag: tag name, attributes, namespace mapping
    cdef _handleSaxStart(self, tag, attrib, nsmap):
        return None
    # element end tag
    cdef _handleSaxEnd(self, tag):
        return None
    # character data; returns 0 here, -1 is the declared error value
    cdef int _handleSaxData(self, data) except -1:
        return 0
    # doctype declaration; returns 0 here, -1 is the declared error value
    cdef int _handleSaxDoctype(self, root_tag, public_id, system_id) except -1:
        return 0
    # processing instruction
    cdef _handleSaxPi(self, target, data):
        return None
    # comment
    cdef _handleSaxComment(self, comment):
        return None
    # namespace declaration start (prefix, URI)
    cdef _handleSaxStartNs(self, prefix, uri):
        return None
    # namespace declaration end (prefix)
    cdef _handleSaxEndNs(self, prefix):
        return None
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
#@cython.final
@cython.internal
@cython.no_gc_clear # Required because parent class uses it - Cython bug.
cdef class _SaxParserContext(_ParserContext):
    """This class maps SAX2 events to parser target events.

    It works in one of two modes, chosen in ``_initParserContext()``:
    with a parser target set, the SAX callbacks are routed to the target
    (``_connectTarget()``); otherwise, with a non-zero event filter, parse
    events are collected into ``events_iterator`` (``_connectEvents()``).
    """
    cdef _SaxParserTarget _target
    cdef _BaseParser _parser
    # SAX callbacks as stored when the handlers are connected
    cdef xmlparser.startElementNsSAX2Func _origSaxStart
    cdef xmlparser.endElementNsSAX2Func _origSaxEnd
    cdef xmlparser.startElementSAXFunc _origSaxStartNoNs
    cdef xmlparser.endElementSAXFunc _origSaxEndNoNs
    cdef xmlparser.charactersSAXFunc _origSaxData
    cdef xmlparser.cdataBlockSAXFunc _origSaxCData
    cdef xmlparser.internalSubsetSAXFunc _origSaxDoctype
    cdef xmlparser.commentSAXFunc _origSaxComment
    cdef xmlparser.processingInstructionSAXFunc _origSaxPI
    cdef xmlparser.startDocumentSAXFunc _origSaxStartDocument

    # for event collecting
    cdef int _event_filter
    cdef list _ns_stack
    cdef list _node_stack
    cdef _ParseEventsIterator events_iterator

    # for iterparse
    cdef _Element _root
    cdef _MultiTagMatcher _matcher

    def __cinit__(self, _BaseParser parser):
        # start with empty stacks and a fresh, empty events iterator
        self._ns_stack = []
        self._node_stack = []
        self._parser = parser
        self.events_iterator = _ParseEventsIterator()

    cdef void _setSaxParserTarget(self, _SaxParserTarget target) noexcept:
        # Store the target that _initParserContext() will connect.
        self._target = target

    cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
        # Run the base class initialisation, then hook up the SAX
        # callbacks; a parser target takes precedence over event collection.
        _ParserContext._initParserContext(self, c_ctxt)
        if self._target is not None:
            self._connectTarget(c_ctxt)
        elif self._event_filter:
            self._connectEvents(c_ctxt)

    cdef void _connectTarget(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
        """Wrap original SAX2 callbacks to call into parser target.
        """
        sax = c_ctxt.sax
        # Each callback is reset to NULL first and only set again if the
        # target's event filter asks for the corresponding event.
        self._origSaxStart = sax.startElementNs = NULL
        self._origSaxStartNoNs = sax.startElement = NULL
        if self._target._sax_event_filter & (SAX_EVENT_START |
                                             SAX_EVENT_START_NS |
                                             SAX_EVENT_END_NS):
            # intercept => overwrite orig callback
            # FIXME: also intercept on when collecting END events
            if sax.initialized == xmlparser.XML_SAX2_MAGIC:
                sax.startElementNs = _handleSaxTargetStart
            if self._target._sax_event_filter & SAX_EVENT_START:
                sax.startElement = _handleSaxTargetStartNoNs

        self._origSaxEnd = sax.endElementNs = NULL
        self._origSaxEndNoNs = sax.endElement = NULL
        if self._target._sax_event_filter & (SAX_EVENT_END |
                                             SAX_EVENT_END_NS):
            if sax.initialized == xmlparser.XML_SAX2_MAGIC:
                sax.endElementNs = _handleSaxEnd
            if self._target._sax_event_filter & SAX_EVENT_END:
                sax.endElement = _handleSaxEndNoNs

        # characters and CDATA blocks share one handler
        self._origSaxData = sax.characters = sax.cdataBlock = NULL
        if self._target._sax_event_filter & SAX_EVENT_DATA:
            sax.characters = sax.cdataBlock = _handleSaxData

        # doctype propagation is always required for entity replacement
        self._origSaxDoctype = sax.internalSubset
        if self._target._sax_event_filter & SAX_EVENT_DOCTYPE:
            sax.internalSubset = _handleSaxTargetDoctype

        self._origSaxPI = sax.processingInstruction = NULL
        if self._target._sax_event_filter & SAX_EVENT_PI:
            sax.processingInstruction = _handleSaxTargetPI

        self._origSaxComment = sax.comment = NULL
        if self._target._sax_event_filter & SAX_EVENT_COMMENT:
            sax.comment = _handleSaxTargetComment

        # enforce entity replacement
        sax.reference = NULL
        c_ctxt.replaceEntities = 1

    cdef void _connectEvents(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
        """Wrap original SAX2 callbacks to collect parse events without parser target.
        """
        sax = c_ctxt.sax
        # Unlike _connectTarget(), the original callbacks are kept in the
        # _origSax* fields and only replaced where events are requested.
        self._origSaxStartDocument = sax.startDocument
        sax.startDocument = _handleSaxStartDocument

        # only override "start" event handler if needed
        self._origSaxStart = sax.startElementNs
        if self._event_filter == 0 or c_ctxt.html or \
               self._event_filter & (PARSE_EVENT_FILTER_START |
                                     PARSE_EVENT_FILTER_END |
                                     PARSE_EVENT_FILTER_START_NS |
                                     PARSE_EVENT_FILTER_END_NS):
            sax.startElementNs = <xmlparser.startElementNsSAX2Func>_handleSaxStart

        self._origSaxStartNoNs = sax.startElement
        if self._event_filter == 0 or c_ctxt.html or \
               self._event_filter & (PARSE_EVENT_FILTER_START |
                                     PARSE_EVENT_FILTER_END):
            sax.startElement = <xmlparser.startElementSAXFunc>_handleSaxStartNoNs

        # only override "end" event handler if needed
        self._origSaxEnd = sax.endElementNs
        if self._event_filter == 0 or \
               self._event_filter & (PARSE_EVENT_FILTER_END |
                                     PARSE_EVENT_FILTER_END_NS):
            sax.endElementNs = <xmlparser.endElementNsSAX2Func>_handleSaxEnd

        self._origSaxEndNoNs = sax.endElement
        if self._event_filter == 0 or \
               self._event_filter & PARSE_EVENT_FILTER_END:
            sax.endElement = <xmlparser.endElementSAXFunc>_handleSaxEndNoNs

        self._origSaxComment = sax.comment
        if self._event_filter & PARSE_EVENT_FILTER_COMMENT:
            sax.comment = <xmlparser.commentSAXFunc>_handleSaxComment

        self._origSaxPI = sax.processingInstruction
        if self._event_filter & PARSE_EVENT_FILTER_PI:
            sax.processingInstruction = <xmlparser.processingInstructionSAXFunc>_handleSaxPIEvent

    cdef _setEventFilter(self, events, tag):
        # Build the event bit mask from the event names and set up the tag
        # matcher; no matcher is used without events or when the tag is
        # None or '*'.
        self._event_filter = _buildParseEventFilter(events)
        if not self._event_filter or tag is None or tag == '*':
            self._matcher = None
        else:
            self._matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag)

    cdef int startDocument(self, xmlDoc* c_doc) except -1:
        # Wrap the C document in a _Document and drop the parser reference,
        # even if creating the document fails.
        try:
            self._doc = _documentFactory(c_doc, self._parser)
        finally:
            self._parser = None  # clear circular reference ASAP
        if self._matcher is not None:
            self._matcher.cacheTags(self._doc, force_into_dict=True)
        return 0

    cdef int pushEvent(self, event, xmlNode* c_node) except -1:
        # Append an (event, element) pair for c_node to the collected events.
        cdef _Element root
        if self._root is None:
            # remember the root as soon as the document has a root of
            # element type
            root = self._doc.getroot()
            if root is not None and root._c_node.type == tree.XML_ELEMENT_NODE:
                self._root = root
        node = _elementFactory(self._doc, c_node)
        self.events_iterator._events.append( (event, node) )
        return 0

    cdef int flushEvents(self) except -1:
        # Emit 'end' events for all nodes still on the node stack, then
        # push namespace end events until the namespace stack is empty.
        events = self.events_iterator._events
        while self._node_stack:
            events.append( ('end', self._node_stack.pop()) )
            _pushSaxNsEndEvents(self)
        while self._ns_stack:
            _pushSaxNsEndEvents(self)

    cdef void _handleSaxException(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
        # Mark the parser context as failed (keeping an already set error
        # number) and store the raised exception via _store_raised().
        if c_ctxt.errNo == xmlerror.XML_ERR_OK:
            c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
        # stop parsing immediately
        c_ctxt.wellFormed = 0
        c_ctxt.disableSAX = 1
        c_ctxt.instate = xmlparser.XML_PARSER_EOF
        self._store_raised()
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
@cython.final
@cython.internal
cdef class _ParseEventsIterator:
    """A reusable parse events iterator"""
    # queue of collected events; producers append to it directly
    cdef list _events
    # position of the next event to hand out from ``_events``
    cdef int _event_index

    def __cinit__(self):
        self._event_index = 0
        self._events = []

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next queued event or raise StopIteration if none is left.

        Consumed events are dropped from the front of the queue once 2**10
        of them have piled up or they make up at least half of the list.
        """
        cdef int position = self._event_index
        queue = self._events
        if position >= 2**10 or position * 2 >= len(queue):
            if position:
                # clean up from time to time
                del queue[:position]
                self._event_index = position = 0
            if len(queue) <= position:
                raise StopIteration
        next_event = queue[position]
        self._event_index = position + 1
        return next_event
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
cdef list _build_prefix_uri_list(_SaxParserContext context, int c_nb_namespaces,
                                 const_xmlChar** c_namespaces):
    """Build [(prefix, uri)] list of declared namespaces.

    *c_namespaces* is read as ``c_nb_namespaces`` consecutive
    (prefix, uri) pointer pairs.  *context* is not used here.
    """
    cdef int idx
    cdef list declared = []
    for idx in range(c_nb_namespaces):
        prefix = funicodeOrEmpty(c_namespaces[2 * idx])
        uri = funicode(c_namespaces[2 * idx + 1])
        declared.append((prefix, uri))
    return declared
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
cdef void _handleSaxStart(
        void* ctxt, const_xmlChar* c_localname, const_xmlChar* c_prefix,
        const_xmlChar* c_namespace, int c_nb_namespaces,
        const_xmlChar** c_namespaces,
        int c_nb_attributes, int c_nb_defaulted,
        const_xmlChar** c_attributes) noexcept with gil:
    """Namespace-aware start-element callback that collects parse events.

    Queues "start-ns" events, forwards the call to the saved original
    start handler and then records the namespace/element bookkeeping
    needed for the 'start', 'end' and 'end-ns' events.
    """
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    sax_context = <_SaxParserContext>c_ctxt._private
    cdef int event_filter = sax_context._event_filter
    try:
        declared_namespaces = None
        if c_nb_namespaces and event_filter & (
                PARSE_EVENT_FILTER_START_NS | PARSE_EVENT_FILTER_END_NS):
            declared_namespaces = _build_prefix_uri_list(
                sax_context, c_nb_namespaces, c_namespaces)
            if event_filter & PARSE_EVENT_FILTER_START_NS:
                pending = sax_context.events_iterator._events
                for ns_pair in declared_namespaces:
                    pending.append(("start-ns", ns_pair))

        sax_context._origSaxStart(
            c_ctxt, c_localname, c_prefix, c_namespace,
            c_nb_namespaces, c_namespaces,
            c_nb_attributes, c_nb_defaulted, c_attributes)
        if c_ctxt.html:
            _fixHtmlDictNodeNames(c_ctxt.dict, c_ctxt.node)
            # The HTML parser in libxml2 reports the missing opening tags when it finds
            # misplaced ones, but with tag names from C string constants that ignore the
            # parser dict.  Thus, we need to intern the name ourselves.
            c_localname = tree.xmlDictLookup(c_ctxt.dict, c_localname, -1)
            if c_localname is NULL:
                raise MemoryError()

        if event_filter & PARSE_EVENT_FILTER_END_NS:
            # one entry per started element (possibly None)
            sax_context._ns_stack.append(declared_namespaces)
        if event_filter & (PARSE_EVENT_FILTER_START | PARSE_EVENT_FILTER_END):
            _pushSaxStartEvent(
                sax_context, c_ctxt, c_namespace, c_localname, None)
    except:
        sax_context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
cdef void _handleSaxTargetStart(
        void* ctxt, const_xmlChar* c_localname, const_xmlChar* c_prefix,
        const_xmlChar* c_namespace, int c_nb_namespaces,
        const_xmlChar** c_namespaces,
        int c_nb_attributes, int c_nb_defaulted,
        const_xmlChar** c_attributes) noexcept with gil:
    """Namespace-aware start-element callback that feeds a parser target.

    Reports namespace declarations and the element start (with decoded
    attributes and nsmap) to the target as selected by its SAX event
    filter, and queues the parse events selected by the context's filter.
    """
    cdef int i
    cdef size_t c_len
    cdef const_xmlChar** c_attr
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    sax_context = <_SaxParserContext>c_ctxt._private

    cdef int event_filter = sax_context._event_filter
    cdef int sax_event_filter = sax_context._target._sax_event_filter
    try:
        declared_namespaces = None
        if c_nb_namespaces:
            declared_namespaces = _build_prefix_uri_list(
                sax_context, c_nb_namespaces, c_namespaces)

            if event_filter & PARSE_EVENT_FILTER_START_NS:
                pending = sax_context.events_iterator._events
                for ns_pair in declared_namespaces:
                    pending.append(("start-ns", ns_pair))

            if sax_event_filter & SAX_EVENT_START_NS:
                for prefix, uri in declared_namespaces:
                    sax_context._target._handleSaxStartNs(prefix, uri)

        element = None
        if sax_event_filter & SAX_EVENT_START:
            # only add default attributes if we asked for them
            if (c_nb_defaulted > 0 and
                    (c_ctxt.loadsubset & xmlparser.XML_COMPLETE_ATTRS) == 0):
                c_nb_attributes -= c_nb_defaulted

            if c_nb_attributes == 0:
                attrib = IMMUTABLE_EMPTY_MAPPING
            else:
                attrib = {}
                for i in range(c_nb_attributes):
                    # each attribute occupies five consecutive pointers; slots
                    # 0 and 2 feed the name, slots 3 and 4 delimit the value
                    c_attr = c_attributes + 5 * i
                    name = _namespacedNameFromNsName(c_attr[2], c_attr[0])
                    if c_attr[3] is NULL:
                        value = ''
                    else:
                        c_len = c_attr[4] - c_attr[3]
                        value = c_attr[3][:c_len].decode('utf8')
                    attrib[name] = value

            if c_nb_namespaces:
                nsmap = dict(declared_namespaces)
            else:
                nsmap = IMMUTABLE_EMPTY_MAPPING

            element = _callTargetSaxStart(
                sax_context, c_ctxt,
                _namespacedNameFromNsName(c_namespace, c_localname),
                attrib, nsmap)

        if (event_filter & PARSE_EVENT_FILTER_END_NS or
                sax_event_filter & SAX_EVENT_END_NS):
            # one entry per started element (possibly None)
            sax_context._ns_stack.append(declared_namespaces)
        if event_filter & (PARSE_EVENT_FILTER_START | PARSE_EVENT_FILTER_END):
            _pushSaxStartEvent(
                sax_context, c_ctxt, c_namespace, c_localname, element)
    except:
        sax_context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
| 409 |
+
|
| 410 |
+
|
| 411 |
+
cdef void _handleSaxStartNoNs(void* ctxt, const_xmlChar* c_name,
                              const_xmlChar** c_attributes) noexcept with gil:
    """Start-element callback without namespace arguments that collects events.

    Forwards to the saved original handler first, then queues the element
    start if 'start' or 'end' events were requested.
    """
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    sax_context = <_SaxParserContext>c_ctxt._private
    try:
        sax_context._origSaxStartNoNs(c_ctxt, c_name, c_attributes)
        if c_ctxt.html:
            _fixHtmlDictNodeNames(c_ctxt.dict, c_ctxt.node)
            # The HTML parser in libxml2 reports the missing opening tags when it finds
            # misplaced ones, but with tag names from C string constants that ignore the
            # parser dict.  Thus, we need to intern the name ourselves.
            c_name = tree.xmlDictLookup(c_ctxt.dict, c_name, -1)
            if c_name is NULL:
                raise MemoryError()
        if sax_context._event_filter & (
                PARSE_EVENT_FILTER_START | PARSE_EVENT_FILTER_END):
            _pushSaxStartEvent(sax_context, c_ctxt, NULL, c_name, None)
    except:
        sax_context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
| 434 |
+
|
| 435 |
+
|
| 436 |
+
cdef void _handleSaxTargetStartNoNs(void* ctxt, const_xmlChar* c_name,
                                    const_xmlChar** c_attributes) noexcept with gil:
    """Start-element callback without namespace arguments that feeds a target.

    *c_attributes* is read as a NULL-terminated sequence of name/value
    pointer pairs (or NULL for "no attributes").
    """
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    sax_context = <_SaxParserContext>c_ctxt._private
    try:
        if c_attributes is not NULL:
            attrib = {}
            while c_attributes[0] is not NULL:
                attr_name = funicode(c_attributes[0])
                attrib[attr_name] = funicodeOrEmpty(c_attributes[1])
                c_attributes += 2
        else:
            attrib = IMMUTABLE_EMPTY_MAPPING
        element = _callTargetSaxStart(
            sax_context, c_ctxt, funicode(c_name),
            attrib, IMMUTABLE_EMPTY_MAPPING)
        if sax_context._event_filter & (
                PARSE_EVENT_FILTER_START | PARSE_EVENT_FILTER_END):
            _pushSaxStartEvent(sax_context, c_ctxt, NULL, c_name, element)
    except:
        sax_context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
| 461 |
+
|
| 462 |
+
|
| 463 |
+
cdef _callTargetSaxStart(_SaxParserContext context,
                         xmlparser.xmlParserCtxt* c_ctxt,
                         tag, attrib, nsmap):
    """Pass an element start on to the target and return what it returns.

    If the target returned an ``_Element`` and parser input is available,
    the current input line is stored on its C node, capped at 65535.
    """
    element = context._target._handleSaxStart(tag, attrib, nsmap)
    if element is None or c_ctxt.input is NULL:
        return element
    if isinstance(element, _Element):
        if c_ctxt.input.line < 65535:
            (<_Element>element)._c_node.line = <unsigned short>c_ctxt.input.line
        else:
            (<_Element>element)._c_node.line = 65535
    return element
|
| 473 |
+
|
| 474 |
+
|
| 475 |
+
cdef int _pushSaxStartEvent(_SaxParserContext context,
                            xmlparser.xmlParserCtxt* c_ctxt,
                            const_xmlChar* c_href,
                            const_xmlChar* c_name, node) except -1:
    """Queue a 'start' event and/or remember the node for its 'end' event.

    Nothing happens when a tag matcher is set and rejects the tag.
    Without a target, a missing *node* is created from the parser's
    current C node and, if 'end' events are wanted, pushed on the node stack.
    """
    if (context._matcher is not None and
            not context._matcher.matchesNsTag(c_href, c_name)):
        return 0

    if node is None and context._target is None:
        assert context._doc is not None
        node = _elementFactory(context._doc, c_ctxt.node)
    if context._event_filter & PARSE_EVENT_FILTER_START:
        context.events_iterator._events.append(('start', node))
    if (context._target is None and
            context._event_filter & PARSE_EVENT_FILTER_END):
        context._node_stack.append(node)
    return 0
|
| 490 |
+
|
| 491 |
+
|
| 492 |
+
cdef void _handleSaxEnd(void* ctxt, const_xmlChar* c_localname,
                        const_xmlChar* c_prefix,
                        const_xmlChar* c_namespace) noexcept with gil:
    """Namespace-aware end-element callback.

    With a target, the end is reported to it if its SAX filter selects end
    events; without one, the saved original handler is called.  Afterwards
    the 'end' and namespace-end events are pushed.
    """
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    sax_context = <_SaxParserContext>c_ctxt._private
    try:
        node = None
        if sax_context._target is None:
            sax_context._origSaxEnd(c_ctxt, c_localname, c_prefix, c_namespace)
        elif sax_context._target._sax_event_filter & SAX_EVENT_END:
            node = sax_context._target._handleSaxEnd(
                _namespacedNameFromNsName(c_namespace, c_localname))
        _pushSaxEndEvent(sax_context, c_namespace, c_localname, node)
        _pushSaxNsEndEvents(sax_context)
    except:
        sax_context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
| 515 |
+
|
| 516 |
+
|
| 517 |
+
cdef void _handleSaxEndNoNs(void* ctxt, const_xmlChar* c_name) noexcept with gil:
    """End-element callback without namespace arguments.

    Reports the end to the target if there is one, otherwise calls the
    saved original handler, and then pushes the 'end' event.
    """
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    sax_context = <_SaxParserContext>c_ctxt._private
    try:
        if sax_context._target is None:
            sax_context._origSaxEndNoNs(c_ctxt, c_name)
            node = None
        else:
            node = sax_context._target._handleSaxEnd(funicode(c_name))
        _pushSaxEndEvent(sax_context, NULL, c_name, node)
    except:
        sax_context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
| 533 |
+
|
| 534 |
+
|
| 535 |
+
cdef int _pushSaxNsEndEvents(_SaxParserContext context) except -1:
    """Pop one namespace stack entry and report its namespaces as ended.

    For each declared namespace, in reverse declaration order, the target
    is notified (if it selected namespace-end events) and an 'end-ns'
    event is queued (if requested).  Does nothing, and leaves the stack
    untouched, when neither is wanted.
    """
    cdef list ending_namespaces
    cdef tuple ns_pair
    cdef bint build_events = context._event_filter & PARSE_EVENT_FILTER_END_NS
    cdef bint call_target = (
        context._target is not None
        and context._target._sax_event_filter & SAX_EVENT_END_NS)
    if not (build_events or call_target):
        return 0

    ending_namespaces = context._ns_stack.pop()
    if ending_namespaces is None:
        # the element declared no namespaces
        return 0

    for ns_pair in reversed(ending_namespaces):
        if call_target:
            context._target._handleSaxEndNs(ns_pair[0])
        if build_events:
            context.events_iterator._events.append(('end-ns', None))

    return 0
|
| 555 |
+
|
| 556 |
+
|
| 557 |
+
cdef int _pushSaxEndEvent(_SaxParserContext context,
                          const_xmlChar* c_href,
                          const_xmlChar* c_name, node) except -1:
    """Queue an 'end' event if requested and the tag is accepted.

    Without a target, the event's node is taken from the node stack and
    the *node* argument is ignored.
    """
    if not (context._event_filter & PARSE_EVENT_FILTER_END):
        return 0
    if (context._matcher is not None and
            not context._matcher.matchesNsTag(c_href, c_name)):
        return 0
    if context._target is None:
        node = context._node_stack.pop()
    context.events_iterator._events.append(('end', node))
    return 0
|
| 567 |
+
|
| 568 |
+
|
| 569 |
+
cdef void _handleSaxData(void* ctxt, const_xmlChar* c_data, int data_len) noexcept with gil:
    """Decode *data_len* bytes of character data and hand them to the target."""
    # can only be called if parsing with a target
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    sax_context = <_SaxParserContext>c_ctxt._private
    try:
        text = c_data[:data_len].decode('utf8')
        sax_context._target._handleSaxData(text)
    except:
        sax_context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
| 582 |
+
|
| 583 |
+
|
| 584 |
+
cdef void _handleSaxTargetDoctype(void* ctxt, const_xmlChar* c_name,
                                  const_xmlChar* c_public,
                                  const_xmlChar* c_system) noexcept with gil:
    """Report a doctype (name, public ID, system ID) to the target.

    Each value is converted with ``funicodeOrNone`` before it is passed on.
    """
    # can only be called if parsing with a target
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    sax_context = <_SaxParserContext>c_ctxt._private
    try:
        root_name = funicodeOrNone(c_name)
        public_id = funicodeOrNone(c_public)
        system_id = funicodeOrNone(c_system)
        sax_context._target._handleSaxDoctype(root_name, public_id, system_id)
    except:
        sax_context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
| 601 |
+
|
| 602 |
+
|
| 603 |
+
cdef void _handleSaxStartDocument(void* ctxt) noexcept with gil:
    """Start-document callback: run the original handler, then set up the context.

    The saved original handler runs first (outside the try block); after
    that the context's ``startDocument()`` receives the parser's document.
    """
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    sax_context = <_SaxParserContext>c_ctxt._private
    sax_context._origSaxStartDocument(ctxt)
    c_doc = c_ctxt.myDoc
    try:
        sax_context.startDocument(c_doc)
    except:
        sax_context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
| 616 |
+
|
| 617 |
+
|
| 618 |
+
cdef void _handleSaxTargetPI(void* ctxt, const_xmlChar* c_target,
                             const_xmlChar* c_data) noexcept with gil:
    """Report a processing instruction to the target.

    The target's return value is queued as a 'pi' event if such events
    were requested.
    """
    # can only be called if parsing with a target
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    sax_context = <_SaxParserContext>c_ctxt._private
    try:
        pi_target = funicodeOrNone(c_target)
        pi_data = funicodeOrEmpty(c_data)
        pi = sax_context._target._handleSaxPi(pi_target, pi_data)
        if sax_context._event_filter & PARSE_EVENT_FILTER_PI:
            sax_context.events_iterator._events.append(('pi', pi))
    except:
        sax_context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
| 635 |
+
|
| 636 |
+
|
| 637 |
+
cdef void _handleSaxPIEvent(void* ctxt, const_xmlChar* target,
                            const_xmlChar* data) noexcept with gil:
    """Processing-instruction callback that queues a 'pi' event.

    The saved original handler runs first; the node looked up afterwards
    by ``_findLastEventNode()`` is pushed as the event's node, if found.
    """
    # can only be called when collecting pi events
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    sax_context = <_SaxParserContext>c_ctxt._private
    sax_context._origSaxPI(ctxt, target, data)
    c_pi_node = _findLastEventNode(c_ctxt)
    if c_pi_node is NULL:
        return
    try:
        sax_context.pushEvent('pi', c_pi_node)
    except:
        sax_context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
| 654 |
+
|
| 655 |
+
|
| 656 |
+
cdef void _handleSaxTargetComment(void* ctxt, const_xmlChar* c_data) noexcept with gil:
    """Report a comment to the target.

    The target's return value is queued as a 'comment' event if such
    events were requested.
    """
    # can only be called if parsing with a target
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    sax_context = <_SaxParserContext>c_ctxt._private
    try:
        comment_text = funicodeOrEmpty(c_data)
        comment = sax_context._target._handleSaxComment(comment_text)
        if sax_context._event_filter & PARSE_EVENT_FILTER_COMMENT:
            sax_context.events_iterator._events.append(('comment', comment))
    except:
        sax_context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
| 670 |
+
|
| 671 |
+
|
| 672 |
+
cdef void _handleSaxComment(void* ctxt, const_xmlChar* text) noexcept with gil:
    """Comment callback that queues a 'comment' event.

    The saved original handler runs first; the node looked up afterwards
    by ``_findLastEventNode()`` is pushed as the event's node, if found.
    """
    # can only be called when collecting comment events
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    sax_context = <_SaxParserContext>c_ctxt._private
    sax_context._origSaxComment(ctxt, text)
    c_comment_node = _findLastEventNode(c_ctxt)
    if c_comment_node is NULL:
        return
    try:
        sax_context.pushEvent('comment', c_comment_node)
    except:
        sax_context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
| 688 |
+
|
| 689 |
+
|
| 690 |
+
cdef inline xmlNode* _findLastEventNode(xmlparser.xmlParserCtxt* c_ctxt):
    """Return the node that the parser most recently added for a comment/PI.

    The lookup depends on where the parser currently is: internal subset,
    external subset, document level, inside an element, or after a
    non-element node.
    """
    # this mimics what libxml2 creates for comments/PIs
    if c_ctxt.inSubset == 1:
        return c_ctxt.myDoc.intSubset.last
    if c_ctxt.inSubset == 2:
        return c_ctxt.myDoc.extSubset.last
    if c_ctxt.node is NULL:
        return c_ctxt.myDoc.last
    if c_ctxt.node.type == tree.XML_ELEMENT_NODE:
        return c_ctxt.node.last
    return c_ctxt.node.next
|
| 702 |
+
|
| 703 |
+
|
| 704 |
+
############################################################
|
| 705 |
+
## ET compatible XML tree builder
|
| 706 |
+
############################################################
|
| 707 |
+
|
| 708 |
+
cdef class TreeBuilder(_SaxParserTarget):
    """TreeBuilder(self, element_factory=None, parser=None,
                    comment_factory=None, pi_factory=None,
                    insert_comments=True, insert_pis=True)

    Parser target that builds a tree from parse event callbacks.

    The factory arguments can be used to influence the creation of
    elements, comments and processing instructions.

    By default, comments and processing instructions are inserted into
    the tree, but they can be ignored by passing the respective flags.

    The final tree is returned by the ``close()`` method.
    """
    cdef _BaseParser _parser
    cdef object _factory
    cdef object _comment_factory
    cdef object _pi_factory
    cdef list _data
    cdef list _element_stack
    cdef object _element_stack_pop
    cdef _Element _last # may be None
    cdef bint _in_tail
    cdef bint _insert_comments
    cdef bint _insert_pis

    def __init__(self, *, element_factory=None, parser=None,
                 comment_factory=None, pi_factory=None,
                 bint insert_comments=True, bint insert_pis=True):
        # this target handles every SAX event except the namespace ones
        self._sax_event_filter = \
            SAX_EVENT_START | SAX_EVENT_END | SAX_EVENT_DATA | \
            SAX_EVENT_PI | SAX_EVENT_COMMENT
        self._data = [] # data collector
        self._element_stack = [] # element stack
        self._element_stack_pop = self._element_stack.pop
        self._last = None # last element
        self._in_tail = 0 # true if we're after an end tag
        self._factory = element_factory
        self._comment_factory = comment_factory if comment_factory is not None else Comment
        self._pi_factory = pi_factory if pi_factory is not None else ProcessingInstruction
        self._insert_comments = insert_comments
        self._insert_pis = insert_pis
        self._parser = parser

    @cython.final
    cdef int _flush(self) except -1:
        """Attach the collected character data to the last node and reset it.

        The joined data becomes the ``tail`` of the last node when we are
        behind an end tag (or an inserted comment/PI), otherwise its
        ``text``.  Data collected before any node exists is discarded.
        """
        if self._data:
            if self._last is not None:
                text = "".join(self._data)
                if self._in_tail:
                    assert self._last.tail is None, "internal error (tail)"
                    self._last.tail = text
                else:
                    assert self._last.text is None, "internal error (text)"
                    self._last.text = text
            del self._data[:]
        return 0

    # internal SAX event handlers

    @cython.final
    cdef _handleSaxStart(self, tag, attrib, nsmap):
        """Create the element for a start tag, push it on the stack, return it."""
        self._flush()
        if self._factory is not None:
            # user supplied factory: it only receives tag and attributes
            self._last = self._factory(tag, attrib)
            if self._element_stack:
                _appendChild(self._element_stack[-1], self._last)
        elif self._element_stack:
            self._last = _makeSubElement(
                self._element_stack[-1], tag, None, None, attrib, nsmap, None)
        else:
            self._last = _makeElement(
                tag, NULL, None, self._parser, None, None, attrib, nsmap, None)
        self._element_stack.append(self._last)
        self._in_tail = 0
        return self._last

    @cython.final
    cdef _handleSaxEnd(self, tag):
        """Close the innermost open element and return it (*tag* is not checked)."""
        self._flush()
        self._last = self._element_stack_pop()
        self._in_tail = 1
        return self._last

    @cython.final
    cdef int _handleSaxData(self, data) except -1:
        """Collect a chunk of character data until the next flush."""
        self._data.append(data)

    @cython.final
    cdef _handleSaxPi(self, target, data):
        """Create a processing instruction, insert it if enabled, return it."""
        elem = self._pi_factory(target, data)
        if self._insert_pis:
            self._flush()
            self._last = elem
            if self._element_stack:
                _appendChild(self._element_stack[-1], self._last)
            self._in_tail = 1
        # Return the new PI itself, like _handleSaxComment() does.  Returning
        # self._last here would hand out the previously handled node (or
        # None) whenever insertion of PIs is disabled.
        return elem

    @cython.final
    cdef _handleSaxComment(self, comment):
        """Create a comment, insert it if enabled, return it."""
        elem = self._comment_factory(comment)
        if self._insert_comments:
            self._flush()
            self._last = elem
            if self._element_stack:
                _appendChild(self._element_stack[-1], self._last)
            self._in_tail = 1
        return elem

    # Python level event handlers

    def close(self):
        """close(self)

        Flushes the builder buffers, and returns the toplevel document
        element.  Raises XMLSyntaxError on inconsistencies.
        """
        if self._element_stack:
            raise XMLSyntaxAssertionError("missing end tags")
        # TODO: this does not necessarily seem like an error case.  Why not just return None?
        if self._last is None:
            raise XMLSyntaxAssertionError("missing toplevel element")
        return self._last

    def data(self, data):
        """data(self, data)

        Adds text to the current element.  The value should be either an
        8-bit string containing ASCII text, or a Unicode string.
        """
        self._handleSaxData(data)

    def start(self, tag, attrs, nsmap=None):
        """start(self, tag, attrs, nsmap=None)

        Opens a new element.
        """
        if nsmap is None:
            nsmap = IMMUTABLE_EMPTY_MAPPING
        return self._handleSaxStart(tag, attrs, nsmap)

    def end(self, tag):
        """end(self, tag)

        Closes the current element.
        """
        element = self._handleSaxEnd(tag)
        assert self._last.tag == tag,\
               f"end tag mismatch (expected {self._last.tag}, got {tag})"
        return element

    def pi(self, target, data=None):
        """pi(self, target, data=None)

        Creates a processing instruction using the factory, appends it
        (unless disabled) and returns it.
        """
        return self._handleSaxPi(target, data)

    def comment(self, comment):
        """comment(self, comment)

        Creates a comment using the factory, appends it (unless disabled)
        and returns it.
        """
        return self._handleSaxComment(comment)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/usedoctest.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Doctest module for XML comparison.

Usage::

   >>> import lxml.usedoctest
   >>> # now do your XML doctests ...

See `lxml.doctestcompare`
"""

from lxml import doctestcompare

# Importing this module is its whole API: the call below runs at import time.
# NOTE(review): presumably this installs the XML-aware doctest comparison for
# the importing doctest and unregisters this module (passed by name) so that a
# later import triggers it again -- confirm against
# lxml.doctestcompare.temp_install.
doctestcompare.temp_install(del_module=__name__)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/xmlid.pxi
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# lazily created XPath evaluator, shared by all XMLID() calls
cdef object _find_id_attributes

def XMLID(text, parser=None, *, base_url=None):
    """XMLID(text, parser=None, base_url=None)

    Parse the text and return a tuple (root node, ID dictionary).  The root
    node is the same as returned by the XML() function.  The dictionary
    contains string-element pairs.  The dictionary keys are the values of 'id'
    attributes.  The elements referenced by the ID are stored as dictionary
    values.
    """
    cdef dict id_map
    global _find_id_attributes
    if _find_id_attributes is None:
        # compile the lookup expression on first use only
        _find_id_attributes = XPath('//*[string(@id)]')

    # ElementTree compatible implementation: parse and look for 'id' attributes
    root = XML(text, parser, base_url=base_url)
    id_map = {
        element.get('id'): element
        for element in _find_id_attributes(root)
    }
    return root, id_map
|
| 23 |
+
|
| 24 |
+
def XMLDTDID(text, parser=None, *, base_url=None):
    """XMLDTDID(text, parser=None, base_url=None)

    Parse the text and return a tuple (root node, ID dictionary).  The root
    node is the same as returned by the XML() function.  The dictionary
    contains string-element pairs.  The dictionary keys are the values of ID
    attributes as defined by the DTD.  The elements referenced by the ID are
    stored as dictionary values.

    Note that you must not modify the XML tree if you use the ID dictionary.
    The results are undefined.
    """
    cdef _Element root = XML(text, parser, base_url=base_url)
    # xml:id spec compatible implementation: use DTD ID attributes from libxml2
    if root._doc._c_doc.ids is not NULL:
        return root, _IDDict(root)
    # the document has no ID table: hand out a plain empty dict
    return root, {}
|
| 43 |
+
|
| 44 |
+
def parseid(source, parser=None, *, base_url=None):
    """parseid(source, parser=None, base_url=None)

    Parses the source into a tuple containing an ElementTree object and an
    ID dictionary.  If no parser is provided as second argument, the default
    parser is used.

    The keyword-only ``base_url`` argument is passed on to the document
    parser together with the source and the parser.

    Note that you must not modify the XML tree if you use the ID dictionary.
    The results are undefined.
    """
    cdef _Document doc
    doc = _parseDocument(source, parser, base_url)
    return _elementTreeFactory(doc, None), _IDDict(doc)
|
| 57 |
+
|
| 58 |
+
cdef class _IDDict:
    """IDDict(self, etree)
    A dictionary-like proxy class that maps ID attributes to elements.

    The dictionary must be instantiated with the root element of a parsed XML
    document, otherwise the behaviour is undefined. Elements and XML trees
    that were created or modified 'by hand' are not supported.

    Lookups go straight to the ID hash table of the underlying libxml2
    document.  The key list and the item list are built on first use and
    then cached on the instance.
    """
    cdef _Document _doc
    cdef object _keys   # cached list of ID names, None until first needed
    cdef object _items  # cached list of (ID name, element) tuples, None until first needed

    def __cinit__(self, etree):
        # Raises ValueError if the document has no ID hash table.
        cdef _Document doc
        doc = _documentOrRaise(etree)
        if doc._c_doc.ids is NULL:
            raise ValueError, "No ID dictionary available."
        self._doc = doc
        self._keys = None
        self._items = None

    def copy(self):
        """Return a new proxy for the same document (caches are not shared)."""
        return _IDDict(self._doc)

    def __getitem__(self, id_name):
        # Look up the element carrying the ID 'id_name'.
        # Raises KeyError if the ID is unknown or its attribute has no parent.
        cdef tree.xmlHashTable* c_ids
        cdef tree.xmlID* c_id
        cdef xmlAttr* c_attr
        c_ids = self._doc._c_doc.ids
        id_utf = _utf8(id_name)
        c_id = <tree.xmlID*>tree.xmlHashLookup(c_ids, _xcstr(id_utf))
        if c_id is NULL:
            raise KeyError, "key not found."
        c_attr = c_id.attr
        if c_attr is NULL or c_attr.parent is NULL:
            raise KeyError, "ID attribute not found."
        return _elementFactory(self._doc, c_attr.parent)

    def get(self, id_name):
        """Return the element for ``id_name``.

        NOTE(review): unlike ``dict.get()``, this takes no default and
        raises KeyError for unknown IDs, because it delegates to
        ``__getitem__``.
        """
        return self[id_name]

    def __contains__(self, id_name):
        # True if the hash table has an entry for 'id_name'.  Unlike
        # __getitem__, this does not check that the ID attribute is still
        # attached to an element.
        cdef tree.xmlID* c_id
        id_utf = _utf8(id_name)
        c_id = <tree.xmlID*>tree.xmlHashLookup(
            self._doc._c_doc.ids, _xcstr(id_utf))
        return c_id is not NULL

    def has_key(self, id_name):
        """Legacy spelling of ``id_name in self``."""
        return id_name in self

    def __repr__(self):
        return repr(dict(self))

    def keys(self):
        """Return a copy of the (cached) list of ID names."""
        if self._keys is None:
            self._keys = self._build_keys()
        return self._keys[:]

    def __iter__(self):
        if self._keys is None:
            self._keys = self._build_keys()
        return iter(self._keys)

    def iterkeys(self):
        """Return an iterator over the ID names."""
        # Return a real iterator instead of the proxy itself so that
        # next() works on the result; plain iteration behaves as before.
        return iter(self)

    def __len__(self):
        if self._keys is None:
            self._keys = self._build_keys()
        return len(self._keys)

    def items(self):
        """Return a copy of the (cached) list of (ID name, element) tuples."""
        if self._items is None:
            self._items = self._build_items()
        return self._items[:]

    def iteritems(self):
        """Return an iterator over the cached (ID name, element) tuples."""
        if self._items is None:
            self._items = self._build_items()
        return iter(self._items)

    def values(self):
        """Return a new list of the elements that carry an ID."""
        cdef list values = []
        if self._items is None:
            self._items = self._build_items()
        for item in self._items:
            # The fetched value is INCREF-ed explicitly before it is stored;
            # presumably PyTuple_GET_ITEM() is declared as returning a
            # borrowed reference -- TODO confirm against python.pxd.
            value = python.PyTuple_GET_ITEM(item, 1)
            python.Py_INCREF(value)
            values.append(value)
        return values

    def itervalues(self):
        """Return an iterator over a freshly built list of the elements."""
        return iter(self.values())

    cdef object _build_keys(self):
        # Scan the ID hash table, letting _collectIdHashKeys() append each
        # usable ID name to the list passed as scan data.
        keys = []
        tree.xmlHashScan(<tree.xmlHashTable*>self._doc._c_doc.ids,
                         <tree.xmlHashScanner>_collectIdHashKeys, <python.PyObject*>keys)
        return keys

    cdef object _build_items(self):
        # Scan the ID hash table, letting _collectIdHashItemList() append
        # (name, element) tuples.  The callback needs both the result list
        # and the document, so they are passed together as one tuple.
        items = []
        context = (items, self._doc)
        tree.xmlHashScan(<tree.xmlHashTable*>self._doc._c_doc.ids,
                         <tree.xmlHashScanner>_collectIdHashItemList, <python.PyObject*>context)
        return items
|
| 164 |
+
|
| 165 |
+
cdef void _collectIdHashItemList(void* payload, void* context, xmlChar* name) noexcept:
    # collect elements from ID attribute hash table
    #
    # Hash scanner callback used by _IDDict._build_items().
    #   payload: the hash entry, cast to tree.xmlID*
    #   context: a Python tuple (result list, _Document)
    #   name:    the ID name of the entry
    # Entries whose attribute is missing or detached from an element are
    # skipped silently.
    cdef list lst
    c_id = <tree.xmlID*>payload
    if c_id is NULL or c_id.attr is NULL or c_id.attr.parent is NULL:
        return
    lst, doc = <tuple>context
    element = _elementFactory(doc, c_id.attr.parent)
    lst.append( (funicode(name), element) )
|
| 174 |
+
|
| 175 |
+
cdef void _collectIdHashKeys(void* payload, void* collect_list, xmlChar* name) noexcept:
    # Hash scanner callback used by _IDDict._build_keys(): appends the ID
    # name to the Python list passed as 'collect_list', but only for entries
    # whose ID attribute is still attached to an element.
    cdef list key_list
    id_entry = <tree.xmlID*>payload
    if id_entry is not NULL and id_entry.attr is not NULL and id_entry.attr.parent is not NULL:
        key_list = <list>collect_list
        key_list.append(funicode(name))
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/xmlschema.pxi
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# support for XMLSchema validation
|
| 2 |
+
from lxml.includes cimport xmlschema
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
cdef class XMLSchemaError(LxmlError):
    """Base class of all XML Schema errors.

    Derives from ``LxmlError``.
    """
|
| 8 |
+
|
| 9 |
+
cdef class XMLSchemaParseError(XMLSchemaError):
    """Error while parsing an XML document as XML Schema.

    Raised by the ``XMLSchema`` constructor when neither a tree nor a file
    is given, or when libxml2 fails to build a schema from the input.
    """
|
| 12 |
+
|
| 13 |
+
cdef class XMLSchemaValidateError(XMLSchemaError):
    """Error while validating an XML document with an XML Schema.

    Raised by ``XMLSchema.__call__()`` when the libxml2 validation call
    returns -1 (internal error).
    """
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
################################################################################
|
| 19 |
+
# XMLSchema
|
| 20 |
+
|
| 21 |
+
# Module-level XPath evaluator: true if the schema document contains at least
# one xs:attribute declaration that has a "default" or "fixed" attribute.
# Used by XMLSchema.__init__() to set its _has_default_attributes flag.
cdef XPath _check_for_default_attributes = XPath(
    "boolean(//xs:attribute[@default or @fixed][1])",
    namespaces={'xs': 'http://www.w3.org/2001/XMLSchema'})
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
cdef class XMLSchema(_Validator):
    """XMLSchema(self, etree=None, file=None)
    Turn a document into an XML Schema validator.

    Either pass a schema as Element or ElementTree, or pass a file or
    filename through the ``file`` keyword argument.

    Passing the ``attribute_defaults`` boolean option will make the
    schema insert default/fixed attributes into validated documents.
    """
    cdef xmlschema.xmlSchema* _c_schema
    # Schema document held by this object; stays unset when the schema is
    # loaded by libxml2 directly from a filename.
    cdef _Document _doc
    cdef bint _has_default_attributes
    cdef bint _add_attribute_defaults

    def __cinit__(self):
        self._has_default_attributes = True # play it safe
        self._add_attribute_defaults = False

    def __init__(self, etree=None, *, file=None, bint attribute_defaults=False):
        # Build the libxml2 schema from one of three inputs:
        #   - etree: the root is copied into a new document that is parsed
        #   - file as a string/path: the filename is handed to libxml2
        #   - file as an object: parsed by lxml, then handed over as document
        # Raises XMLSchemaParseError if no input is given or the schema
        # cannot be built, MemoryError if no parser context is available.
        cdef xmlschema.xmlSchemaParserCtxt* parser_ctxt
        cdef xmlDoc* c_doc

        self._add_attribute_defaults = attribute_defaults
        _Validator.__init__(self)
        c_doc = NULL
        if etree is not None:
            doc = _documentOrRaise(etree)
            root_node = _rootNodeOrRaise(etree)
            c_doc = _copyDocRoot(doc._c_doc, root_node._c_node)
            self._doc = _documentFactory(c_doc, doc._parser)
            parser_ctxt = xmlschema.xmlSchemaNewDocParserCtxt(c_doc)
        elif file is not None:
            file = _getFSPathOrObject(file)
            if _isString(file):
                filename = _encodeFilename(file)
                parser_ctxt = xmlschema.xmlSchemaNewParserCtxt(_cstr(filename))
            else:
                self._doc = _parseDocument(file, None, None)
                parser_ctxt = xmlschema.xmlSchemaNewDocParserCtxt(self._doc._c_doc)
        else:
            raise XMLSchemaParseError, "No tree or file given"

        if parser_ctxt is NULL:
            raise MemoryError()

        # Need a cast here because older libxml2 releases do not use 'const' in the functype.
        xmlschema.xmlSchemaSetParserStructuredErrors(
            parser_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>self._error_log)
        if self._doc is not None:
            # calling xmlSchemaParse on a schema with imports or
            # includes will cause libxml2 to create an internal
            # context for parsing, so push an implied context to route
            # resolve requests to the document's parser
            __GLOBAL_PARSER_CONTEXT.pushImpliedContextFromParser(self._doc._parser)
        with nogil:
            # the document loader is swapped in only for the duration of the parse
            orig_loader = _register_document_loader()
            self._c_schema = xmlschema.xmlSchemaParse(parser_ctxt)
            _reset_document_loader(orig_loader)
        if self._doc is not None:
            __GLOBAL_PARSER_CONTEXT.popImpliedContext()
        xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt)

        if self._c_schema is NULL:
            raise XMLSchemaParseError(
                self._error_log._buildExceptionMessage(
                    "Document is not valid XML Schema"),
                self._error_log)

        if self._doc is not None:
            self._has_default_attributes = _check_for_default_attributes(self._doc)
        # without a schema document to inspect, the flag keeps the
        # conservative True set in __cinit__
        self._add_attribute_defaults = attribute_defaults and self._has_default_attributes

    def __dealloc__(self):
        xmlschema.xmlSchemaFree(self._c_schema)

    def __call__(self, etree):
        """__call__(self, etree)

        Validate doc using XML Schema.

        Returns true if document is valid, false if not.

        Raises XMLSchemaValidateError if libxml2 reports an internal error
        (return code -1) and MemoryError if no validation context could be
        created.  The error log is cleared before each validation run.
        """
        cdef xmlschema.xmlSchemaValidCtxt* valid_ctxt
        cdef _Document doc
        cdef _Element root_node
        cdef xmlDoc* c_doc
        cdef int ret

        assert self._c_schema is not NULL, "Schema instance not initialised"
        doc = _documentOrRaise(etree)
        root_node = _rootNodeOrRaise(etree)

        valid_ctxt = xmlschema.xmlSchemaNewValidCtxt(self._c_schema)
        if valid_ctxt is NULL:
            raise MemoryError()

        try:
            if self._add_attribute_defaults:
                xmlschema.xmlSchemaSetValidOptions(
                    valid_ctxt, xmlschema.XML_SCHEMA_VAL_VC_I_CREATE)

            self._error_log.clear()
            # Need a cast here because older libxml2 releases do not use 'const' in the functype.
            xmlschema.xmlSchemaSetValidStructuredErrors(
                valid_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>self._error_log)

            # NOTE(review): presumably validates a temporary document that
            # has root_node as its root -- confirm against _fakeRootDoc().
            c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
            with nogil:
                ret = xmlschema.xmlSchemaValidateDoc(valid_ctxt, c_doc)
            _destroyFakeDoc(doc._c_doc, c_doc)
        finally:
            # the validation context is per call and always released
            xmlschema.xmlSchemaFreeValidCtxt(valid_ctxt)

        if ret == -1:
            raise XMLSchemaValidateError(
                "Internal error in XML Schema validation.",
                self._error_log)
        if ret == 0:
            return True
        else:
            return False

    cdef _ParserSchemaValidationContext _newSaxValidator(
            self, bint add_default_attributes):
        # Create a validation context bound to this schema.  Default
        # attributes are only added if the schema may declare any and either
        # the caller or this schema's own option asks for them.
        cdef _ParserSchemaValidationContext context
        context = _ParserSchemaValidationContext.__new__(_ParserSchemaValidationContext)
        context._schema = self
        context._add_default_attributes = (self._has_default_attributes and (
            add_default_attributes or self._add_attribute_defaults))
        return context
|
| 157 |
+
|
| 158 |
+
@cython.final
@cython.internal
cdef class _ParserSchemaValidationContext:
    # Couples an XMLSchema with a libxml2 parser context so that validation
    # is plugged into the parser's SAX handlers (see connect()).
    cdef XMLSchema _schema
    cdef xmlschema.xmlSchemaValidCtxt* _valid_ctxt  # created lazily in connect()
    cdef xmlschema.xmlSchemaSAXPlugStruct* _sax_plug  # non-NULL while plugged in
    cdef bint _add_default_attributes

    def __cinit__(self):
        self._valid_ctxt = NULL
        self._sax_plug = NULL
        self._add_default_attributes = False

    def __dealloc__(self):
        # unplug from the parser before freeing the validation context
        self.disconnect()
        if self._valid_ctxt:
            xmlschema.xmlSchemaFreeValidCtxt(self._valid_ctxt)

    cdef _ParserSchemaValidationContext copy(self):
        # Return a fresh, unconnected context for the same schema and with
        # the same default-attributes setting.
        assert self._schema is not None, "_ParserSchemaValidationContext not initialised"
        return self._schema._newSaxValidator(
            self._add_default_attributes)

    cdef void inject_default_attributes(self, xmlDoc* c_doc) noexcept:
        # we currently need to insert default attributes manually
        # after parsing, as libxml2 does not support this at parse
        # time
        if self._add_default_attributes:
            with nogil:
                # the return value is ignored; the call is made for its
                # effect on the document
                xmlschema.xmlSchemaValidateDoc(self._valid_ctxt, c_doc)

    cdef int connect(self, xmlparser.xmlParserCtxt* c_ctxt, _BaseErrorLog error_log) except -1:
        # Plug schema validation into the parser context 'c_ctxt'.  The
        # validation context is created on first use and then kept.
        # Raises MemoryError if the validation context cannot be created.
        if self._valid_ctxt is NULL:
            self._valid_ctxt = xmlschema.xmlSchemaNewValidCtxt(
                self._schema._c_schema)
            if self._valid_ctxt is NULL:
                raise MemoryError()
            if self._add_default_attributes:
                xmlschema.xmlSchemaSetValidOptions(
                    self._valid_ctxt, xmlschema.XML_SCHEMA_VAL_VC_I_CREATE)
        if error_log is not None:
            # Need a cast here because older libxml2 releases do not use 'const' in the functype.
            xmlschema.xmlSchemaSetValidStructuredErrors(
                self._valid_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>error_log)
        self._sax_plug = xmlschema.xmlSchemaSAXPlug(
            self._valid_ctxt, &c_ctxt.sax, &c_ctxt.userData)

    cdef void disconnect(self) noexcept:
        # Undo connect(): unplug from the parser and detach the error log
        # callback.  The validation context itself is kept.
        if self._sax_plug is not NULL:
            xmlschema.xmlSchemaSAXUnplug(self._sax_plug)
            self._sax_plug = NULL
        if self._valid_ctxt is not NULL:
            xmlschema.xmlSchemaSetValidStructuredErrors(
                self._valid_ctxt, NULL, NULL)

    cdef bint isvalid(self) noexcept:
        # A context that was never connected reports "valid".
        if self._valid_ctxt is NULL:
            return 1 # valid
        return xmlschema.xmlSchemaIsValid(self._valid_ctxt)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/xslt.pxi
ADDED
|
@@ -0,0 +1,957 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# XSLT
|
| 2 |
+
from lxml.includes cimport xslt
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
cdef class XSLTError(LxmlError):
    """Base class of all XSLT errors.

    Derives from ``LxmlError``.
    """
|
| 8 |
+
|
| 9 |
+
cdef class XSLTParseError(XSLTError):
    """Error parsing a stylesheet document.

    Also stored by the XSLT document loader when a URI cannot be resolved
    for any load type other than ``XSLT_LOAD_DOCUMENT``.
    """
|
| 12 |
+
|
| 13 |
+
cdef class XSLTApplyError(XSLTError):
    """Error running an XSL transformation.

    Also stored by the XSLT document loader when a URI cannot be resolved
    for load type ``XSLT_LOAD_DOCUMENT``.
    """
|
| 16 |
+
|
| 17 |
+
class XSLTSaveError(XSLTError, SerialisationError):
    """Error serialising an XSLT result.

    Inherits from both ``XSLTError`` and ``SerialisationError``, so it can
    be caught as either.
    """
|
| 20 |
+
|
| 21 |
+
cdef class XSLTExtensionError(XSLTError):
    """Error registering an XSLT extension.

    Raised by ``_XSLTContext`` when an extension is registered without a
    namespace.
    """
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# version information
# Both values are produced by __unpackIntVersion() from libxslt's integer
# version numbers.
# NOTE(review): presumably LIBXSLT_COMPILED_VERSION is the version of the
# headers lxml was built against and LIBXSLT_VERSION the version of the
# library in use at runtime -- confirm against the xslt.pxd declarations.
LIBXSLT_COMPILED_VERSION = __unpackIntVersion(xslt.LIBXSLT_VERSION)
LIBXSLT_VERSION = __unpackIntVersion(xslt.xsltLibxsltVersion)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
################################################################################
|
| 32 |
+
# Where do we store what?
|
| 33 |
+
#
|
| 34 |
+
# xsltStylesheet->doc->_private
|
| 35 |
+
# == _XSLTResolverContext for XSL stylesheet
|
| 36 |
+
#
|
| 37 |
+
# xsltTransformContext->_private
|
| 38 |
+
# == _XSLTResolverContext for transformed document
|
| 39 |
+
#
|
| 40 |
+
################################################################################
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
################################################################################
|
| 44 |
+
# XSLT document loaders
|
| 45 |
+
|
| 46 |
+
@cython.final
@cython.internal
cdef class _XSLTResolverContext(_ResolverContext):
    # Resolver context for XSLT document loading.  It carries the parser
    # used for resolved documents and a pointer to the stylesheet document,
    # which lets the loader short-cut requests for the stylesheet itself.
    cdef xmlDoc* _c_style_doc
    cdef _BaseParser _parser

    cdef _XSLTResolverContext _copy(self):
        # Return a new context set up from the same parser and pointing at
        # the same stylesheet document.
        cdef _XSLTResolverContext context
        context = _XSLTResolverContext()
        _initXSLTResolverContext(context, self._parser)
        context._c_style_doc = self._c_style_doc
        return context
|
| 58 |
+
|
| 59 |
+
cdef _initXSLTResolverContext(_XSLTResolverContext context,
                              _BaseParser parser):
    # Initialise 'context' with the resolvers of 'parser', remember the
    # parser and reset the stylesheet document pointer to NULL.
    _initResolverContext(context, parser.resolvers)
    context._parser = parser
    context._c_style_doc = NULL
|
| 64 |
+
|
| 65 |
+
cdef xmlDoc* _xslt_resolve_from_python(const_xmlChar* c_uri, void* c_context,
                                       int parse_options, int* error) with gil:
    # call the Python document loaders
    #
    # c_uri:         URI of the document to load
    # c_context:     the _XSLTResolverContext, passed as an opaque pointer
    # parse_options: not used in this function
    # error:         out-parameter, set to 1 if an exception was raised
    #                (the exception is stored in the context), otherwise 0
    #
    # Returns the loaded document, or NULL if no resolver produced one or an
    # error occurred.  No exception ever propagates out of this function.
    cdef _XSLTResolverContext context
    cdef _ResolverRegistry resolvers
    cdef _InputDocument doc_ref
    cdef xmlDoc* c_doc
    cdef xmlDoc* c_return_doc = NULL

    error[0] = 0
    context = <_XSLTResolverContext>c_context

    # shortcut if we resolve the stylesheet itself
    c_doc = context._c_style_doc
    try:
        if c_doc is not NULL and c_doc.URL is not NULL:
            if tree.xmlStrcmp(c_uri, c_doc.URL) == 0:
                c_return_doc = _copyDoc(c_doc, 1)
                return c_return_doc # 'goto', see 'finally' below

        # delegate to the Python resolvers
        resolvers = context._resolvers
        # strip the 26 character pseudo-URL prefix before resolving
        if tree.xmlStrncmp(<unsigned char*>'string://__STRING__XSLT__/', c_uri, 26) == 0:
            c_uri += 26
        uri = _decodeFilename(c_uri)
        doc_ref = resolvers.resolve(uri, None, context)

        if doc_ref is not None:
            # build the document according to what the resolver returned
            if doc_ref._type == PARSER_DATA_STRING:
                c_return_doc = _parseDoc(
                    doc_ref._data_bytes, doc_ref._filename, context._parser)
            elif doc_ref._type == PARSER_DATA_FILENAME:
                c_return_doc = _parseDocFromFile(
                    doc_ref._filename, context._parser)
            elif doc_ref._type == PARSER_DATA_FILE:
                c_return_doc = _parseDocFromFilelike(
                    doc_ref._file, doc_ref._filename, context._parser)
            elif doc_ref._type == PARSER_DATA_EMPTY:
                c_return_doc = _newXMLDoc()
            # make sure the resolved document has a URL
            if c_return_doc is not NULL and c_return_doc.URL is NULL:
                c_return_doc.URL = tree.xmlStrdup(c_uri)
    except:
        # deliberately catches everything: the exception is stored in the
        # context and flagged through 'error' instead of being raised
        error[0] = 1
        context._store_raised()
    finally:
        return c_return_doc # and swallow any further exceptions
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
cdef void _xslt_store_resolver_exception(const_xmlChar* c_uri, void* context,
                                         xslt.xsltLoadType c_type) noexcept with gil:
    # Store a "Cannot resolve URI" exception in the resolver context:
    # XSLTApplyError for XSLT_LOAD_DOCUMENT, XSLTParseError for any other
    # load type.  If building that exception fails, the failure itself is
    # stored instead.  Nothing is ever raised from here.
    try:
        message = f"Cannot resolve URI {_decodeFilename(c_uri)}"
        if c_type == xslt.XSLT_LOAD_DOCUMENT:
            exception = XSLTApplyError(message)
        else:
            exception = XSLTParseError(message)
        (<_XSLTResolverContext>context)._store_exception(exception)
    except BaseException as e:
        (<_XSLTResolverContext>context)._store_exception(e)
    finally:
        return # and swallow any further exceptions
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
cdef xmlDoc* _xslt_doc_loader(const_xmlChar* c_uri, tree.xmlDict* c_dict,
                              int parse_options, void* c_ctxt,
                              xslt.xsltLoadType c_type) noexcept nogil:
    # nogil => no Python objects here, may be called without thread context !
    #
    # Document loader that gets registered with libxslt.  It tries the
    # Python resolvers first and falls back to the default loader when they
    # return nothing without an error.  Returns the loaded document or NULL.
    cdef xmlDoc* c_doc
    cdef xmlDoc* result
    cdef void* c_pcontext
    cdef int error = 0
    # find resolver contexts of stylesheet and transformed doc
    if c_type == xslt.XSLT_LOAD_DOCUMENT:
        # transformation time
        c_pcontext = (<xslt.xsltTransformContext*>c_ctxt)._private
    elif c_type == xslt.XSLT_LOAD_STYLESHEET:
        # include/import resolution while parsing
        c_pcontext = (<xslt.xsltStylesheet*>c_ctxt).doc._private
    else:
        c_pcontext = NULL

    if c_pcontext is NULL:
        # can't call Python without context, fall back to default loader
        return XSLT_DOC_DEFAULT_LOADER(
            c_uri, c_dict, parse_options, c_ctxt, c_type)

    c_doc = _xslt_resolve_from_python(c_uri, c_pcontext, parse_options, &error)
    if c_doc is NULL and not error:
        # the resolvers produced nothing and did not fail either
        c_doc = XSLT_DOC_DEFAULT_LOADER(
            c_uri, c_dict, parse_options, c_ctxt, c_type)
        if c_doc is NULL:
            _xslt_store_resolver_exception(c_uri, c_pcontext, c_type)

    if c_doc is not NULL and c_type == xslt.XSLT_LOAD_STYLESHEET:
        # pass the resolver context on to the loaded stylesheet document so
        # that its own includes/imports find it in doc._private
        c_doc._private = c_pcontext
    return c_doc
|
| 161 |
+
|
| 162 |
+
# Keep a reference to libxslt's default loader for use as fallback, then
# install our own loader as a module-level side effect.
cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER = xslt.xsltDocDefaultLoader
xslt.xsltSetLoaderFunc(<xslt.xsltDocLoaderFunc>_xslt_doc_loader)
|
| 164 |
+
|
| 165 |
+
################################################################################
|
| 166 |
+
# XSLT file/network access control
|
| 167 |
+
|
| 168 |
+
cdef class XSLTAccessControl:
    """XSLTAccessControl(self, read_file=True, write_file=True, create_dir=True, read_network=True, write_network=True)

    Access control for XSLT: reading/writing files, directories and
    network I/O. Access to a type of resource is granted or denied by
    passing any of the following boolean keyword arguments. All of
    them default to True to allow access.

    - read_file
    - write_file
    - create_dir
    - read_network
    - write_network

    For convenience, there is also a class member `DENY_ALL` that
    provides an XSLTAccessControl instance that is readily configured
    to deny everything, and a `DENY_WRITE` member that denies all
    write access but allows read access.

    See `XSLT`.
    """
    cdef xslt.xsltSecurityPrefs* _prefs  # owned; freed in __dealloc__

    def __cinit__(self):
        self._prefs = xslt.xsltNewSecurityPrefs()
        if self._prefs is NULL:
            raise MemoryError()

    def __init__(self, *, bint read_file=True, bint write_file=True, bint create_dir=True,
                 bint read_network=True, bint write_network=True):
        # map each keyword option to its libxslt security preference
        self._setAccess(xslt.XSLT_SECPREF_READ_FILE, read_file)
        self._setAccess(xslt.XSLT_SECPREF_WRITE_FILE, write_file)
        self._setAccess(xslt.XSLT_SECPREF_CREATE_DIRECTORY, create_dir)
        self._setAccess(xslt.XSLT_SECPREF_READ_NETWORK, read_network)
        self._setAccess(xslt.XSLT_SECPREF_WRITE_NETWORK, write_network)

    # ready-made instance that denies all access
    DENY_ALL = XSLTAccessControl(
        read_file=False, write_file=False, create_dir=False,
        read_network=False, write_network=False)

    # ready-made instance that allows reading but denies all writing
    DENY_WRITE = XSLTAccessControl(
        read_file=True, write_file=False, create_dir=False,
        read_network=True, write_network=False)

    def __dealloc__(self):
        if self._prefs is not NULL:
            xslt.xsltFreeSecurityPrefs(self._prefs)

    @cython.final
    cdef _setAccess(self, xslt.xsltSecurityOption option, bint allow):
        # Install libxslt's "allow" or "forbid" check function for 'option'.
        cdef xslt.xsltSecurityCheck function
        if allow:
            function = xslt.xsltSecurityAllow
        else:
            function = xslt.xsltSecurityForbid
        xslt.xsltSetSecurityPrefs(self._prefs, option, function)

    @cython.final
    cdef void _register_in_context(self, xslt.xsltTransformContext* ctxt) noexcept:
        # Apply these security preferences to a transform context.
        xslt.xsltSetCtxtSecurityPrefs(self._prefs, ctxt)

    @property
    def options(self):
        """The access control configuration as a map of options."""
        return {
            'read_file': self._optval(xslt.XSLT_SECPREF_READ_FILE),
            'write_file': self._optval(xslt.XSLT_SECPREF_WRITE_FILE),
            'create_dir': self._optval(xslt.XSLT_SECPREF_CREATE_DIRECTORY),
            'read_network': self._optval(xslt.XSLT_SECPREF_READ_NETWORK),
            'write_network': self._optval(xslt.XSLT_SECPREF_WRITE_NETWORK),
        }

    @cython.final
    cdef _optval(self, xslt.xsltSecurityOption option):
        # Translate the installed check function back into True (allow),
        # False (forbid) or None (any other function).
        cdef xslt.xsltSecurityCheck function
        function = xslt.xsltGetSecurityPrefs(self._prefs, option)
        if function is <xslt.xsltSecurityCheck>xslt.xsltSecurityAllow:
            return True
        elif function is <xslt.xsltSecurityCheck>xslt.xsltSecurityForbid:
            return False
        else:
            return None

    def __repr__(self):
        # unqualified class name followed by the options in sorted order
        items = sorted(self.options.items())
        return "%s(%s)" % (
            python._fqtypename(self).decode('UTF-8').split('.')[-1],
            ', '.join(["%s=%r" % item for item in items]))
|
| 255 |
+
|
| 256 |
+
################################################################################
|
| 257 |
+
# XSLT
|
| 258 |
+
|
| 259 |
+
cdef int _register_xslt_function(void* ctxt, name_utf, ns_utf) noexcept:
    # Register the extension function (ns_utf, name_utf) with the XSLT
    # transform context passed as 'ctxt'.  Functions without a namespace
    # are skipped and reported as 0; otherwise the result of the libxslt
    # registration call is returned.
    cdef xslt.xsltTransformContext* transform_ctxt
    cdef int status = 0
    if ns_utf is not None:
        transform_ctxt = <xslt.xsltTransformContext*>ctxt
        # libxml2 internalises the strings if ctxt has a dict
        status = xslt.xsltRegisterExtFunction(
            transform_ctxt, _xcstr(name_utf), _xcstr(ns_utf),
            <xslt.xmlXPathFunction>_xpath_function_call)
    return status
|
| 266 |
+
|
| 267 |
+
# Shared sentinel for "no extension elements".  _XSLTContext compares against
# it by identity and replaces it with a private dict before adding entries,
# so this dict itself is never modified there.
cdef dict EMPTY_DICT = {}
|
| 268 |
+
|
| 269 |
+
@cython.final
@cython.internal
cdef class _XSLTContext(_BaseContext):
    # Evaluation context for an XSLT run: extends _BaseContext with the
    # libxslt transform context and the registered extension elements.
    cdef xslt.xsltTransformContext* _xsltCtxt
    cdef _ReadOnlyElementProxy _extension_element_proxy
    # maps (namespace, name) pairs, as produced by _utf8(), to XSLTExtension
    # objects
    cdef dict _extension_elements

    def __cinit__(self):
        self._xsltCtxt = NULL
        self._extension_elements = EMPTY_DICT

    def __init__(self, namespaces, extensions, error_log, enable_regexp,
                 build_smart_strings):
        # Split XSLTExtension objects (extension elements) out of
        # 'extensions' and hand everything else to the base class.
        # Raises XSLTExtensionError for extensions without a namespace.
        if extensions is not None and extensions:
            for ns_name_tuple, extension in extensions.items():
                if ns_name_tuple[0] is None:
                    raise XSLTExtensionError, \
                        "extensions must not have empty namespaces"
                if isinstance(extension, XSLTExtension):
                    if self._extension_elements is EMPTY_DICT:
                        # first extension element found: switch to a private
                        # dict and to a copy of the caller's mapping, so that
                        # the deletion below leaves the caller's dict and the
                        # dict being iterated untouched
                        self._extension_elements = {}
                        extensions = extensions.copy()
                    ns_utf = _utf8(ns_name_tuple[0])
                    name_utf = _utf8(ns_name_tuple[1])
                    self._extension_elements[(ns_utf, name_utf)] = extension
                    del extensions[ns_name_tuple]
        _BaseContext.__init__(self, namespaces, extensions, error_log, enable_regexp,
                              build_smart_strings)

    cdef _BaseContext _copy(self):
        # The copy shares the extension element dict with the original.
        cdef _XSLTContext context
        context = <_XSLTContext>_BaseContext._copy(self)
        context._extension_elements = self._extension_elements
        return context

    cdef register_context(self, xslt.xsltTransformContext* xsltCtxt,
                          _Document doc):
        # Bind this context to a transform context and register the local
        # and global extension functions and the extension elements with it.
        self._xsltCtxt = xsltCtxt
        self._set_xpath_context(xsltCtxt.xpathCtxt)
        self._register_context(doc)
        self.registerLocalFunctions(xsltCtxt, _register_xslt_function)
        self.registerGlobalFunctions(xsltCtxt, _register_xslt_function)
        _registerXSLTExtensions(xsltCtxt, self._extension_elements)

    cdef free_context(self):
        # Clean up and release the base context state, then free the
        # transform context, which this object takes care of once
        # registered.
        self._cleanup_context()
        self._release_context()
        if self._xsltCtxt is not NULL:
            xslt.xsltFreeTransformContext(self._xsltCtxt)
            self._xsltCtxt = NULL
        self._release_temp_refs()
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
@cython.final
|
| 322 |
+
@cython.internal
|
| 323 |
+
@cython.freelist(8)
|
| 324 |
+
cdef class _XSLTQuotedStringParam:
|
| 325 |
+
"""A wrapper class for literal XSLT string parameters that require
|
| 326 |
+
quote escaping.
|
| 327 |
+
"""
|
| 328 |
+
cdef bytes strval
|
| 329 |
+
def __cinit__(self, strval):
|
| 330 |
+
self.strval = _utf8(strval)
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
@cython.no_gc_clear
|
| 334 |
+
cdef class XSLT:
|
| 335 |
+
"""XSLT(self, xslt_input, extensions=None, regexp=True, access_control=None)
|
| 336 |
+
|
| 337 |
+
Turn an XSL document into an XSLT object.
|
| 338 |
+
|
| 339 |
+
Calling this object on a tree or Element will execute the XSLT::
|
| 340 |
+
|
| 341 |
+
transform = etree.XSLT(xsl_tree)
|
| 342 |
+
result = transform(xml_tree)
|
| 343 |
+
|
| 344 |
+
Keyword arguments of the constructor:
|
| 345 |
+
|
| 346 |
+
- extensions: a dict mapping ``(namespace, name)`` pairs to
|
| 347 |
+
extension functions or extension elements
|
| 348 |
+
- regexp: enable exslt regular expression support in XPath
|
| 349 |
+
(default: True)
|
| 350 |
+
- access_control: access restrictions for network or file
|
| 351 |
+
system (see `XSLTAccessControl`)
|
| 352 |
+
|
| 353 |
+
Keyword arguments of the XSLT call:
|
| 354 |
+
|
| 355 |
+
- profile_run: enable XSLT profiling and make the profile available
|
| 356 |
+
as XML document in ``result.xslt_profile`` (default: False)
|
| 357 |
+
|
| 358 |
+
Other keyword arguments of the call are passed to the stylesheet
|
| 359 |
+
as parameters.
|
| 360 |
+
"""
|
| 361 |
+
cdef _XSLTContext _context
|
| 362 |
+
cdef xslt.xsltStylesheet* _c_style
|
| 363 |
+
cdef _XSLTResolverContext _xslt_resolver_context
|
| 364 |
+
cdef XSLTAccessControl _access_control
|
| 365 |
+
cdef _ErrorLog _error_log
|
| 366 |
+
|
| 367 |
+
def __cinit__(self):
|
| 368 |
+
self._c_style = NULL
|
| 369 |
+
|
| 370 |
+
def __init__(self, xslt_input, *, extensions=None, regexp=True,
|
| 371 |
+
access_control=None):
|
| 372 |
+
cdef xslt.xsltStylesheet* c_style = NULL
|
| 373 |
+
cdef xmlDoc* c_doc
|
| 374 |
+
cdef _Document doc
|
| 375 |
+
cdef _Element root_node
|
| 376 |
+
|
| 377 |
+
doc = _documentOrRaise(xslt_input)
|
| 378 |
+
root_node = _rootNodeOrRaise(xslt_input)
|
| 379 |
+
|
| 380 |
+
# set access control or raise TypeError
|
| 381 |
+
self._access_control = access_control
|
| 382 |
+
|
| 383 |
+
# make a copy of the document as stylesheet parsing modifies it
|
| 384 |
+
c_doc = _copyDocRoot(doc._c_doc, root_node._c_node)
|
| 385 |
+
|
| 386 |
+
# make sure we always have a stylesheet URL
|
| 387 |
+
if c_doc.URL is NULL:
|
| 388 |
+
doc_url_utf = python.PyUnicode_AsASCIIString(
|
| 389 |
+
f"string://__STRING__XSLT__/{id(self)}.xslt")
|
| 390 |
+
c_doc.URL = tree.xmlStrdup(_xcstr(doc_url_utf))
|
| 391 |
+
|
| 392 |
+
self._error_log = _ErrorLog()
|
| 393 |
+
self._xslt_resolver_context = _XSLTResolverContext()
|
| 394 |
+
_initXSLTResolverContext(self._xslt_resolver_context, doc._parser)
|
| 395 |
+
# keep a copy in case we need to access the stylesheet via 'document()'
|
| 396 |
+
self._xslt_resolver_context._c_style_doc = _copyDoc(c_doc, 1)
|
| 397 |
+
c_doc._private = <python.PyObject*>self._xslt_resolver_context
|
| 398 |
+
|
| 399 |
+
with self._error_log:
|
| 400 |
+
orig_loader = _register_document_loader()
|
| 401 |
+
c_style = xslt.xsltParseStylesheetDoc(c_doc)
|
| 402 |
+
_reset_document_loader(orig_loader)
|
| 403 |
+
|
| 404 |
+
if c_style is NULL or c_style.errors:
|
| 405 |
+
tree.xmlFreeDoc(c_doc)
|
| 406 |
+
if c_style is not NULL:
|
| 407 |
+
xslt.xsltFreeStylesheet(c_style)
|
| 408 |
+
self._xslt_resolver_context._raise_if_stored()
|
| 409 |
+
# last error seems to be the most accurate here
|
| 410 |
+
if self._error_log.last_error is not None and \
|
| 411 |
+
self._error_log.last_error.message:
|
| 412 |
+
raise XSLTParseError(self._error_log.last_error.message,
|
| 413 |
+
self._error_log)
|
| 414 |
+
else:
|
| 415 |
+
raise XSLTParseError(
|
| 416 |
+
self._error_log._buildExceptionMessage(
|
| 417 |
+
"Cannot parse stylesheet"),
|
| 418 |
+
self._error_log)
|
| 419 |
+
|
| 420 |
+
c_doc._private = NULL # no longer used!
|
| 421 |
+
self._c_style = c_style
|
| 422 |
+
self._context = _XSLTContext(None, extensions, self._error_log, regexp, True)
|
| 423 |
+
|
| 424 |
+
def __dealloc__(self):
|
| 425 |
+
if self._xslt_resolver_context is not None and \
|
| 426 |
+
self._xslt_resolver_context._c_style_doc is not NULL:
|
| 427 |
+
tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc)
|
| 428 |
+
# this cleans up the doc copy as well
|
| 429 |
+
if self._c_style is not NULL:
|
| 430 |
+
xslt.xsltFreeStylesheet(self._c_style)
|
| 431 |
+
|
| 432 |
+
@property
|
| 433 |
+
def error_log(self):
|
| 434 |
+
"""The log of errors and warnings of an XSLT execution."""
|
| 435 |
+
return self._error_log.copy()
|
| 436 |
+
|
| 437 |
+
@staticmethod
|
| 438 |
+
def strparam(strval):
|
| 439 |
+
"""strparam(strval)
|
| 440 |
+
|
| 441 |
+
Mark an XSLT string parameter that requires quote escaping
|
| 442 |
+
before passing it into the transformation. Use it like this::
|
| 443 |
+
|
| 444 |
+
result = transform(doc, some_strval = XSLT.strparam(
|
| 445 |
+
'''it's \"Monty Python's\" ...'''))
|
| 446 |
+
|
| 447 |
+
Escaped string parameters can be reused without restriction.
|
| 448 |
+
"""
|
| 449 |
+
return _XSLTQuotedStringParam(strval)
|
| 450 |
+
|
| 451 |
+
@staticmethod
|
| 452 |
+
def set_global_max_depth(int max_depth):
|
| 453 |
+
"""set_global_max_depth(max_depth)
|
| 454 |
+
|
| 455 |
+
The maximum traversal depth that the stylesheet engine will allow.
|
| 456 |
+
This does not only count the template recursion depth but also takes
|
| 457 |
+
the number of variables/parameters into account. The required setting
|
| 458 |
+
for a run depends on both the stylesheet and the input data.
|
| 459 |
+
|
| 460 |
+
Example::
|
| 461 |
+
|
| 462 |
+
XSLT.set_global_max_depth(5000)
|
| 463 |
+
|
| 464 |
+
Note that this is currently a global, module-wide setting because
|
| 465 |
+
libxslt does not support it at a per-stylesheet level.
|
| 466 |
+
"""
|
| 467 |
+
if max_depth < 0:
|
| 468 |
+
raise ValueError("cannot set a maximum stylesheet traversal depth < 0")
|
| 469 |
+
xslt.xsltMaxDepth = max_depth
|
| 470 |
+
|
| 471 |
+
def tostring(self, _ElementTree result_tree):
|
| 472 |
+
"""tostring(self, result_tree)
|
| 473 |
+
|
| 474 |
+
Save result doc to string based on stylesheet output method.
|
| 475 |
+
|
| 476 |
+
:deprecated: use str(result_tree) instead.
|
| 477 |
+
"""
|
| 478 |
+
return str(result_tree)
|
| 479 |
+
|
| 480 |
+
def __deepcopy__(self, memo):
|
| 481 |
+
return self.__copy__()
|
| 482 |
+
|
| 483 |
+
def __copy__(self):
|
| 484 |
+
return _copyXSLT(self)
|
| 485 |
+
|
| 486 |
+
def __call__(self, _input, *, profile_run=False, **kw):
|
| 487 |
+
"""__call__(self, _input, profile_run=False, **kw)
|
| 488 |
+
|
| 489 |
+
Execute the XSL transformation on a tree or Element.
|
| 490 |
+
|
| 491 |
+
Pass the ``profile_run`` option to get profile information
|
| 492 |
+
about the XSLT. The result of the XSLT will have a property
|
| 493 |
+
xslt_profile that holds an XML tree with profiling data.
|
| 494 |
+
"""
|
| 495 |
+
cdef _XSLTContext context = None
|
| 496 |
+
cdef _XSLTResolverContext resolver_context
|
| 497 |
+
cdef _Document input_doc
|
| 498 |
+
cdef _Element root_node
|
| 499 |
+
cdef _Document result_doc
|
| 500 |
+
cdef _Document profile_doc = None
|
| 501 |
+
cdef xmlDoc* c_profile_doc
|
| 502 |
+
cdef xslt.xsltTransformContext* transform_ctxt
|
| 503 |
+
cdef xmlDoc* c_result = NULL
|
| 504 |
+
cdef xmlDoc* c_doc
|
| 505 |
+
cdef tree.xmlDict* c_dict
|
| 506 |
+
cdef const_char** params = NULL
|
| 507 |
+
|
| 508 |
+
assert self._c_style is not NULL, "XSLT stylesheet not initialised"
|
| 509 |
+
input_doc = _documentOrRaise(_input)
|
| 510 |
+
root_node = _rootNodeOrRaise(_input)
|
| 511 |
+
|
| 512 |
+
c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node)
|
| 513 |
+
|
| 514 |
+
transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc)
|
| 515 |
+
if transform_ctxt is NULL:
|
| 516 |
+
_destroyFakeDoc(input_doc._c_doc, c_doc)
|
| 517 |
+
raise MemoryError()
|
| 518 |
+
|
| 519 |
+
# using the stylesheet dict is safer than using a possibly
|
| 520 |
+
# unrelated dict from the current thread. Almost all
|
| 521 |
+
# non-input tag/attr names will come from the stylesheet
|
| 522 |
+
# anyway.
|
| 523 |
+
if transform_ctxt.dict is not NULL:
|
| 524 |
+
xmlparser.xmlDictFree(transform_ctxt.dict)
|
| 525 |
+
if kw:
|
| 526 |
+
# parameter values are stored in the dict
|
| 527 |
+
# => avoid unnecessarily cluttering the global dict
|
| 528 |
+
transform_ctxt.dict = xmlparser.xmlDictCreateSub(self._c_style.doc.dict)
|
| 529 |
+
if transform_ctxt.dict is NULL:
|
| 530 |
+
xslt.xsltFreeTransformContext(transform_ctxt)
|
| 531 |
+
raise MemoryError()
|
| 532 |
+
else:
|
| 533 |
+
transform_ctxt.dict = self._c_style.doc.dict
|
| 534 |
+
xmlparser.xmlDictReference(transform_ctxt.dict)
|
| 535 |
+
|
| 536 |
+
xslt.xsltSetCtxtParseOptions(
|
| 537 |
+
transform_ctxt, input_doc._parser._parse_options)
|
| 538 |
+
|
| 539 |
+
if profile_run:
|
| 540 |
+
transform_ctxt.profile = 1
|
| 541 |
+
|
| 542 |
+
try:
|
| 543 |
+
context = self._context._copy()
|
| 544 |
+
context.register_context(transform_ctxt, input_doc)
|
| 545 |
+
|
| 546 |
+
resolver_context = self._xslt_resolver_context._copy()
|
| 547 |
+
transform_ctxt._private = <python.PyObject*>resolver_context
|
| 548 |
+
|
| 549 |
+
_convert_xslt_parameters(transform_ctxt, kw, &params)
|
| 550 |
+
c_result = self._run_transform(
|
| 551 |
+
c_doc, params, context, transform_ctxt)
|
| 552 |
+
if params is not NULL:
|
| 553 |
+
# deallocate space for parameters
|
| 554 |
+
python.lxml_free(params)
|
| 555 |
+
|
| 556 |
+
if transform_ctxt.state != xslt.XSLT_STATE_OK:
|
| 557 |
+
if c_result is not NULL:
|
| 558 |
+
tree.xmlFreeDoc(c_result)
|
| 559 |
+
c_result = NULL
|
| 560 |
+
|
| 561 |
+
if transform_ctxt.profile:
|
| 562 |
+
c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
|
| 563 |
+
if c_profile_doc is not NULL:
|
| 564 |
+
profile_doc = _documentFactory(
|
| 565 |
+
c_profile_doc, input_doc._parser)
|
| 566 |
+
finally:
|
| 567 |
+
if context is not None:
|
| 568 |
+
context.free_context()
|
| 569 |
+
_destroyFakeDoc(input_doc._c_doc, c_doc)
|
| 570 |
+
|
| 571 |
+
try:
|
| 572 |
+
if resolver_context is not None and resolver_context._has_raised():
|
| 573 |
+
if c_result is not NULL:
|
| 574 |
+
tree.xmlFreeDoc(c_result)
|
| 575 |
+
c_result = NULL
|
| 576 |
+
resolver_context._raise_if_stored()
|
| 577 |
+
|
| 578 |
+
if context._exc._has_raised():
|
| 579 |
+
if c_result is not NULL:
|
| 580 |
+
tree.xmlFreeDoc(c_result)
|
| 581 |
+
c_result = NULL
|
| 582 |
+
context._exc._raise_if_stored()
|
| 583 |
+
|
| 584 |
+
if c_result is NULL:
|
| 585 |
+
# last error seems to be the most accurate here
|
| 586 |
+
error = self._error_log.last_error
|
| 587 |
+
if error is not None and error.message:
|
| 588 |
+
if error.line > 0:
|
| 589 |
+
message = f"{error.message}, line {error.line}"
|
| 590 |
+
else:
|
| 591 |
+
message = error.message
|
| 592 |
+
elif error is not None and error.line > 0:
|
| 593 |
+
message = f"Error applying stylesheet, line {error.line}"
|
| 594 |
+
else:
|
| 595 |
+
message = "Error applying stylesheet"
|
| 596 |
+
raise XSLTApplyError(message, self._error_log)
|
| 597 |
+
finally:
|
| 598 |
+
if resolver_context is not None:
|
| 599 |
+
resolver_context.clear()
|
| 600 |
+
|
| 601 |
+
result_doc = _documentFactory(c_result, input_doc._parser)
|
| 602 |
+
|
| 603 |
+
c_dict = c_result.dict
|
| 604 |
+
xmlparser.xmlDictReference(c_dict)
|
| 605 |
+
__GLOBAL_PARSER_CONTEXT.initThreadDictRef(&c_result.dict)
|
| 606 |
+
if c_dict is not c_result.dict or \
|
| 607 |
+
self._c_style.doc.dict is not c_result.dict or \
|
| 608 |
+
input_doc._c_doc.dict is not c_result.dict:
|
| 609 |
+
with nogil:
|
| 610 |
+
if c_dict is not c_result.dict:
|
| 611 |
+
fixThreadDictNames(<xmlNode*>c_result,
|
| 612 |
+
c_dict, c_result.dict)
|
| 613 |
+
if self._c_style.doc.dict is not c_result.dict:
|
| 614 |
+
fixThreadDictNames(<xmlNode*>c_result,
|
| 615 |
+
self._c_style.doc.dict, c_result.dict)
|
| 616 |
+
if input_doc._c_doc.dict is not c_result.dict:
|
| 617 |
+
fixThreadDictNames(<xmlNode*>c_result,
|
| 618 |
+
input_doc._c_doc.dict, c_result.dict)
|
| 619 |
+
xmlparser.xmlDictFree(c_dict)
|
| 620 |
+
|
| 621 |
+
return _xsltResultTreeFactory(result_doc, self, profile_doc)
|
| 622 |
+
|
| 623 |
+
cdef xmlDoc* _run_transform(self, xmlDoc* c_input_doc,
|
| 624 |
+
const_char** params, _XSLTContext context,
|
| 625 |
+
xslt.xsltTransformContext* transform_ctxt):
|
| 626 |
+
cdef xmlDoc* c_result
|
| 627 |
+
xslt.xsltSetTransformErrorFunc(transform_ctxt, <void*>self._error_log,
|
| 628 |
+
<xmlerror.xmlGenericErrorFunc>_receiveXSLTError)
|
| 629 |
+
if self._access_control is not None:
|
| 630 |
+
self._access_control._register_in_context(transform_ctxt)
|
| 631 |
+
with self._error_log, nogil:
|
| 632 |
+
orig_loader = _register_document_loader()
|
| 633 |
+
c_result = xslt.xsltApplyStylesheetUser(
|
| 634 |
+
self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt)
|
| 635 |
+
_reset_document_loader(orig_loader)
|
| 636 |
+
return c_result
|
| 637 |
+
|
| 638 |
+
|
| 639 |
+
cdef _convert_xslt_parameters(xslt.xsltTransformContext* transform_ctxt,
|
| 640 |
+
dict parameters, const_char*** params_ptr):
|
| 641 |
+
cdef Py_ssize_t i, parameter_count
|
| 642 |
+
cdef const_char** params
|
| 643 |
+
cdef tree.xmlDict* c_dict = transform_ctxt.dict
|
| 644 |
+
params_ptr[0] = NULL
|
| 645 |
+
parameter_count = len(parameters)
|
| 646 |
+
if parameter_count == 0:
|
| 647 |
+
return
|
| 648 |
+
# allocate space for parameters
|
| 649 |
+
# * 2 as we want an entry for both key and value,
|
| 650 |
+
# and + 1 as array is NULL terminated
|
| 651 |
+
params = <const_char**>python.lxml_malloc(parameter_count * 2 + 1, sizeof(const_char*))
|
| 652 |
+
if not params:
|
| 653 |
+
raise MemoryError()
|
| 654 |
+
try:
|
| 655 |
+
i = 0
|
| 656 |
+
for key, value in parameters.iteritems():
|
| 657 |
+
k = _utf8(key)
|
| 658 |
+
if isinstance(value, _XSLTQuotedStringParam):
|
| 659 |
+
v = (<_XSLTQuotedStringParam>value).strval
|
| 660 |
+
xslt.xsltQuoteOneUserParam(
|
| 661 |
+
transform_ctxt, _xcstr(k), _xcstr(v))
|
| 662 |
+
else:
|
| 663 |
+
if isinstance(value, XPath):
|
| 664 |
+
v = (<XPath>value)._path
|
| 665 |
+
else:
|
| 666 |
+
v = _utf8(value)
|
| 667 |
+
|
| 668 |
+
c_len = len(k)
|
| 669 |
+
if c_len > limits.INT_MAX:
|
| 670 |
+
raise ValueError("Parameter name too long")
|
| 671 |
+
params[i] = <const_char*> tree.xmlDictLookup(c_dict, _xcstr(k), <int> c_len)
|
| 672 |
+
i += 1
|
| 673 |
+
c_len = len(v)
|
| 674 |
+
if c_len > limits.INT_MAX:
|
| 675 |
+
raise ValueError("Parameter value too long")
|
| 676 |
+
params[i] = <const_char*> tree.xmlDictLookup(c_dict, _xcstr(v), <int> c_len)
|
| 677 |
+
i += 1
|
| 678 |
+
except:
|
| 679 |
+
python.lxml_free(params)
|
| 680 |
+
raise
|
| 681 |
+
params[i] = NULL
|
| 682 |
+
params_ptr[0] = params
|
| 683 |
+
|
| 684 |
+
cdef XSLT _copyXSLT(XSLT stylesheet):
|
| 685 |
+
cdef XSLT new_xslt
|
| 686 |
+
cdef xmlDoc* c_doc
|
| 687 |
+
assert stylesheet._c_style is not NULL, "XSLT stylesheet not initialised"
|
| 688 |
+
new_xslt = XSLT.__new__(XSLT)
|
| 689 |
+
new_xslt._access_control = stylesheet._access_control
|
| 690 |
+
new_xslt._error_log = _ErrorLog()
|
| 691 |
+
new_xslt._context = stylesheet._context._copy()
|
| 692 |
+
|
| 693 |
+
new_xslt._xslt_resolver_context = stylesheet._xslt_resolver_context._copy()
|
| 694 |
+
new_xslt._xslt_resolver_context._c_style_doc = _copyDoc(
|
| 695 |
+
stylesheet._xslt_resolver_context._c_style_doc, 1)
|
| 696 |
+
|
| 697 |
+
c_doc = _copyDoc(stylesheet._c_style.doc, 1)
|
| 698 |
+
new_xslt._c_style = xslt.xsltParseStylesheetDoc(c_doc)
|
| 699 |
+
if new_xslt._c_style is NULL:
|
| 700 |
+
tree.xmlFreeDoc(c_doc)
|
| 701 |
+
raise MemoryError()
|
| 702 |
+
|
| 703 |
+
return new_xslt
|
| 704 |
+
|
| 705 |
+
@cython.final
|
| 706 |
+
cdef class _XSLTResultTree(_ElementTree):
|
| 707 |
+
"""The result of an XSLT evaluation.
|
| 708 |
+
|
| 709 |
+
Use ``str()`` or ``bytes()`` (or ``unicode()`` in Python 2.x) to serialise to a string,
|
| 710 |
+
and the ``.write_output()`` method to serialise to a file.
|
| 711 |
+
"""
|
| 712 |
+
cdef XSLT _xslt
|
| 713 |
+
cdef _Document _profile
|
| 714 |
+
cdef xmlChar* _buffer
|
| 715 |
+
cdef Py_ssize_t _buffer_len
|
| 716 |
+
cdef Py_ssize_t _buffer_refcnt
|
| 717 |
+
|
| 718 |
+
def write_output(self, file, *, compression=0):
|
| 719 |
+
"""write_output(self, file, *, compression=0)
|
| 720 |
+
|
| 721 |
+
Serialise the XSLT output to a file or file-like object.
|
| 722 |
+
|
| 723 |
+
As opposed to the generic ``.write()`` method, ``.write_output()`` serialises
|
| 724 |
+
the result as defined by the ``<xsl:output>`` tag.
|
| 725 |
+
"""
|
| 726 |
+
cdef _FilelikeWriter writer = None
|
| 727 |
+
cdef _Document doc
|
| 728 |
+
cdef int r, rclose, c_compression
|
| 729 |
+
cdef const_xmlChar* c_encoding = NULL
|
| 730 |
+
cdef tree.xmlOutputBuffer* c_buffer
|
| 731 |
+
|
| 732 |
+
if self._context_node is not None:
|
| 733 |
+
doc = self._context_node._doc
|
| 734 |
+
else:
|
| 735 |
+
doc = None
|
| 736 |
+
if doc is None:
|
| 737 |
+
doc = self._doc
|
| 738 |
+
if doc is None:
|
| 739 |
+
raise XSLTSaveError("No document to serialise")
|
| 740 |
+
c_compression = compression or 0
|
| 741 |
+
xslt.LXML_GET_XSLT_ENCODING(c_encoding, self._xslt._c_style)
|
| 742 |
+
writer = _create_output_buffer(file, <const_char*>c_encoding, c_compression, &c_buffer, close=False)
|
| 743 |
+
if writer is None:
|
| 744 |
+
with nogil:
|
| 745 |
+
r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style)
|
| 746 |
+
rclose = tree.xmlOutputBufferClose(c_buffer)
|
| 747 |
+
else:
|
| 748 |
+
r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style)
|
| 749 |
+
rclose = tree.xmlOutputBufferClose(c_buffer)
|
| 750 |
+
if writer is not None:
|
| 751 |
+
writer._exc_context._raise_if_stored()
|
| 752 |
+
if r < 0 or rclose == -1:
|
| 753 |
+
python.PyErr_SetFromErrno(IOError) # raises IOError
|
| 754 |
+
|
| 755 |
+
cdef _saveToStringAndSize(self, xmlChar** s, int* l):
|
| 756 |
+
cdef _Document doc
|
| 757 |
+
cdef int r
|
| 758 |
+
if self._context_node is not None:
|
| 759 |
+
doc = self._context_node._doc
|
| 760 |
+
else:
|
| 761 |
+
doc = None
|
| 762 |
+
if doc is None:
|
| 763 |
+
doc = self._doc
|
| 764 |
+
if doc is None:
|
| 765 |
+
s[0] = NULL
|
| 766 |
+
return
|
| 767 |
+
with nogil:
|
| 768 |
+
r = xslt.xsltSaveResultToString(s, l, doc._c_doc,
|
| 769 |
+
self._xslt._c_style)
|
| 770 |
+
if r == -1:
|
| 771 |
+
raise MemoryError()
|
| 772 |
+
|
| 773 |
+
def __str__(self):
|
| 774 |
+
cdef xmlChar* encoding
|
| 775 |
+
cdef xmlChar* s = NULL
|
| 776 |
+
cdef int l = 0
|
| 777 |
+
self._saveToStringAndSize(&s, &l)
|
| 778 |
+
if s is NULL:
|
| 779 |
+
return ''
|
| 780 |
+
encoding = self._xslt._c_style.encoding
|
| 781 |
+
try:
|
| 782 |
+
if encoding is NULL:
|
| 783 |
+
result = s[:l].decode('UTF-8')
|
| 784 |
+
else:
|
| 785 |
+
result = s[:l].decode(encoding)
|
| 786 |
+
finally:
|
| 787 |
+
tree.xmlFree(s)
|
| 788 |
+
return _stripEncodingDeclaration(result)
|
| 789 |
+
|
| 790 |
+
def __getbuffer__(self, Py_buffer* buffer, int flags):
|
| 791 |
+
cdef int l = 0
|
| 792 |
+
if buffer is NULL:
|
| 793 |
+
return
|
| 794 |
+
if self._buffer is NULL or flags & python.PyBUF_WRITABLE:
|
| 795 |
+
self._saveToStringAndSize(<xmlChar**>&buffer.buf, &l)
|
| 796 |
+
buffer.len = l
|
| 797 |
+
if self._buffer is NULL and not flags & python.PyBUF_WRITABLE:
|
| 798 |
+
self._buffer = <xmlChar*>buffer.buf
|
| 799 |
+
self._buffer_len = l
|
| 800 |
+
self._buffer_refcnt = 1
|
| 801 |
+
else:
|
| 802 |
+
buffer.buf = self._buffer
|
| 803 |
+
buffer.len = self._buffer_len
|
| 804 |
+
self._buffer_refcnt += 1
|
| 805 |
+
if flags & python.PyBUF_WRITABLE:
|
| 806 |
+
buffer.readonly = 0
|
| 807 |
+
else:
|
| 808 |
+
buffer.readonly = 1
|
| 809 |
+
if flags & python.PyBUF_FORMAT:
|
| 810 |
+
buffer.format = "B"
|
| 811 |
+
else:
|
| 812 |
+
buffer.format = NULL
|
| 813 |
+
buffer.ndim = 0
|
| 814 |
+
buffer.shape = NULL
|
| 815 |
+
buffer.strides = NULL
|
| 816 |
+
buffer.suboffsets = NULL
|
| 817 |
+
buffer.itemsize = 1
|
| 818 |
+
buffer.internal = NULL
|
| 819 |
+
if buffer.obj is not self: # set by Cython?
|
| 820 |
+
buffer.obj = self
|
| 821 |
+
|
| 822 |
+
def __releasebuffer__(self, Py_buffer* buffer):
|
| 823 |
+
if buffer is NULL:
|
| 824 |
+
return
|
| 825 |
+
if <xmlChar*>buffer.buf is self._buffer:
|
| 826 |
+
self._buffer_refcnt -= 1
|
| 827 |
+
if self._buffer_refcnt == 0:
|
| 828 |
+
tree.xmlFree(<char*>self._buffer)
|
| 829 |
+
self._buffer = NULL
|
| 830 |
+
else:
|
| 831 |
+
tree.xmlFree(<char*>buffer.buf)
|
| 832 |
+
buffer.buf = NULL
|
| 833 |
+
|
| 834 |
+
property xslt_profile:
|
| 835 |
+
"""Return an ElementTree with profiling data for the stylesheet run.
|
| 836 |
+
"""
|
| 837 |
+
def __get__(self):
|
| 838 |
+
cdef object root
|
| 839 |
+
if self._profile is None:
|
| 840 |
+
return None
|
| 841 |
+
root = self._profile.getroot()
|
| 842 |
+
if root is None:
|
| 843 |
+
return None
|
| 844 |
+
return ElementTree(root)
|
| 845 |
+
|
| 846 |
+
def __del__(self):
|
| 847 |
+
self._profile = None
|
| 848 |
+
|
| 849 |
+
cdef _xsltResultTreeFactory(_Document doc, XSLT xslt, _Document profile):
|
| 850 |
+
cdef _XSLTResultTree result
|
| 851 |
+
result = <_XSLTResultTree>_newElementTree(doc, None, _XSLTResultTree)
|
| 852 |
+
result._xslt = xslt
|
| 853 |
+
result._profile = profile
|
| 854 |
+
return result
|
| 855 |
+
|
| 856 |
+
# functions like "output" and "write" are a potential security risk, but we
|
| 857 |
+
# rely on the user to configure XSLTAccessControl as needed
|
| 858 |
+
xslt.xsltRegisterAllExtras()
|
| 859 |
+
|
| 860 |
+
# enable EXSLT support for XSLT
|
| 861 |
+
xslt.exsltRegisterAll()
|
| 862 |
+
|
| 863 |
+
|
| 864 |
+
################################################################################
|
| 865 |
+
# XSLT PI support
|
| 866 |
+
|
| 867 |
+
cdef object _RE_PI_HREF = re.compile(r'\s+href\s*=\s*(?:\'([^\']*)\'|"([^"]*)")')
|
| 868 |
+
cdef object _FIND_PI_HREF = _RE_PI_HREF.findall
|
| 869 |
+
cdef object _REPLACE_PI_HREF = _RE_PI_HREF.sub
|
| 870 |
+
cdef XPath __findStylesheetByID = None
|
| 871 |
+
|
| 872 |
+
cdef _findStylesheetByID(_Document doc, id):
|
| 873 |
+
global __findStylesheetByID
|
| 874 |
+
if __findStylesheetByID is None:
|
| 875 |
+
__findStylesheetByID = XPath(
|
| 876 |
+
"//xsl:stylesheet[@xml:id = $id]",
|
| 877 |
+
namespaces={"xsl" : "http://www.w3.org/1999/XSL/Transform"})
|
| 878 |
+
return __findStylesheetByID(doc, id=id)
|
| 879 |
+
|
| 880 |
+
cdef class _XSLTProcessingInstruction(PIBase):
|
| 881 |
+
def parseXSL(self, parser=None):
|
| 882 |
+
"""parseXSL(self, parser=None)
|
| 883 |
+
|
| 884 |
+
Try to parse the stylesheet referenced by this PI and return
|
| 885 |
+
an ElementTree for it. If the stylesheet is embedded in the
|
| 886 |
+
same document (referenced via xml:id), find and return an
|
| 887 |
+
ElementTree for the stylesheet Element.
|
| 888 |
+
|
| 889 |
+
The optional ``parser`` keyword argument can be passed to specify the
|
| 890 |
+
parser used to read from external stylesheet URLs.
|
| 891 |
+
"""
|
| 892 |
+
cdef _Document result_doc
|
| 893 |
+
cdef _Element result_node
|
| 894 |
+
cdef bytes href_utf
|
| 895 |
+
cdef const_xmlChar* c_href
|
| 896 |
+
cdef xmlAttr* c_attr
|
| 897 |
+
_assertValidNode(self)
|
| 898 |
+
if self._c_node.content is NULL:
|
| 899 |
+
raise ValueError, "PI lacks content"
|
| 900 |
+
hrefs = _FIND_PI_HREF(' ' + (<unsigned char*>self._c_node.content).decode('UTF-8'))
|
| 901 |
+
if len(hrefs) != 1:
|
| 902 |
+
raise ValueError, "malformed PI attributes"
|
| 903 |
+
hrefs = hrefs[0]
|
| 904 |
+
href_utf = utf8(hrefs[0] or hrefs[1])
|
| 905 |
+
c_href = _xcstr(href_utf)
|
| 906 |
+
|
| 907 |
+
if c_href[0] != c'#':
|
| 908 |
+
# normal URL, try to parse from it
|
| 909 |
+
c_href = tree.xmlBuildURI(
|
| 910 |
+
c_href,
|
| 911 |
+
tree.xmlNodeGetBase(self._c_node.doc, self._c_node))
|
| 912 |
+
if c_href is not NULL:
|
| 913 |
+
try:
|
| 914 |
+
href_utf = <unsigned char*>c_href
|
| 915 |
+
finally:
|
| 916 |
+
tree.xmlFree(<char*>c_href)
|
| 917 |
+
result_doc = _parseDocumentFromURL(href_utf, parser)
|
| 918 |
+
return _elementTreeFactory(result_doc, None)
|
| 919 |
+
|
| 920 |
+
# ID reference to embedded stylesheet
|
| 921 |
+
# try XML:ID lookup
|
| 922 |
+
_assertValidDoc(self._doc)
|
| 923 |
+
c_href += 1 # skip leading '#'
|
| 924 |
+
c_attr = tree.xmlGetID(self._c_node.doc, c_href)
|
| 925 |
+
if c_attr is not NULL and c_attr.doc is self._c_node.doc:
|
| 926 |
+
result_node = _elementFactory(self._doc, c_attr.parent)
|
| 927 |
+
return _elementTreeFactory(result_node._doc, result_node)
|
| 928 |
+
|
| 929 |
+
# try XPath search
|
| 930 |
+
root = _findStylesheetByID(self._doc, funicode(c_href))
|
| 931 |
+
if not root:
|
| 932 |
+
raise ValueError, "reference to non-existing embedded stylesheet"
|
| 933 |
+
elif len(root) > 1:
|
| 934 |
+
raise ValueError, "ambiguous reference to embedded stylesheet"
|
| 935 |
+
result_node = root[0]
|
| 936 |
+
return _elementTreeFactory(result_node._doc, result_node)
|
| 937 |
+
|
| 938 |
+
def set(self, key, value):
|
| 939 |
+
"""set(self, key, value)
|
| 940 |
+
|
| 941 |
+
Supports setting the 'href' pseudo-attribute in the text of
|
| 942 |
+
the processing instruction.
|
| 943 |
+
"""
|
| 944 |
+
if key != "href":
|
| 945 |
+
raise AttributeError, \
|
| 946 |
+
"only setting the 'href' attribute is supported on XSLT-PIs"
|
| 947 |
+
if value is None:
|
| 948 |
+
attrib = ""
|
| 949 |
+
elif '"' in value or '>' in value:
|
| 950 |
+
raise ValueError, "Invalid URL, must not contain '\"' or '>'"
|
| 951 |
+
else:
|
| 952 |
+
attrib = f' href="{value}"'
|
| 953 |
+
text = ' ' + self.text
|
| 954 |
+
if _FIND_PI_HREF(text):
|
| 955 |
+
self.text = _REPLACE_PI_HREF(attrib, text)
|
| 956 |
+
else:
|
| 957 |
+
self.text = text + attrib
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/INSTALLER
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pip
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/LICENSE
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Copyright (c) 2010-2020 Benjamin Peterson
|
| 2 |
+
|
| 3 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
| 4 |
+
this software and associated documentation files (the "Software"), to deal in
|
| 5 |
+
the Software without restriction, including without limitation the rights to
|
| 6 |
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
| 7 |
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
| 8 |
+
subject to the following conditions:
|
| 9 |
+
|
| 10 |
+
The above copyright notice and this permission notice shall be included in all
|
| 11 |
+
copies or substantial portions of the Software.
|
| 12 |
+
|
| 13 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 14 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
| 15 |
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
| 16 |
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
| 17 |
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
| 18 |
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/METADATA
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.1
|
| 2 |
+
Name: six
|
| 3 |
+
Version: 1.16.0
|
| 4 |
+
Summary: Python 2 and 3 compatibility utilities
|
| 5 |
+
Home-page: https://github.com/benjaminp/six
|
| 6 |
+
Author: Benjamin Peterson
|
| 7 |
+
Author-email: benjamin@python.org
|
| 8 |
+
License: MIT
|
| 9 |
+
Platform: UNKNOWN
|
| 10 |
+
Classifier: Development Status :: 5 - Production/Stable
|
| 11 |
+
Classifier: Programming Language :: Python :: 2
|
| 12 |
+
Classifier: Programming Language :: Python :: 3
|
| 13 |
+
Classifier: Intended Audience :: Developers
|
| 14 |
+
Classifier: License :: OSI Approved :: MIT License
|
| 15 |
+
Classifier: Topic :: Software Development :: Libraries
|
| 16 |
+
Classifier: Topic :: Utilities
|
| 17 |
+
Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*
|
| 18 |
+
|
| 19 |
+
.. image:: https://img.shields.io/pypi/v/six.svg
|
| 20 |
+
:target: https://pypi.org/project/six/
|
| 21 |
+
:alt: six on PyPI
|
| 22 |
+
|
| 23 |
+
.. image:: https://travis-ci.org/benjaminp/six.svg?branch=master
|
| 24 |
+
:target: https://travis-ci.org/benjaminp/six
|
| 25 |
+
:alt: six on TravisCI
|
| 26 |
+
|
| 27 |
+
.. image:: https://readthedocs.org/projects/six/badge/?version=latest
|
| 28 |
+
:target: https://six.readthedocs.io/
|
| 29 |
+
:alt: six's documentation on Read the Docs
|
| 30 |
+
|
| 31 |
+
.. image:: https://img.shields.io/badge/license-MIT-green.svg
|
| 32 |
+
:target: https://github.com/benjaminp/six/blob/master/LICENSE
|
| 33 |
+
:alt: MIT License badge
|
| 34 |
+
|
| 35 |
+
Six is a Python 2 and 3 compatibility library. It provides utility functions
|
| 36 |
+
for smoothing over the differences between the Python versions with the goal of
|
| 37 |
+
writing Python code that is compatible on both Python versions. See the
|
| 38 |
+
documentation for more information on what is provided.
|
| 39 |
+
|
| 40 |
+
Six supports Python 2.7 and 3.3+. It is contained in only one Python
|
| 41 |
+
file, so it can be easily copied into your project. (The copyright and license
|
| 42 |
+
notice must be retained.)
|
| 43 |
+
|
| 44 |
+
Online documentation is at https://six.readthedocs.io/.
|
| 45 |
+
|
| 46 |
+
Bugs can be reported to https://github.com/benjaminp/six. The code can also
|
| 47 |
+
be found there.
|
| 48 |
+
|
| 49 |
+
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/RECORD
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/six.cpython-312.pyc,,
|
| 2 |
+
six-1.16.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 3 |
+
six-1.16.0.dist-info/LICENSE,sha256=i7hQxWWqOJ_cFvOkaWWtI9gq3_YPI5P8J2K2MYXo5sk,1066
|
| 4 |
+
six-1.16.0.dist-info/METADATA,sha256=VQcGIFCAEmfZcl77E5riPCN4v2TIsc_qtacnjxKHJoI,1795
|
| 5 |
+
six-1.16.0.dist-info/RECORD,,
|
| 6 |
+
six-1.16.0.dist-info/WHEEL,sha256=Z-nyYpwrcSqxfdux5Mbn_DQ525iP7J2DG3JgGvOYyTQ,110
|
| 7 |
+
six-1.16.0.dist-info/top_level.txt,sha256=_iVH_iYEtEXnD8nYGQYpYFUvkUW9sEO1GYbkeKSAais,4
|
| 8 |
+
six.py,sha256=TOOfQi7nFGfMrIvtdr6wX4wyHH8M7aknmuLfo2cBBrM,34549
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/WHEEL
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Wheel-Version: 1.0
|
| 2 |
+
Generator: bdist_wheel (0.36.2)
|
| 3 |
+
Root-Is-Purelib: true
|
| 4 |
+
Tag: py2-none-any
|
| 5 |
+
Tag: py3-none-any
|
| 6 |
+
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
six
|