BryanW committed on
Commit
b4be5f5
·
verified ·
1 Parent(s): 2c1bace

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/INSTALLER +1 -0
  2. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/LICENSE.txt +28 -0
  3. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/METADATA +74 -0
  4. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/RECORD +38 -0
  5. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/WHEEL +4 -0
  6. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/__init__.py +69 -0
  7. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/_version.py +16 -0
  8. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/archive.py +73 -0
  9. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/asyn.py +1096 -0
  10. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/callbacks.py +324 -0
  11. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/compression.py +175 -0
  12. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/config.py +131 -0
  13. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/conftest.py +55 -0
  14. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/core.py +738 -0
  15. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/exceptions.py +18 -0
  16. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/fuse.py +324 -0
  17. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/generic.py +408 -0
  18. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/mapping.py +251 -0
  19. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/parquet.py +541 -0
  20. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/registry.py +305 -0
  21. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/transaction.py +90 -0
  22. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/utils.py +740 -0
  23. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/INSTALLER +1 -0
  24. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/METADATA +347 -0
  25. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/RECORD +336 -0
  26. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/REQUESTED +0 -0
  27. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/WHEEL +5 -0
  28. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/entry_points.txt +7 -0
  29. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/top_level.txt +1 -0
  30. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/debug.pxi +36 -0
  31. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/docloader.pxi +178 -0
  32. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/dtd.pxi +479 -0
  33. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/iterparse.pxi +438 -0
  34. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/lxml.etree_api.h +204 -0
  35. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/nsclasses.pxi +281 -0
  36. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/objectify.pyx +2149 -0
  37. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/parser.pxi +2071 -0
  38. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/parsertarget.pxi +180 -0
  39. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/readonlytree.pxi +565 -0
  40. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/saxparser.pxi +875 -0
  41. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/usedoctest.py +13 -0
  42. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/xmlid.pxi +179 -0
  43. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/xmlschema.pxi +215 -0
  44. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/xslt.pxi +957 -0
  45. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/INSTALLER +1 -0
  46. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/LICENSE +18 -0
  47. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/METADATA +49 -0
  48. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/RECORD +8 -0
  49. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/WHEEL +6 -0
  50. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/top_level.txt +1 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/LICENSE.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright 2014 Pallets
2
+
3
+ Redistribution and use in source and binary forms, with or without
4
+ modification, are permitted provided that the following conditions are
5
+ met:
6
+
7
+ 1. Redistributions of source code must retain the above copyright
8
+ notice, this list of conditions and the following disclaimer.
9
+
10
+ 2. Redistributions in binary form must reproduce the above copyright
11
+ notice, this list of conditions and the following disclaimer in the
12
+ documentation and/or other materials provided with the distribution.
13
+
14
+ 3. Neither the name of the copyright holder nor the names of its
15
+ contributors may be used to endorse or promote products derived from
16
+ this software without specific prior written permission.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
21
+ PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
24
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/METADATA ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.3
2
+ Name: click
3
+ Version: 8.1.8
4
+ Summary: Composable command line interface toolkit
5
+ Maintainer-email: Pallets <contact@palletsprojects.com>
6
+ Requires-Python: >=3.7
7
+ Description-Content-Type: text/markdown
8
+ Classifier: Development Status :: 5 - Production/Stable
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: BSD License
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python
13
+ Classifier: Typing :: Typed
14
+ Requires-Dist: colorama; platform_system == 'Windows'
15
+ Requires-Dist: importlib-metadata; python_version < '3.8'
16
+ Project-URL: Changes, https://click.palletsprojects.com/changes/
17
+ Project-URL: Chat, https://discord.gg/pallets
18
+ Project-URL: Documentation, https://click.palletsprojects.com/
19
+ Project-URL: Donate, https://palletsprojects.com/donate
20
+ Project-URL: Source, https://github.com/pallets/click/
21
+
22
+ # $ click_
23
+
24
+ Click is a Python package for creating beautiful command line interfaces
25
+ in a composable way with as little code as necessary. It's the "Command
26
+ Line Interface Creation Kit". It's highly configurable but comes with
27
+ sensible defaults out of the box.
28
+
29
+ It aims to make the process of writing command line tools quick and fun
30
+ while also preventing any frustration caused by the inability to
31
+ implement an intended CLI API.
32
+
33
+ Click in three points:
34
+
35
+ - Arbitrary nesting of commands
36
+ - Automatic help page generation
37
+ - Supports lazy loading of subcommands at runtime
38
+
39
+
40
+ ## A Simple Example
41
+
42
+ ```python
43
+ import click
44
+
45
+ @click.command()
46
+ @click.option("--count", default=1, help="Number of greetings.")
47
+ @click.option("--name", prompt="Your name", help="The person to greet.")
48
+ def hello(count, name):
49
+ """Simple program that greets NAME for a total of COUNT times."""
50
+ for _ in range(count):
51
+ click.echo(f"Hello, {name}!")
52
+
53
+ if __name__ == '__main__':
54
+ hello()
55
+ ```
56
+
57
+ ```
58
+ $ python hello.py --count=3
59
+ Your name: Click
60
+ Hello, Click!
61
+ Hello, Click!
62
+ Hello, Click!
63
+ ```
64
+
65
+
66
+ ## Donate
67
+
68
+ The Pallets organization develops and supports Click and other popular
69
+ packages. In order to grow the community of contributors and users, and
70
+ allow the maintainers to devote more time to the projects, [please
71
+ donate today][].
72
+
73
+ [please donate today]: https://palletsprojects.com/donate
74
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/RECORD ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ click-8.1.8.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
2
+ click-8.1.8.dist-info/LICENSE.txt,sha256=morRBqOU6FO_4h9C9OctWSgZoigF2ZG18ydQKSkrZY0,1475
3
+ click-8.1.8.dist-info/METADATA,sha256=WJtQ6uGS2ybLfvUE4vC0XIhIBr4yFGwjrMBR2fiCQ-Q,2263
4
+ click-8.1.8.dist-info/RECORD,,
5
+ click-8.1.8.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
6
+ click/__init__.py,sha256=j1DJeCbga4ribkv5uyvIAzI0oFN13fW9mevDKShFelo,3188
7
+ click/__pycache__/__init__.cpython-312.pyc,,
8
+ click/__pycache__/_compat.cpython-312.pyc,,
9
+ click/__pycache__/_termui_impl.cpython-312.pyc,,
10
+ click/__pycache__/_textwrap.cpython-312.pyc,,
11
+ click/__pycache__/_winconsole.cpython-312.pyc,,
12
+ click/__pycache__/core.cpython-312.pyc,,
13
+ click/__pycache__/decorators.cpython-312.pyc,,
14
+ click/__pycache__/exceptions.cpython-312.pyc,,
15
+ click/__pycache__/formatting.cpython-312.pyc,,
16
+ click/__pycache__/globals.cpython-312.pyc,,
17
+ click/__pycache__/parser.cpython-312.pyc,,
18
+ click/__pycache__/shell_completion.cpython-312.pyc,,
19
+ click/__pycache__/termui.cpython-312.pyc,,
20
+ click/__pycache__/testing.cpython-312.pyc,,
21
+ click/__pycache__/types.cpython-312.pyc,,
22
+ click/__pycache__/utils.cpython-312.pyc,,
23
+ click/_compat.py,sha256=IGKh_J5QdfKELitnRfTGHneejWxoCw_NX9tfMbdcg3w,18730
24
+ click/_termui_impl.py,sha256=a5z7I9gOFeMmu7Gb6_RPyQ8GPuVP1EeblixcWSPSQPk,24783
25
+ click/_textwrap.py,sha256=10fQ64OcBUMuK7mFvh8363_uoOxPlRItZBmKzRJDgoY,1353
26
+ click/_winconsole.py,sha256=5ju3jQkcZD0W27WEMGqmEP4y_crUVzPCqsX_FYb7BO0,7860
27
+ click/core.py,sha256=Q1nEVdctZwvIPOlt4vfHko0TYnHCeE40UEEul8Wpyvs,114748
28
+ click/decorators.py,sha256=7t6F-QWowtLh6F_6l-4YV4Y4yNTcqFQEu9i37zIz68s,18925
29
+ click/exceptions.py,sha256=V7zDT6emqJ8iNl0kF1P5kpFmLMWQ1T1L7aNNKM4YR0w,9600
30
+ click/formatting.py,sha256=Frf0-5W33-loyY_i9qrwXR8-STnW3m5gvyxLVUdyxyk,9706
31
+ click/globals.py,sha256=cuJ6Bbo073lgEEmhjr394PeM-QFmXM-Ci-wmfsd7H5g,1954
32
+ click/parser.py,sha256=h4sndcpF5OHrZQN8vD8IWb5OByvW7ABbhRToxovrqS8,19067
33
+ click/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
+ click/shell_completion.py,sha256=TR0dXEGcvWb9Eo3aaQEXGhnvNS3FF4H4QcuLnvAvYo4,18636
35
+ click/termui.py,sha256=dLxiS70UOvIYBda_nEEZaPAFOVDVmRs1sEPMuLDowQo,28310
36
+ click/testing.py,sha256=3RA8anCf7TZ8-5RAF5it2Te-aWXBAL5VLasQnMiC2ZQ,16282
37
+ click/types.py,sha256=BD5Qqq4h-8kawBmOIzJlmq4xzThAf4wCvaOLZSBDNx0,36422
38
+ click/utils.py,sha256=ce-IrO9ilII76LGkU354pOdHbepM8UftfNH7SfMU_28,20330
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click-8.1.8.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Wheel-Version: 1.0
2
+ Generator: flit 3.10.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/__init__.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from importlib.metadata import entry_points
2
+
3
+ from . import caching
4
+ from ._version import __version__ # noqa: F401
5
+ from .callbacks import Callback
6
+ from .compression import available_compressions
7
+ from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs
8
+ from .exceptions import FSTimeoutError
9
+ from .mapping import FSMap, get_mapper
10
+ from .registry import (
11
+ available_protocols,
12
+ filesystem,
13
+ get_filesystem_class,
14
+ register_implementation,
15
+ registry,
16
+ )
17
+ from .spec import AbstractFileSystem
18
+
19
+ __all__ = [
20
+ "AbstractFileSystem",
21
+ "FSTimeoutError",
22
+ "FSMap",
23
+ "filesystem",
24
+ "register_implementation",
25
+ "get_filesystem_class",
26
+ "get_fs_token_paths",
27
+ "get_mapper",
28
+ "open",
29
+ "open_files",
30
+ "open_local",
31
+ "registry",
32
+ "caching",
33
+ "Callback",
34
+ "available_protocols",
35
+ "available_compressions",
36
+ "url_to_fs",
37
+ ]
38
+
39
+
40
def process_entries():
    """Register the filesystems advertised in the ``fsspec.specs`` entry-point group.

    Each entry point's name is used as the protocol name and its value (with
    ``:`` replaced by ``.``) as the dotted path handed to
    ``register_implementation``. When the same name appears more than once,
    only the first occurrence is registered.
    """
    if entry_points is None:
        return

    try:
        eps = entry_points()
    except TypeError:
        return  # importlib-metadata < 0.8

    if hasattr(eps, "select"):  # Python 3.10+ / importlib_metadata >= 3.9.0
        specs = eps.select(group="fsspec.specs")
    else:
        specs = eps.get("fsspec.specs", [])

    seen = set()
    for spec in specs:
        err_msg = f"Unable to load filesystem from {spec}"
        name = spec.name
        if name in seen:
            continue
        seen.add(name)
        # We take our implementations as the ones to overload with if
        # for some reason we encounter some, may be the same, already
        # registered
        register_implementation(
            name,
            spec.value.replace(":", "."),
            errtxt=err_msg,
            clobber=True,
        )
67
+
68
+
69
+ process_entries()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/_version.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # file generated by setuptools_scm
2
+ # don't change, don't track in version control
3
+ TYPE_CHECKING = False
4
+ if TYPE_CHECKING:
5
+ from typing import Tuple, Union
6
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
7
+ else:
8
+ VERSION_TUPLE = object
9
+
10
+ version: str
11
+ __version__: str
12
+ __version_tuple__: VERSION_TUPLE
13
+ version_tuple: VERSION_TUPLE
14
+
15
+ __version__ = version = '2024.5.0'
16
+ __version_tuple__ = version_tuple = (2024, 5, 0)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/archive.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fsspec import AbstractFileSystem
2
+ from fsspec.utils import tokenize
3
+
4
+
5
class AbstractArchiveFileSystem(AbstractFileSystem):
    """
    Shared base class for filesystems that expose the contents of an archive.

    Currently, it is shared amongst
    :class:`~fsspec.implementations.zip.ZipFileSystem`,
    :class:`~fsspec.implementations.libarchive.LibArchiveFileSystem` and
    :class:`~fsspec.implementations.tar.TarFileSystem`.
    """

    def __str__(self):
        return f"<Archive-like object {type(self).__name__} at {id(self)}>"

    __repr__ = __str__

    def ukey(self, path):
        """Return a token built from ``path``, ``self.fo`` and ``self.protocol``."""
        return tokenize(path, self.fo, self.protocol)

    def _all_dirnames(self, paths):
        """Returns *all* directory names for each path in paths, including intermediate
        ones.

        Parameters
        ----------
        paths: Iterable of path strings
        """
        if len(paths) == 0:
            return set()

        parents = {self._parent(entry) for entry in paths}
        parents.discard(self.root_marker)
        # recurse on the parents to collect the intermediate directories too
        return parents | self._all_dirnames(parents)

    def info(self, path, **kwargs):
        """Return the cached entry for ``path``.

        The root (``""`` or ``"/"``) is reported as a synthetic directory when
        the cache is non-empty; otherwise ``path`` and then ``path + "/"`` are
        looked up in ``self.dir_cache``.

        Raises
        ------
        FileNotFoundError
            If neither key is present in the cache.
        """
        self._get_dirs()
        path = self._strip_protocol(path)
        if path in {"", "/"} and self.dir_cache:
            return {"name": "", "type": "directory", "size": 0}
        if path in self.dir_cache:
            return self.dir_cache[path]
        dir_key = path + "/"
        if dir_key in self.dir_cache:
            return self.dir_cache[dir_key]
        raise FileNotFoundError(path)

    def ls(self, path, detail=True, **kwargs):
        """List the cached entries whose parent is ``path``.

        Returns a list of info dicts sorted by ``"name"`` when ``detail`` is
        true, otherwise a sorted list of names.
        """
        self._get_dirs()
        listing = {}
        for name, entry in self.dir_cache.items():
            name = name.rstrip("/")
            parent = name.rsplit("/", 1)[0] if "/" in name else ""
            if parent == path.rstrip("/"):
                listing[name] = entry
                continue

            requested = path.split("/")
            candidate = [""] + name.strip("/").split("/")
            if all((a == b) for a, b in zip(requested, candidate)):
                # root directory entry
                top = name.rstrip("/").split("/", 1)[0]
                if top not in listing:
                    listing[top] = {"name": top, "size": 0, "type": "directory"}

        if detail:
            return sorted(listing.values(), key=lambda item: item["name"])
        return sorted(listing)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/asyn.py ADDED
@@ -0,0 +1,1096 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import asyncio.events
3
+ import functools
4
+ import inspect
5
+ import io
6
+ import numbers
7
+ import os
8
+ import re
9
+ import threading
10
+ from contextlib import contextmanager
11
+ from glob import has_magic
12
+ from typing import TYPE_CHECKING, Iterable
13
+
14
+ from .callbacks import DEFAULT_CALLBACK
15
+ from .exceptions import FSTimeoutError
16
+ from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
17
+ from .spec import AbstractBufferedFile, AbstractFileSystem
18
+ from .utils import glob_translate, is_exception, other_paths
19
+
20
+ private = re.compile("_[^_]")
21
+ iothread = [None] # dedicated fsspec IO thread
22
+ loop = [None] # global event loop for any non-async instance
23
+ _lock = None # global lock placeholder
24
+ get_running_loop = asyncio.get_running_loop
25
+
26
+
27
def get_lock():
    """Return the module-wide threading lock, creating it on first use.

    Allocation is deferred until the first call so that each forked process
    can end up with its own lock.
    """
    global _lock
    if _lock:
        return _lock
    _lock = threading.Lock()
    return _lock
36
+
37
+
38
def reset_lock():
    """Forget the global lock, IO thread and event loop.

    Intended to be called only while initialising a forked process: clearing
    the lock back to None lets the new process allocate a fresh one.
    """
    global _lock

    _lock = None
    loop[0] = None
    iothread[0] = None
49
+
50
+
51
+ async def _runner(event, coro, result, timeout=None):
52
+ timeout = timeout if timeout else None # convert 0 or 0.0 to None
53
+ if timeout is not None:
54
+ coro = asyncio.wait_for(coro, timeout=timeout)
55
+ try:
56
+ result[0] = await coro
57
+ except Exception as ex:
58
+ result[0] = ex
59
+ finally:
60
+ event.set()
61
+
62
+
63
def sync(loop, func, *args, timeout=None, **kwargs):
    """
    Run ``func(*args, **kwargs)`` on ``loop`` and block until it completes.

    The coroutine is submitted to ``loop`` with
    ``asyncio.run_coroutine_threadsafe`` and the calling thread waits for it.
    A falsy ``timeout`` (None, 0, 0.0) disables the time limit.

    Raises ``RuntimeError`` if ``loop`` is None or closed,
    ``NotImplementedError`` if called from within ``loop`` itself, and
    ``FSTimeoutError`` on timeout. An exception produced by the coroutine is
    re-raised in the caller.

    Examples
    --------
    >>> fsspec.asyn.sync(fsspec.asyn.get_loop(), func, *args,
                         timeout=timeout, **kwargs)
    """
    if not timeout:
        timeout = None  # convert 0 or 0.0 to None
    # NB: if the loop is not running *yet*, it is OK to submit work
    # and we will wait for it
    if loop is None or loop.is_closed():
        raise RuntimeError("Loop is not running")

    try:
        current = asyncio.events.get_running_loop()
    except RuntimeError:
        # no loop is running in this thread
        current = None
    if current is loop:
        raise NotImplementedError("Calling sync() from within a running loop")

    coro = func(*args, **kwargs)
    outcome = [None]
    finished = threading.Event()
    asyncio.run_coroutine_threadsafe(_runner(finished, coro, outcome, timeout), loop)

    # wait in one-second slices so that this thread can be interrupted
    while not finished.wait(1):
        if timeout is not None:
            timeout -= 1
            if timeout < 0:
                raise FSTimeoutError

    value = outcome[0]
    if isinstance(value, asyncio.TimeoutError):
        # suppress asyncio.TimeoutError, raise FSTimeoutError
        raise FSTimeoutError from value
    if isinstance(value, BaseException):
        raise value
    return value
106
+
107
+
108
def sync_wrapper(func, obj=None):
    """Wrap the coroutine function ``func`` so it can be called from blocking code.

    Leave obj=None if defining within a class. Pass the instance if attaching
    as an attribute of the instance. The wrapper runs ``func`` through
    ``sync`` on the ``loop`` attribute of ``obj`` or, when ``obj`` is falsy,
    of the first positional argument.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        owner = obj if obj else args[0]
        return sync(owner.loop, func, *args, **kwargs)

    return wrapper
121
+
122
+
123
+ @contextmanager
124
+ def _selector_policy():
125
+ original_policy = asyncio.get_event_loop_policy()
126
+ try:
127
+ if os.name == "nt" and hasattr(asyncio, "WindowsSelectorEventLoopPolicy"):
128
+ asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
129
+
130
+ yield
131
+ finally:
132
+ asyncio.set_event_loop_policy(original_policy)
133
+
134
+
135
def get_loop():
    """Create or return the default fsspec IO loop

    On first use a new event loop is created and started with ``run_forever``
    on a daemon thread named "fsspecIO"; later calls return the same loop.
    """
    if loop[0] is not None:
        return loop[0]

    with get_lock():
        # check again under the lock: another thread may have created the
        # loop between the test above and acquiring the lock
        if loop[0] is None:
            with _selector_policy():
                loop[0] = asyncio.new_event_loop()
            io_thread = threading.Thread(
                target=loop[0].run_forever, name="fsspecIO", daemon=True
            )
            io_thread.start()
            iothread[0] = io_thread
    return loop[0]
152
+
153
+
154
+ if TYPE_CHECKING:
155
+ import resource
156
+
157
+ ResourceError = resource.error
158
+ else:
159
+ try:
160
+ import resource
161
+ except ImportError:
162
+ resource = None
163
+ ResourceError = OSError
164
+ else:
165
+ ResourceError = getattr(resource, "error", OSError)
166
+
167
+ _DEFAULT_BATCH_SIZE = 128
168
+ _NOFILES_DEFAULT_BATCH_SIZE = 1280
169
+
170
+
171
def _get_batch_size(nofiles=False):
    """Pick the number of coroutines to run concurrently.

    A value configured in ``fsspec.config.conf`` wins
    (``nofiles_gather_batch_size`` when ``nofiles`` is true, otherwise
    ``gather_batch_size``). Without one, ``nofiles`` operations use
    ``_NOFILES_DEFAULT_BATCH_SIZE``; other operations use one eighth of the
    soft RLIMIT_NOFILE limit, -1 if that limit is unlimited, or
    ``_DEFAULT_BATCH_SIZE`` when the limit cannot be determined.
    """
    from fsspec.config import conf

    conf_key = "nofiles_gather_batch_size" if nofiles else "gather_batch_size"
    if conf_key in conf:
        return conf[conf_key]

    if nofiles:
        return _NOFILES_DEFAULT_BATCH_SIZE
    if resource is None:
        return _DEFAULT_BATCH_SIZE

    try:
        soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
    except (ImportError, ValueError, ResourceError):
        return _DEFAULT_BATCH_SIZE

    if soft_limit == resource.RLIM_INFINITY:
        return -1
    return soft_limit // 8
194
+
195
+
196
def running_async() -> bool:
    """Being executed by an event loop?"""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # get_running_loop() raises when no loop is running in this thread
        return False
    else:
        return True
203
+
204
+
205
async def _run_coros_in_chunks(
    coros,
    batch_size=None,
    callback=DEFAULT_CALLBACK,
    timeout=None,
    return_exceptions=False,
    nofiles=False,
):
    """Run the given coroutines in chunks.

    Parameters
    ----------
    coros: list of coroutines to run
    batch_size: int or None
        Number of coroutines to submit/wait on simultaneously.
        If -1, then there will not be any throttling. If
        None, it will be inferred from _get_batch_size()
    callback: fsspec.callbacks.Callback instance
        Gets a relative_update when each coroutine completes
    timeout: number or None
        If given, each coroutine times out after this time. Note that, since
        there are multiple batches, the total run time of this function will in
        general be longer
    return_exceptions: bool
        Same meaning as in asyncio.gather
    nofiles: bool
        If inferring the batch_size, does this operation involve local files?
        If yes, you normally expect smaller batches.

    Returns
    -------
    list
        One entry per input coroutine, in input order. An empty input yields
        an empty list.

    Raises
    ------
    ValueError
        If the resolved batch size is not positive.
    """
    n = len(coros)
    if n == 0:
        # Nothing to run. Returning early also avoids resolving an unthrottled
        # batch size (-1) to len(coros) == 0, which used to fail the
        # positivity check below.
        return []

    if batch_size is None:
        batch_size = _get_batch_size(nofiles=nofiles)

    if batch_size == -1:
        batch_size = n

    # Explicit check rather than ``assert``: asserts are stripped under -O,
    # after which a non-positive batch size would skip every coroutine and
    # silently return a list of None.
    if not batch_size > 0:
        raise ValueError(f"batch_size must be positive or -1, got {batch_size!r}")

    async def _run_coro(coro, i):
        # Pair each outcome with its index so results can be stored in input
        # order even though coroutines finish out of order.
        try:
            return await asyncio.wait_for(coro, timeout=timeout), i
        except Exception as e:
            if not return_exceptions:
                raise
            return e, i
        finally:
            callback.relative_update(1)

    i = 0
    results = [None] * n
    pending = set()

    while pending or i < n:
        # top up the in-flight set to at most batch_size tasks
        while len(pending) < batch_size and i < n:
            pending.add(asyncio.ensure_future(_run_coro(coros[i], i)))
            i += 1

        if not pending:
            break

        done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
        while done:
            # awaiting a finished task re-raises its exception, if any
            result, k = await done.pop()
            results[k] = result

    return results
272
+
273
+
274
+ # these methods should be implemented as async by any async-able backend
275
+ async_methods = [
276
+ "_ls",
277
+ "_cat_file",
278
+ "_get_file",
279
+ "_put_file",
280
+ "_rm_file",
281
+ "_cp_file",
282
+ "_pipe_file",
283
+ "_expand_path",
284
+ "_info",
285
+ "_isfile",
286
+ "_isdir",
287
+ "_exists",
288
+ "_walk",
289
+ "_glob",
290
+ "_find",
291
+ "_du",
292
+ "_size",
293
+ "_mkdir",
294
+ "_makedirs",
295
+ ]
296
+
297
+
298
+ class AsyncFileSystem(AbstractFileSystem):
299
+ """Async file operations, default implementations
300
+
301
+ Passes bulk operations to asyncio.gather for concurrent operation.
302
+
303
+ Implementations that have concurrent batch operations and/or async methods
304
+ should inherit from this class instead of AbstractFileSystem. Docstrings are
305
+ copied from the un-underscored method in AbstractFileSystem, if not given.
306
+ """
307
+
308
+ # note that methods do not have docstring here; they will be copied
309
+ # for _* methods and inferred for overridden methods.
310
+
311
+ async_impl = True
312
+ mirror_sync_methods = True
313
+ disable_throttling = False
314
+
315
+ def __init__(self, *args, asynchronous=False, loop=None, batch_size=None, **kwargs):
316
+ self.asynchronous = asynchronous
317
+ self._pid = os.getpid()
318
+ if not asynchronous:
319
+ self._loop = loop or get_loop()
320
+ else:
321
+ self._loop = None
322
+ self.batch_size = batch_size
323
+ super().__init__(*args, **kwargs)
324
+
325
+ @property
326
+ def loop(self):
327
+ if self._pid != os.getpid():
328
+ raise RuntimeError("This class is not fork-safe")
329
+ return self._loop
330
+
331
+ async def _rm_file(self, path, **kwargs):
332
+ raise NotImplementedError
333
+
334
+ async def _rm(self, path, recursive=False, batch_size=None, **kwargs):
335
+ # TODO: implement on_error
336
+ batch_size = batch_size or self.batch_size
337
+ path = await self._expand_path(path, recursive=recursive)
338
+ return await _run_coros_in_chunks(
339
+ [self._rm_file(p, **kwargs) for p in reversed(path)],
340
+ batch_size=batch_size,
341
+ nofiles=True,
342
+ )
343
+
344
+ async def _cp_file(self, path1, path2, **kwargs):
345
+ raise NotImplementedError
346
+
347
+ async def _copy(
348
+ self,
349
+ path1,
350
+ path2,
351
+ recursive=False,
352
+ on_error=None,
353
+ maxdepth=None,
354
+ batch_size=None,
355
+ **kwargs,
356
+ ):
357
+ if on_error is None and recursive:
358
+ on_error = "ignore"
359
+ elif on_error is None:
360
+ on_error = "raise"
361
+
362
+ if isinstance(path1, list) and isinstance(path2, list):
363
+ # No need to expand paths when both source and destination
364
+ # are provided as lists
365
+ paths1 = path1
366
+ paths2 = path2
367
+ else:
368
+ source_is_str = isinstance(path1, str)
369
+ paths1 = await self._expand_path(
370
+ path1, maxdepth=maxdepth, recursive=recursive
371
+ )
372
+ if source_is_str and (not recursive or maxdepth is not None):
373
+ # Non-recursive glob does not copy directories
374
+ paths1 = [
375
+ p for p in paths1 if not (trailing_sep(p) or await self._isdir(p))
376
+ ]
377
+ if not paths1:
378
+ return
379
+
380
+ source_is_file = len(paths1) == 1
381
+ dest_is_dir = isinstance(path2, str) and (
382
+ trailing_sep(path2) or await self._isdir(path2)
383
+ )
384
+
385
+ exists = source_is_str and (
386
+ (has_magic(path1) and source_is_file)
387
+ or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
388
+ )
389
+ paths2 = other_paths(
390
+ paths1,
391
+ path2,
392
+ exists=exists,
393
+ flatten=not source_is_str,
394
+ )
395
+
396
+ batch_size = batch_size or self.batch_size
397
+ coros = [self._cp_file(p1, p2, **kwargs) for p1, p2 in zip(paths1, paths2)]
398
+ result = await _run_coros_in_chunks(
399
+ coros, batch_size=batch_size, return_exceptions=True, nofiles=True
400
+ )
401
+
402
+ for ex in filter(is_exception, result):
403
+ if on_error == "ignore" and isinstance(ex, FileNotFoundError):
404
+ continue
405
+ raise ex
406
+
407
+ async def _pipe_file(self, path, value, **kwargs):
408
+ raise NotImplementedError
409
+
410
async def _pipe(self, path, value=None, batch_size=None, **kwargs):
    """Write data to one or several files.

    Parameters
    ----------
    path: str or mapping
        A single path (written with ``value``), or an object with
        ``.items()`` yielding ``(path, value)`` pairs.
    value:
        Content for ``path`` when ``path`` is a string.
    batch_size: int or None
        Falls back to ``self.batch_size`` when falsy.
    **kwargs:
        Passed to every ``_pipe_file`` call.

    Returns
    -------
    Whatever ``_run_coros_in_chunks`` returns for the ``_pipe_file`` calls.
    """
    contents = {path: value} if isinstance(path, str) else path
    batch_size = batch_size or self.batch_size
    writes = [self._pipe_file(key, data, **kwargs) for key, data in contents.items()]
    return await _run_coros_in_chunks(writes, batch_size=batch_size, nofiles=True)
419
+
420
async def _process_limits(self, url, start, end):
    """Helper for "Range"-based _cat_file

    Builds a ``bytes=<start>-<end>`` string from ``start``/``end`` offsets.
    Negative offsets are resolved against the ``"size"`` entry of
    ``self._info(url)``, except for the "negative start, no end" case, which
    is emitted as a suffix length (``bytes=-N``).
    """
    size = None
    is_suffix = False
    if start is None:
        start = 0
    elif start < 0:
        if end is None:
            # if start is negative and end None, end is the "suffix length"
            end, start, is_suffix = -start, "", True
        else:
            size = size or (await self._info(url))["size"]
            start = size + start
    if not is_suffix:
        if end is None:
            end = ""
        elif end < 0:
            if start is not None:
                size = size or (await self._info(url))["size"]
                end = size + end
        if isinstance(end, numbers.Integral):
            end -= 1  # bytes range is inclusive
    return f"bytes={start}-{end}"
445
+
446
async def _cat_file(self, path, start=None, end=None, **kwargs):
    """Return the content of the single file ``path`` between ``start`` and ``end``.

    Placeholder on this class: it unconditionally raises
    ``NotImplementedError``.
    """
    raise NotImplementedError
448
+
449
async def _cat(
    self, path, recursive=False, on_error="raise", batch_size=None, **kwargs
):
    """Fetch the contents of the file(s) matched by ``path``.

    Parameters
    ----------
    path: str or list
        Expanded with ``_expand_path``.
    recursive: bool
        Forwarded to ``_expand_path``.
    on_error: str
        "raise" re-raises the first exception found among the results;
        "omit" drops failed paths from a dict result; any other value leaves
        exception objects in the result.
    batch_size: int or None
        Falls back to ``self.batch_size`` when falsy.
    **kwargs:
        Passed to every ``_cat_file`` call.

    Returns
    -------
    The single result when ``path`` is a string that expanded to exactly
    itself (after ``_strip_protocol``); otherwise a dict mapping each
    expanded path to its result.
    """
    targets = await self._expand_path(path, recursive=recursive)
    reads = [self._cat_file(target, **kwargs) for target in targets]
    batch_size = batch_size or self.batch_size
    fetched = await _run_coros_in_chunks(
        reads, batch_size=batch_size, nofiles=True, return_exceptions=True
    )
    if on_error == "raise":
        first_error = next(filter(is_exception, fetched), False)
        if first_error:
            raise first_error

    is_single = (
        len(targets) <= 1
        and not isinstance(path, list)
        and targets[0] == self._strip_protocol(path)
    )
    if is_single:
        return fetched[0]
    return {
        name: content
        for name, content in zip(targets, fetched)
        if on_error != "omit" or not is_exception(content)
    }
474
+
475
async def _cat_ranges(
    self,
    paths,
    starts,
    ends,
    max_gap=None,
    batch_size=None,
    on_error="return",
    **kwargs,
):
    """Get the contents of byte ranges from one or more files

    Parameters
    ----------
    paths: list
        A list of file paths on this filesystem
    starts, ends: int or list
        Byte limits of the read. If using a single int, the same value will
        be used to read all the specified files.
    max_gap: None
        Must be left as None; any other value raises ``NotImplementedError``.
    batch_size: int or None
        Falls back to ``self.batch_size`` when falsy.
    on_error:
        Accepted but currently not acted upon; exceptions raised by
        ``_cat_file`` are requested back as results
        (``return_exceptions=True``).
    **kwargs:
        Passed to every ``_cat_file`` call.

    Raises
    ------
    NotImplementedError
        If ``max_gap`` is given.
    TypeError
        If ``paths`` is not a list.
    ValueError
        If ``starts`` or ``ends`` does not have one entry per path.
    """
    # TODO: on_error
    if max_gap is not None:
        # use utils.merge_offset_ranges
        raise NotImplementedError("max_gap is not supported by _cat_ranges")
    if not isinstance(paths, list):
        raise TypeError(f"paths must be a list, got {type(paths).__name__}")
    # Scalars are broadcast so that every path gets the same limit.
    if not isinstance(starts, Iterable):
        starts = [starts] * len(paths)
    if not isinstance(ends, Iterable):
        ends = [ends] * len(paths)
    if len(starts) != len(paths) or len(ends) != len(paths):
        raise ValueError(
            f"starts ({len(starts)}) and ends ({len(ends)}) must each have one "
            f"entry per path ({len(paths)})"
        )
    coros = [
        self._cat_file(p, start=s, end=e, **kwargs)
        for p, s, e in zip(paths, starts, ends)
    ]
    batch_size = batch_size or self.batch_size
    return await _run_coros_in_chunks(
        coros, batch_size=batch_size, nofiles=True, return_exceptions=True
    )
515
+
516
async def _put_file(self, lpath, rpath, **kwargs):
    """Upload the single local file ``lpath`` to ``rpath``.

    Placeholder on this class: it unconditionally raises
    ``NotImplementedError``.
    """
    raise NotImplementedError
518
+
519
async def _put(
    self,
    lpath,
    rpath,
    recursive=False,
    callback=DEFAULT_CALLBACK,
    batch_size=None,
    maxdepth=None,
    **kwargs,
):
    """Copy file(s) from local.

    Copies a specific file or tree of files (if recursive=True). If rpath
    ends with a "/", it will be assumed to be a directory, and target files
    will go within.

    The put_file method will be called concurrently on a batch of files. The
    batch_size option can configure the amount of futures that can be executed
    at the same time. If it is -1, then all the files will be uploaded
    concurrently. The default can be set for this instance by passing
    "batch_size" in the constructor, or for all instances by setting the
    "gather_batch_size" key in ``fsspec.config.conf``, falling back to 1/8th
    of the system limit.
    """
    if isinstance(lpath, list) and isinstance(rpath, list):
        # Explicit source/destination pairs: nothing to expand.
        rpaths, lpaths = rpath, lpath
    else:
        source_is_str = isinstance(lpath, str)
        if source_is_str:
            lpath = make_path_posix(lpath)
        local_fs = LocalFileSystem()
        lpaths = local_fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
        if source_is_str and (not recursive or maxdepth is not None):
            # Non-recursive glob does not copy directories
            lpaths = [
                p for p in lpaths if not (trailing_sep(p) or local_fs.isdir(p))
            ]
            if not lpaths:
                return

        source_is_file = len(lpaths) == 1
        dest_is_dir = isinstance(rpath, str) and (
            trailing_sep(rpath) or await self._isdir(rpath)
        )

        rpath = self._strip_protocol(rpath)
        # ``exists`` is handed to ``other_paths`` to decide how destination
        # names are built.
        exists = False
        if source_is_str:
            if has_magic(lpath):
                exists = source_is_file
            else:
                exists = dest_is_dir and not trailing_sep(lpath)
        rpaths = other_paths(
            lpaths,
            rpath,
            exists=exists,
            flatten=not source_is_str,
        )

    # Local directories become remote directories; everything else is uploaded.
    local_is_dir = {src: os.path.isdir(src) for src in lpaths}
    remote_dirs = [dst for src, dst in zip(lpaths, rpaths) if local_is_dir[src]]
    file_pairs = [
        (src, dst) for src, dst in zip(lpaths, rpaths) if not local_is_dir[src]
    ]

    await asyncio.gather(*[self._makedirs(d, exist_ok=True) for d in remote_dirs])
    batch_size = batch_size or self.batch_size

    callback.set_size(len(file_pairs))
    uploads = [
        callback.branch_coro(self._put_file)(src, dst, **kwargs)
        for src, dst in file_pairs
    ]

    return await _run_coros_in_chunks(
        uploads, batch_size=batch_size, callback=callback
    )
592
+
593
async def _get_file(self, rpath, lpath, **kwargs):
    """Download the single remote file ``rpath`` to the local ``lpath``.

    Placeholder on this class: it unconditionally raises
    ``NotImplementedError``.
    """
    raise NotImplementedError
595
+
596
async def _get(
    self,
    rpath,
    lpath,
    recursive=False,
    callback=DEFAULT_CALLBACK,
    maxdepth=None,
    **kwargs,
):
    """Copy file(s) to local.

    Copies a specific file or tree of files (if recursive=True). If lpath
    ends with a "/", it will be assumed to be a directory, and target files
    will go within. Can submit a list of paths, which may be glob-patterns
    and will be expanded.

    The get_file method will be called concurrently on a batch of files. The
    batch_size option can configure the amount of futures that can be executed
    at the same time. If it is -1, then all the files will be downloaded
    concurrently. The default can be set for this instance by passing
    "batch_size" in the constructor, or for all instances by setting the
    "gather_batch_size" key in ``fsspec.config.conf``, falling back to 1/8th
    of the system limit.

    Note that ``batch_size`` is read from ``**kwargs`` here (it is not a
    named parameter) and defaults to ``self.batch_size``.
    """
    if isinstance(lpath, list) and isinstance(rpath, list):
        # No need to expand paths when both source and destination
        # are provided as lists
        rpaths = rpath
        lpaths = lpath
    else:
        source_is_str = isinstance(rpath, str)
        # First check for rpath trailing slash as _strip_protocol removes it.
        source_not_trailing_sep = source_is_str and not trailing_sep(rpath)
        rpath = self._strip_protocol(rpath)
        rpaths = await self._expand_path(
            rpath, recursive=recursive, maxdepth=maxdepth
        )
        if source_is_str and (not recursive or maxdepth is not None):
            # Non-recursive glob does not copy directories
            rpaths = [
                p for p in rpaths if not (trailing_sep(p) or await self._isdir(p))
            ]
            if not rpaths:
                return

        lpath = make_path_posix(lpath)
        source_is_file = len(rpaths) == 1
        dest_is_dir = isinstance(lpath, str) and (
            trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
        )

        exists = source_is_str and (
            (has_magic(rpath) and source_is_file)
            or (not has_magic(rpath) and dest_is_dir and source_not_trailing_sep)
        )
        lpaths = other_paths(
            rpaths,
            lpath,
            exists=exists,
            flatten=not source_is_str,
        )

    # Make sure every destination has a parent directory. A bare relative
    # filename has an empty dirname, and os.makedirs("") raises, so it is
    # skipped (the current directory is used as-is).
    for target in lpaths:
        parent = os.path.dirname(target)
        if parent:
            os.makedirs(parent, exist_ok=True)
    batch_size = kwargs.pop("batch_size", self.batch_size)

    coros = []
    callback.set_size(len(lpaths))
    for local_target, remote_source in zip(lpaths, rpaths):
        get_file = callback.branch_coro(self._get_file)
        coros.append(get_file(remote_source, local_target, **kwargs))
    return await _run_coros_in_chunks(
        coros, batch_size=batch_size, callback=callback
    )
668
+
669
async def _isfile(self, path):
    """Return True if ``self._info(path)`` reports type "file".

    Any ordinary error raised while looking the path up (or a result without
    a "type" entry) is treated as "not a file" and yields False.
    """
    try:
        info = await self._info(path)
        return info["type"] == "file"
    except Exception:
        # Deliberately broad, but not a bare ``except``: task cancellation
        # and KeyboardInterrupt must propagate instead of becoming False.
        return False
674
+
675
async def _isdir(self, path):
    """Return True if ``self._info(path)`` reports type "directory".

    An ``OSError`` from the lookup is treated as "not a directory"; other
    exceptions propagate.
    """
    try:
        details = await self._info(path)
        return details["type"] == "directory"
    except OSError:
        return False
680
+
681
async def _size(self, path):
    """Return the "size" entry of ``self._info(path)``, or None if absent."""
    details = await self._info(path)
    return details.get("size", None)
683
+
684
async def _sizes(self, paths, batch_size=None):
    """Look up the size of every path in ``paths``.

    One ``_size`` call is created per path and the calls are handed to
    ``_run_coros_in_chunks``; ``batch_size`` falls back to
    ``self.batch_size`` when falsy.
    """
    batch_size = batch_size or self.batch_size
    lookups = [self._size(p) for p in paths]
    return await _run_coros_in_chunks(lookups, batch_size=batch_size)
689
+
690
async def _exists(self, path, **kwargs):
    """Return whether ``self._info(path, **kwargs)`` succeeds.

    ``FileNotFoundError`` means False; any other exception propagates.
    """
    try:
        await self._info(path, **kwargs)
    except FileNotFoundError:
        return False
    return True
696
+
697
async def _info(self, path, **kwargs):
    """Return details about ``path``.

    Placeholder on this class: it unconditionally raises
    ``NotImplementedError``. Callers in this class read the "type", "size"
    and "name" entries of the result.
    """
    raise NotImplementedError
699
+
700
async def _ls(self, path, detail=True, **kwargs):
    """List the entries under ``path``.

    Placeholder on this class: it unconditionally raises
    ``NotImplementedError``.
    """
    raise NotImplementedError
702
+
703
async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs):
    """Asynchronously walk the tree below ``path``, one directory at a time.

    Yields ``(path, dirs, files)`` for ``path`` and then for each
    subdirectory found. With ``detail=True`` (taken from ``**kwargs``,
    default False) ``dirs`` and ``files`` are dicts of name -> listing entry;
    otherwise they are lists of names.

    Parameters
    ----------
    maxdepth: int or None
        Number of levels to descend; must be at least 1 when given.
    on_error: "omit", "raise" or callable
        What to do when listing ``path`` fails with ``OSError``: re-raise,
        call ``on_error(exc)``, or neither. Unless re-raised, an empty entry
        is yielded for ``path`` and the walk of that branch stops.
    **kwargs:
        Remaining keyword arguments are passed to ``_ls``.
    """
    if maxdepth is not None and maxdepth < 1:
        raise ValueError("maxdepth must be at least 1")

    path = self._strip_protocol(path)
    full_dirs = {}
    dirs = {}
    files = {}

    detail = kwargs.pop("detail", False)
    try:
        listing = await self._ls(path, detail=True, **kwargs)
    except OSError as err:  # FileNotFoundError is an OSError
        if on_error == "raise":
            raise
        if callable(on_error):
            on_error(err)
        yield (path, {}, {}) if detail else (path, [], [])
        return

    for entry in listing:
        # each entry name must be at least [path]/part , but here
        # we check also for names like [path]/part/
        pathname = entry["name"].rstrip("/")
        name = pathname.rsplit("/", 1)[-1]
        is_self = pathname == path
        if entry["type"] == "directory" and not is_self:
            # do not include "self" path
            full_dirs[name] = pathname
            dirs[name] = entry
        elif is_self:
            # file-like with same name as given path
            files[""] = entry
        else:
            files[name] = entry

    yield (path, dirs, files) if detail else (path, list(dirs), list(files))

    if maxdepth is not None:
        maxdepth -= 1
        if maxdepth < 1:
            return

    for dirname in dirs:
        async for item in self._walk(
            full_dirs[dirname], maxdepth=maxdepth, detail=detail, **kwargs
        ):
            yield item
756
+
757
async def _glob(self, path, maxdepth=None, **kwargs):
    """Find the paths matching the glob pattern ``path``.

    A pattern without glob characters (per ``has_magic``) is answered with an
    existence check. Otherwise the non-magic leading part is used as the
    root for ``_find`` and the candidates are filtered with the regular
    expression produced by ``glob_translate``.

    Parameters
    ----------
    maxdepth: int or None
        Only used when the pattern contains ``**``; must be at least 1 when
        given.
    **kwargs:
        ``detail`` (default False) selects a dict of path -> info instead of
        a list of paths; the rest is passed to ``_exists``/``_info``/``_find``.
    """
    if maxdepth is not None and maxdepth < 1:
        raise ValueError("maxdepth must be at least 1")

    import re

    seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
    ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
    path = self._strip_protocol(path)
    append_slash_to_dirname = ends_with_sep or path.endswith(
        tuple(sep + "**" for sep in seps)
    )

    # Position of the first glob character, or len(path) when there is none.
    magic_positions = [pos for pos in map(path.find, "*?[") if pos >= 0]
    min_idx = min(magic_positions, default=len(path))

    detail = kwargs.pop("detail", False)

    if not has_magic(path):
        if await self._exists(path, **kwargs):
            return {path: await self._info(path, **kwargs)} if detail else [path]
        # glob of non-existent returns empty
        return {} if detail else []

    if "/" in path[:min_idx]:
        min_idx = path[:min_idx].rindex("/")
        root = path[: min_idx + 1]
    else:
        root = ""
    depth = path[min_idx + 1 :].count("/") + 1

    if "**" in path:
        if maxdepth is None:
            depth = None
        else:
            idx_double_stars = path.find("**")
            depth_double_stars = path[idx_double_stars:].count("/") + 1
            depth = depth - depth_double_stars + maxdepth

    candidates = await self._find(
        root, maxdepth=depth, withdirs=True, detail=True, **kwargs
    )

    matcher = re.compile(glob_translate(path + ("/" if ends_with_sep else "")))

    matched = {}
    for name, info in sorted(candidates.items()):
        probe = name
        if append_slash_to_dirname and info["type"] == "directory":
            probe = name + "/"
        if matcher.match(probe):
            matched[name] = info

    return matched if detail else list(matched)
827
+
828
async def _du(self, path, total=True, maxdepth=None, **kwargs):
    """Report the sizes of everything ``_find`` returns for ``path``.

    Each found path is looked up with ``_info`` and recorded under its
    "name" with its "size".

    Returns
    -------
    The sum of all sizes when ``total`` is true, otherwise the dict of
    name -> size.
    """
    sizes = {}
    # async for?
    for found in await self._find(path, maxdepth=maxdepth, **kwargs):
        details = await self._info(found)
        sizes[details["name"]] = details["size"]
    return sum(sizes.values()) if total else sizes
838
+
839
async def _find(self, path, maxdepth=None, withdirs=False, **kwargs):
    """List everything below ``path`` by consuming ``_walk``.

    Parameters
    ----------
    maxdepth: int or None
        Passed to ``_walk``.
    withdirs: bool
        Also include directories (and ``path`` itself when it is a
        directory).
    **kwargs:
        ``detail`` (default False) selects a dict of name -> info instead of
        a sorted list of names; the rest is passed to ``_walk``.
    """
    path = self._strip_protocol(path)
    found = {}
    detail = kwargs.pop("detail", False)

    # Add the root directory if withdirs is requested
    # This is needed for posix glob compliance
    if withdirs and path != "" and await self._isdir(path):
        found[path] = await self._info(path)

    # async for?
    async for _, dirs, files in self._walk(path, maxdepth, detail=True, **kwargs):
        if withdirs:
            files.update(dirs)
        for info in files.values():
            found[info["name"]] = info

    if not found and (await self._isfile(path)):
        # walk works on directories, but find should also return [path]
        # when path happens to be a file
        found[path] = {}

    names = sorted(found)
    if detail:
        return {name: found[name] for name in names}
    return names
863
+
864
async def _expand_path(self, path, recursive=False, maxdepth=None):
    """Turn a path, glob pattern or list of them into a sorted list of paths.

    Glob patterns (per ``has_magic``) go through ``_glob``; with
    ``recursive`` the results are additionally expanded with ``_find``.

    Raises
    ------
    ValueError
        If ``maxdepth`` is given and smaller than 1.
    FileNotFoundError
        If nothing was found.
    """
    if maxdepth is not None and maxdepth < 1:
        raise ValueError("maxdepth must be at least 1")

    if isinstance(path, str):
        found = await self._expand_path([path], recursive, maxdepth)
    else:
        found = set()
        path = [self._strip_protocol(p) for p in path]
        for candidate in path:  # can gather here
            if has_magic(candidate):
                matches = set(await self._glob(candidate, maxdepth=maxdepth))
                found |= matches
                # glob call above expanded one depth so if maxdepth is defined
                # then decrement it in expand_path call below. If it is zero
                # after decrementing then avoid expand_path call.
                if recursive and (maxdepth is None or maxdepth > 1):
                    found |= set(
                        await self._expand_path(
                            list(matches),
                            recursive=recursive,
                            maxdepth=None if maxdepth is None else maxdepth - 1,
                        )
                    )
                continue
            if recursive:
                found |= set(
                    await self._find(candidate, maxdepth=maxdepth, withdirs=True)
                )
            if candidate not in found and (
                recursive is False or (await self._exists(candidate))
            ):
                # should only check once, for the root
                found.add(candidate)
    if not found:
        raise FileNotFoundError(path)
    return sorted(found)
900
+
901
async def _mkdir(self, path, create_parents=True, **kwargs):
    """Create the directory ``path``; a no-op on this class.

    Not necessary to implement: the backend may not have directories.
    """
    return None
903
+
904
async def _makedirs(self, path, exist_ok=False):
    """Create ``path`` and any missing parents; a no-op on this class.

    Not necessary to implement: the backend may not have directories.
    """
    return None
906
+
907
async def open_async(self, path, mode="rb", **kwargs):
    """Open ``path`` as an async file object; placeholder on this class.

    Raises
    ------
    ValueError
        If ``mode`` is not a binary mode, or a truthy ``compression`` keyword
        is given.
    NotImplementedError
        In every other case.
    """
    is_binary = "b" in mode
    if not is_binary or kwargs.get("compression"):
        raise ValueError
    raise NotImplementedError
911
+
912
+
913
def mirror_sync_methods(obj):
    """Populate sync and async methods for obj

    For each method will create a sync version if the name refers to an async
    method (coroutine) and there is no override in the child class; will
    create an async method for the corresponding sync method if there is no
    implementation.

    Uses the methods specified in
    - async_methods: the set that an implementation is expected to provide
    - default_async_methods: that can be derived from their sync version in
      AbstractFileSystem
    - AsyncFileSystem: async-specific default coroutines
    """
    from fsspec import AbstractFileSystem

    for method in async_methods + dir(AsyncFileSystem):
        # Only underscore-prefixed names accepted by ``private`` are mirrored.
        if not method.startswith("_") or not private.match(method):
            continue
        smethod = method[1:]

        is_coroutine = inspect.iscoroutinefunction(getattr(obj, method, None))
        unsync = getattr(getattr(obj, smethod, False), "__func__", None)
        # True when the sync name still points at AbstractFileSystem's function,
        # i.e. the subclass did not override it.
        is_default = unsync is getattr(AbstractFileSystem, smethod, "")
        if not (is_coroutine and is_default):
            continue

        wrapped = sync_wrapper(getattr(obj, method), obj=obj)
        setattr(obj, smethod, wrapped)
        if not wrapped.__doc__:
            wrapped.__doc__ = getattr(
                getattr(AbstractFileSystem, smethod, None), "__doc__", ""
            )
943
+
944
+
945
class FSSpecCoroutineCancel(Exception):
    """Default exception that ``_dump_running_tasks`` injects into tasks it cancels."""
947
+
948
+
949
def _dump_running_tasks(
    printout=True, cancel=True, exc=FSSpecCoroutineCancel, with_task=False
):
    """Describe, and optionally cancel, the unfinished tasks on ``loop[0]``.

    Parameters
    ----------
    printout: bool
        Print each task's stack.
    cancel: bool
        Cancel each task and throw ``exc`` into its coroutine.
    exc: exception class
        What to inject into the cancelled tasks.
    with_task: bool
        Include the task object itself under "task" (otherwise None).

    Returns
    -------
    A list with one dict per task: "locals", "file", "firstline", "linelo",
    "stack" and "task".
    """
    import traceback

    tasks = [t for t in asyncio.tasks.all_tasks(loop[0]) if not t.done()]
    if printout:
        for task in tasks:
            task.print_stack()

    out = []
    for task in tasks:
        frame = task._coro.cr_frame
        out.append(
            {
                "locals": frame.f_locals,
                "file": frame.f_code.co_filename,
                "firstline": frame.f_code.co_firstlineno,
                "linelo": frame.f_lineno,
                "stack": traceback.format_stack(frame),
                "task": task if with_task else None,
            }
        )

    if cancel:
        for task in tasks:
            callbacks = task._callbacks
            task.cancel()
            asyncio.futures.Future.set_exception(task, exc)
            asyncio.futures.Future.cancel(task)
            for cb in callbacks:
                cb[0](task)  # cancels any dependent concurrent.futures
            try:
                task._coro.throw(exc)  # exits coro, unless explicitly handled
            except exc:
                pass
    return out
980
+
981
+
982
class AbstractAsyncStreamedFile(AbstractBufferedFile):
    """Async counterpart of the buffered file: awaitable read/write/flush/close.

    Subclasses provide ``_fetch_range`` and ``_upload_chunk`` (and optionally
    ``_initiate_upload``).
    """

    # no read buffering, and always auto-commit
    # TODO: readahead might still be useful here, but needs async version

    async def read(self, length=-1):
        """
        Return data from cache, or fetch pieces as necessary

        Parameters
        ----------
        length: int (-1)
            Number of bytes to read; if <0, all remaining bytes.
        """
        length = int(length) if length is not None else -1
        if self.mode != "rb":
            raise ValueError("File not in read mode")
        if length < 0:
            length = self.size - self.loc
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if length == 0:
            # don't even bother calling fetch
            return b""
        data = await self._fetch_range(self.loc, self.loc + length)
        self.loc += len(data)
        return data

    async def write(self, data):
        """
        Write data to buffer.

        Buffer only sent on flush() or if buffer is greater than
        or equal to blocksize.

        Parameters
        ----------
        data: bytes
            Set of bytes to be written.
        """
        if self.mode not in {"wb", "ab"}:
            raise ValueError("File not in write mode")
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if self.forced:
            raise ValueError("This file has been force-flushed, can only close")
        written = self.buffer.write(data)
        self.loc += written
        if self.buffer.tell() >= self.blocksize:
            await self.flush()
        return written

    async def close(self):
        """Close file

        Finalizes writes, discards cache
        """
        if getattr(self, "_unclosable", False) or self.closed:
            return
        if self.mode == "rb":
            self.cache = None
        else:
            if not self.forced:
                await self.flush(force=True)

            if self.fs is not None:
                self.fs.invalidate_cache(self.path)
                self.fs.invalidate_cache(self.fs._parent(self.path))

        self.closed = True

    async def flush(self, force=False):
        """Send the write buffer through ``_upload_chunk``.

        Without ``force`` the upload is deferred until the buffer holds at
        least ``blocksize`` bytes. ``force=True`` marks the final flush and
        may only be used once.
        """
        if self.closed:
            raise ValueError("Flush on closed file")
        if force and self.forced:
            raise ValueError("Force flush cannot be called more than once")
        if force:
            self.forced = True

        if self.mode not in {"wb", "ab"}:
            # no-op to flush on read-mode
            return

        if not force and self.buffer.tell() < self.blocksize:
            # Defer write on small block
            return

        if self.offset is None:
            # Initialize a multipart upload
            self.offset = 0
            try:
                await self._initiate_upload()
            except BaseException:
                # Mark the file closed on any failure, then let it propagate.
                self.closed = True
                raise

        uploaded = await self._upload_chunk(final=force)
        if uploaded is not False:
            # seek(0, 2) returns the end position, i.e. the buffered length
            self.offset += self.buffer.seek(0, 2)
            self.buffer = io.BytesIO()

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    async def _fetch_range(self, start, end):
        """Return the bytes in ``[start, end)``; to be provided by subclasses."""
        raise NotImplementedError

    async def _initiate_upload(self):
        """Hook called once before the first chunk is uploaded; no-op here."""
        pass

    async def _upload_chunk(self, final=False):
        """Upload the current buffer; to be provided by subclasses."""
        raise NotImplementedError
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/callbacks.py ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import wraps
2
+
3
+
4
class Callback:
    """
    Base class and interface for callback mechanism

    This class can be used directly for monitoring file transfers by
    providing ``callback=Callback(hooks=...)`` (see the ``hooks`` argument,
    below), or subclassed for more specialised behaviour.

    Parameters
    ----------
    size: int (optional)
        Nominal quantity for the value that corresponds to a complete
        transfer, e.g., total number of tiles or total number of
        bytes
    value: int (0)
        Starting internal counter value
    hooks: dict or None
        A dict of named functions to be called on each update. The signature
        of these must be ``f(size, value, **kwargs)``
    """

    def __init__(self, size=None, value=0, hooks=None, **kwargs):
        self.size = size
        self.value = value
        self.hooks = hooks or {}
        # extra keyword arguments are forwarded to every hook call
        self.kw = kwargs

    def __enter__(self):
        return self

    def __exit__(self, *exc_args):
        self.close()

    def close(self):
        """Close callback."""

    def branched(self, path_1, path_2, **kwargs):
        """
        Return callback for child transfers

        If this callback is operating at a higher level, e.g., put, which may
        trigger transfers that can also be monitored. The function returns a
        callback that has to be passed to the child method, e.g., put_file,
        as `callback=` argument.

        The implementation uses `callback.branch` for compatibility.
        When implementing callbacks, it is recommended to override this
        function instead of `branch` and avoid calling
        `super().branched(...)`.

        Prefer using this function over `branch`.

        Parameters
        ----------
        path_1: str
            Child's source path
        path_2: str
            Child's destination path
        **kwargs:
            Arbitrary keyword arguments

        Returns
        -------
        callback: Callback
            A callback instance to be passed to the child method
        """
        self.branch(path_1, path_2, kwargs)
        # mutate kwargs so that we can force the caller to pass "callback=" explicitly
        return kwargs.pop("callback", DEFAULT_CALLBACK)

    def branch_coro(self, fn):
        """
        Wraps a coroutine, and pass a new child callback to it.
        """

        @wraps(fn)
        async def func(path1, path2: str, **kwargs):
            with self.branched(path1, path2, **kwargs) as child:
                return await fn(path1, path2, callback=child, **kwargs)

        return func

    def set_size(self, size):
        """
        Set the internal maximum size attribute

        Usually called if not initially set at instantiation. Note that this
        triggers a ``call()``.

        Parameters
        ----------
        size: int
        """
        self.size = size
        self.call()

    def absolute_update(self, value):
        """
        Set the internal value state

        Triggers ``call()``

        Parameters
        ----------
        value: int
        """
        self.value = value
        self.call()

    def relative_update(self, inc=1):
        """
        Delta increment the internal counter

        Triggers ``call()``

        Parameters
        ----------
        inc: int
        """
        self.value += inc
        self.call()

    def call(self, hook_name=None, **kwargs):
        """
        Execute hook(s) with current state

        Each function is passed the internal size and current value

        Parameters
        ----------
        hook_name: str or None
            If given, execute on this hook
        kwargs: passed on to (all) hook(s)
        """
        if not self.hooks:
            return
        # per-call kwargs override the ones given at construction
        merged = {**self.kw, **kwargs}
        if hook_name:
            hook = self.hooks.get(hook_name) if hook_name in self.hooks else None
            if hook_name not in self.hooks:
                return
            return hook(self.size, self.value, **merged)
        for hook in self.hooks.values():
            hook(self.size, self.value, **merged)

    def wrap(self, iterable):
        """
        Wrap an iterable to call ``relative_update`` on each iterations

        Parameters
        ----------
        iterable: Iterable
            The iterable that is being wrapped
        """
        for item in iterable:
            self.relative_update()
            yield item

    def branch(self, path_1, path_2, kwargs):
        """
        Set callbacks for child transfers

        If this callback is operating at a higher level, e.g., put, which may
        trigger transfers that can also be monitored. The passed kwargs are
        to be *mutated* to add ``callback=``, if this class supports branching
        to children.

        Parameters
        ----------
        path_1: str
            Child's source path
        path_2: str
            Child's destination path
        kwargs: dict
            arguments passed to child method, e.g., put_file.

        Returns
        -------

        """
        return None

    def no_op(self, *_, **__):
        pass

    def __getattr__(self, item):
        """
        If undefined methods are called on this class, nothing happens
        """
        return self.no_op

    @classmethod
    def as_callback(cls, maybe_callback=None):
        """Transform callback=... into Callback instance

        For the special value of ``None``, return the global instance of
        ``NoOpCallback``. This is an alternative to including
        ``callback=DEFAULT_CALLBACK`` directly in a method signature.
        """
        return DEFAULT_CALLBACK if maybe_callback is None else maybe_callback
205
+
206
+
207
class NoOpCallback(Callback):
    """
    This implementation of Callback does exactly nothing
    """

    def call(self, *args, **kwargs):
        """Ignore all arguments and run no hooks."""
        return None
214
+
215
+
216
class DotPrinterCallback(Callback):
    """
    Simple example Callback implementation

    Almost identical to Callback with a hook that prints a char; here we
    demonstrate how the outer layer may print "#" and the inner layer "."
    """

    def __init__(self, chr_to_print="#", **kwargs):
        self.chr = chr_to_print
        super().__init__(**kwargs)

    def branch(self, path_1, path_2, kwargs):
        """Mutate kwargs to add new instance with different print char"""
        child = DotPrinterCallback(".")
        kwargs["callback"] = child

    def call(self, **kwargs):
        """Just outputs a character"""
        print(self.chr, end="")
235
+
236
+
237
class TqdmCallback(Callback):
    """
    A callback to display a progress bar using tqdm

    Parameters
    ----------
    tqdm_kwargs : dict, (optional)
        Any argument accepted by the tqdm constructor.
        See the `tqdm doc <https://tqdm.github.io/docs/tqdm/#__init__>`_.
        Will be forwarded to `tqdm_cls`.
    tqdm_cls: (optional)
        subclass of `tqdm.tqdm`. If not passed, it will default to `tqdm.tqdm`.

    Examples
    --------
    >>> import fsspec
    >>> from fsspec.callbacks import TqdmCallback
    >>> fs = fsspec.filesystem("memory")
    >>> path2distant_data = "/your-path"
    >>> fs.upload(
            ".",
            path2distant_data,
            recursive=True,
            callback=TqdmCallback(),
        )

    You can forward args to tqdm using the ``tqdm_kwargs`` parameter.

    >>> fs.upload(
            ".",
            path2distant_data,
            recursive=True,
            callback=TqdmCallback(tqdm_kwargs={"desc": "Your tqdm description"}),
        )

    You can also customize the progress bar by passing a subclass of `tqdm`.

    .. code-block:: python

        class TqdmFormat(tqdm):
            '''Provides a `total_time` format parameter'''
            @property
            def format_dict(self):
                d = super().format_dict
                total_time = d["elapsed"] * (d["total"] or 0) / max(d["n"], 1)
                d.update(total_time=self.format_interval(total_time) + " in total")
                return d

    >>> with TqdmCallback(
            tqdm_kwargs={
                "desc": "desc",
                "bar_format": "{total_time}: {percentage:.0f}%|{bar}{r_bar}",
            },
            tqdm_cls=TqdmFormat,
        ) as callback:
            fs.upload(".", path2distant_data, recursive=True, callback=callback)
    """

    def __init__(self, tqdm_kwargs=None, *args, **kwargs):
        # Set before anything can fail: close() (also reached from __del__)
        # reads self.tqdm, and an unset attribute would fall through to
        # Callback.__getattr__ and come back as the no_op method, not None.
        self.tqdm = None

        try:
            from tqdm import tqdm

        except ImportError as exce:
            raise ImportError(
                "Using TqdmCallback requires tqdm to be installed"
            ) from exce

        self._tqdm_cls = kwargs.pop("tqdm_cls", tqdm)
        self._tqdm_kwargs = tqdm_kwargs or {}
        super().__init__(*args, **kwargs)

    def call(self, *args, **kwargs):
        """Create the bar on first use, then sync its total and position."""
        if self.tqdm is None:
            self.tqdm = self._tqdm_cls(total=self.size, **self._tqdm_kwargs)
        self.tqdm.total = self.size
        # tqdm.update takes an increment, so pass the difference to our value
        self.tqdm.update(self.value - self.tqdm.n)

    def close(self):
        """Close the progress bar, if one was created; safe to call repeatedly."""
        if self.tqdm is not None:
            self.tqdm.close()
            self.tqdm = None

    def __del__(self):
        return self.close()
322
+
323
+
324
# One shared do-nothing callback, exposed under both names.
_DEFAULT_CALLBACK = NoOpCallback()
DEFAULT_CALLBACK = _DEFAULT_CALLBACK
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/compression.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Helper functions for a standard streaming compression API"""
2
+
3
+ from zipfile import ZipFile
4
+
5
+ import fsspec.utils
6
+ from fsspec.spec import AbstractBufferedFile
7
+
8
+
9
def noop_file(file, mode, **kwargs):
    """Identity handler used for the ``None`` (no compression) entry.

    ``mode`` and any extra keyword arguments exist only so the signature
    matches the other compression callbacks; they are ignored and ``file``
    is returned unchanged.
    """
    return file
11
+
12
+
13
+ # TODO: files should also be available as contexts
14
+ # should be functions of the form func(infile, mode=, **kwargs) -> file-like
15
+ compr = {None: noop_file}
16
+
17
+
18
def register_compression(name, callback, extensions, force=False):
    """Register an "inferable" file compression type.

    Registers transparent file compression type for use with fsspec.open.
    Compression can be specified by name in open, or "infer"-ed for any files
    ending with the given extensions.

    Args:
        name: (str) The compression type name. Eg. "gzip".
        callback: A callable of form (infile, mode, **kwargs) -> file-like.
            Accepts an input file-like object, the target mode and kwargs.
            Returns a wrapped file-like object.
        extensions: (str, Iterable[str]) A file extension, or list of file
            extensions for which to infer this compression scheme. Eg. "gz".
        force: (bool) Force re-registration of compression type or extensions.

    Raises:
        ValueError: If name or extensions already registered, and not force.

    """
    if isinstance(extensions, str):
        extensions = [extensions]
    else:
        # The extensions are walked twice (validation, then registration), so
        # a one-shot iterable must be materialised or the second pass would
        # silently register nothing.
        extensions = list(extensions)

    # Validate everything up front so a failure leaves both registries untouched.
    if not force:
        if name in compr:
            raise ValueError(f"Duplicate compression registration: {name}")
        for ext in extensions:
            if ext in fsspec.utils.compressions:
                raise ValueError(
                    f"Duplicate compression file extension: {ext} ({name})"
                )

    compr[name] = callback

    for ext in extensions:
        fsspec.utils.compressions[ext] = name
53
+
54
+
55
def unzip(infile, mode="rb", filename=None, **kwargs):
    """Open a single member of a zip archive as a file-like object.

    In read mode (``"r"`` in ``mode``) the archive is opened from ``infile``
    and the member ``filename`` -- or the first listed member when no name is
    given -- is opened for reading; ``kwargs`` go to ``ZipFile.open``.

    In any other mode a new archive is written to ``infile`` (``kwargs`` go to
    the ``ZipFile`` constructor) and a writable member named ``filename``
    (default ``"file"``) is returned. Closing that member also closes the
    archive.
    """
    if "r" in mode:
        archive = ZipFile(infile)
        member = archive.namelist()[0] if filename is None else filename
        return archive.open(member, mode="r", **kwargs)

    archive = ZipFile(infile, mode="w", **kwargs)
    member_file = archive.open(filename or "file", mode="w")

    # Chain the archive's close onto the member's close so that the caller
    # only has one handle to manage.
    def close_member_then_archive(closer=member_file.close):
        return closer() or archive.close()

    member_file.close = close_member_then_archive
    return member_file
66
+
67
+
68
+ register_compression("zip", unzip, "zip")
69
+
70
+ try:
71
+ from bz2 import BZ2File
72
+ except ImportError:
73
+ pass
74
+ else:
75
+ register_compression("bz2", BZ2File, "bz2")
76
+
77
# Register the "gzip" codec (extension "gz"). The ``isal`` package is tried
# first; if it cannot be imported, the standard-library ``gzip`` is used.
try:  # pragma: no cover
    from isal import igzip

    def isal(infile, mode="rb", **kwargs):
        """Wrap ``infile`` in an ``igzip.IGzipFile`` opened with ``mode``."""
        return igzip.IGzipFile(fileobj=infile, mode=mode, **kwargs)

    register_compression("gzip", isal, "gz")
except ImportError:
    from gzip import GzipFile

    # The wrapped file goes in as ``fileobj``; all keywords (including
    # ``mode``) are forwarded to GzipFile unchanged.
    register_compression(
        "gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), "gz"
    )
90
+
91
+ try:
92
+ from lzma import LZMAFile
93
+
94
+ register_compression("lzma", LZMAFile, "lzma")
95
+ register_compression("xz", LZMAFile, "xz")
96
+ except ImportError:
97
+ pass
98
+
99
+ try:
100
+ import lzmaffi
101
+
102
+ register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True)
103
+ register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
104
+ except ImportError:
105
+ pass
106
+
107
+
108
class SnappyFile(AbstractBufferedFile):
    """Streaming snappy (de)compression layered over another file-like object.

    Reads pull raw bytes from ``infile`` and decompress them; writes compress
    the buffered data and push it to ``infile``. The file is not seekable.
    """

    def __init__(self, infile, mode, **kwargs):
        """Wrap ``infile`` for reading (``"r"`` in ``mode``) or writing.

        ``snappy`` is imported here, so the class can be defined even when
        the package is not installed.
        """
        import snappy

        # There is no backing filesystem: ``fs`` is None, the path is a fixed
        # placeholder, and ``size`` is a hard-coded large constant.
        # The mode is forced to binary.
        super().__init__(
            fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs
        )
        self.infile = infile
        if "r" in mode:
            self.codec = snappy.StreamDecompressor()
        else:
            self.codec = snappy.StreamCompressor()

    def _upload_chunk(self, final=False):
        """Compress the whole write buffer and write the result to ``infile``.

        ``final`` is accepted but not used. Always returns True.
        """
        self.buffer.seek(0)
        out = self.codec.add_chunk(self.buffer.read())
        self.infile.write(out)
        return True

    def seek(self, loc, whence=0):
        """Unsupported: always raises ``NotImplementedError``."""
        raise NotImplementedError("SnappyFile is not seekable")

    def seekable(self):
        """Report that this file cannot seek."""
        return False

    def _fetch_range(self, start, end):
        """Get the specified set of bytes from remote"""
        # Only the length ``end - start`` is used; data is read sequentially
        # from the current position of ``infile`` and then decompressed.
        data = self.infile.read(end - start)
        return self.codec.decompress(data)
137
+
138
+
139
+ try:
140
+ import snappy
141
+
142
+ snappy.compress
143
+ # Snappy may use the .sz file extension, but this is not part of the
144
+ # standard implementation.
145
+ register_compression("snappy", SnappyFile, [])
146
+
147
+ except (ImportError, NameError, AttributeError):
148
+ pass
149
+
150
+ try:
151
+ import lz4.frame
152
+
153
+ register_compression("lz4", lz4.frame.open, "lz4")
154
+ except ImportError:
155
+ pass
156
+
157
# Register the "zstd" codec (extension "zst") when ``zstandard`` is importable;
# otherwise the codec is simply not made available.
try:
    import zstandard as zstd

    def zstandard_file(infile, mode="rb"):
        """Return a zstd stream reader or writer around ``infile``.

        A decompressing reader is returned when ``"r"`` is in ``mode``;
        otherwise a compressing writer using compression level 10.
        Unlike the other callbacks, no extra keyword arguments are accepted.
        """
        if "r" in mode:
            cctx = zstd.ZstdDecompressor()
            return cctx.stream_reader(infile)
        else:
            cctx = zstd.ZstdCompressor(level=10)
            return cctx.stream_writer(infile)

    register_compression("zstd", zstandard_file, "zst")
except ImportError:
    pass
171
+
172
+
173
def available_compressions():
    """Return the names of all registered compressions as a new list.

    The names are the keys of the module-level ``compr`` registry, which
    includes ``None`` for "no compression".
    """
    return [*compr]
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/config.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import configparser
4
+ import json
5
+ import os
6
+ import warnings
7
+ from typing import Any
8
+
9
+ conf: dict[str, dict[str, Any]] = {}
10
+ default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/fsspec")
11
+ conf_dir = os.environ.get("FSSPEC_CONFIG_DIR", default_conf_dir)
12
+
13
+
14
def set_conf_env(conf_dict, envdict=os.environ):
    """Populate ``conf_dict`` from ``FSSPEC_*`` environment variables.

    Two forms are recognised:

    - ``FSSPEC_<protocol>``: the value is parsed as JSON and must be a dict,
      which is merged (``update``) into the config for that protocol.
    - ``FSSPEC_<protocol>_<kwarg>``: the raw string value is stored under the
      lower-cased kwarg name, with no conversion.

    The ``FSSPEC_<protocol>_<kwarg>`` variables are applied after all the
    ``FSSPEC_<protocol>`` ones. Variables that start with ``FSSPEC`` but fit
    neither form, fail to parse, or do not hold a dict produce a warning and
    are skipped.

    Parameters
    ----------
    conf_dict : dict(str, dict)
        This dict will be mutated
    envdict : dict-like(str, str)
        Source for the values - usually the real environment
    """
    prefix = "FSSPEC_"
    deferred = []  # per-kwarg variables, applied once the JSON ones are done

    for name in envdict:
        well_formed = (
            name.startswith(prefix)
            and len(name) > len(prefix)
            and name[len(prefix)] != "_"
        )
        if not well_formed:
            if name.startswith("FSSPEC"):
                warnings.warn(
                    f"Ignoring environment variable {name} due to having an unexpected name"
                )
            continue

        # More than one underscore means a kwarg name follows the protocol.
        if name.count("_") > 1:
            deferred.append(name)
            continue

        try:
            parsed = json.loads(envdict[name])
        except json.decoder.JSONDecodeError as ex:
            warnings.warn(
                f"Ignoring environment variable {name} due to a parse failure: {ex}"
            )
            continue

        if not isinstance(parsed, dict):
            warnings.warn(
                f"Ignoring environment variable {name} due to not being a dict:"
                f" {type(parsed)}"
            )
            continue

        protocol = name.split("_", 1)[1]
        conf_dict.setdefault(protocol.lower(), {}).update(parsed)

    for name in deferred:
        _, protocol, option = name.split("_", 2)
        conf_dict.setdefault(protocol.lower(), {})[option.lower()] = envdict[name]
62
+
63
+
64
def set_conf_files(cdir, conf_dict):
    """Set config values from files

    Scans the given directory for INI (``*.ini``) and JSON (``*.json``) files,
    in sorted filename order, and merges their contents into the config. In
    case of repeated values, later values win.

    In the case of INI files, all values are strings, and these will not
    be converted. The INI ``DEFAULT`` section is not stored as a protocol.

    Nothing happens if ``cdir`` is not an existing directory.

    Parameters
    ----------
    cdir : str
        Directory to search
    conf_dict : dict(str, dict)
        This dict will be mutated
    """
    if not os.path.isdir(cdir):
        return

    for filename in sorted(os.listdir(cdir)):
        full_path = os.path.join(cdir, filename)
        if filename.endswith(".ini"):
            parser = configparser.ConfigParser()
            parser.read(full_path)
            # sections() already leaves out the DEFAULT section
            for section in parser.sections():
                conf_dict.setdefault(section, {}).update(dict(parser[section]))
        elif filename.endswith(".json"):
            with open(full_path) as handle:
                loaded = json.load(handle)
            for section in loaded:
                conf_dict.setdefault(section, {}).update(dict(loaded[section]))
97
+
98
+
99
def apply_config(cls, kwargs, conf_dict=None):
    """Supply default values for kwargs when instantiating class

    Builds a new kwargs dict: config entries for each of the class's
    ``.protocol`` names (one str, or a tuple/list of them) are layered in
    order, and the explicitly passed ``kwargs`` are applied last so that
    they always win.

    Parameters
    ----------
    cls : file system implementation
    kwargs : dict
    conf_dict : dict of dict
        Typically this is the global configuration, which is used when
        ``None`` is passed

    Returns
    -------
    dict : the modified set of kwargs
    """
    if conf_dict is None:
        conf_dict = conf

    protocol = cls.protocol
    names = protocol if isinstance(protocol, (tuple, list)) else [protocol]

    merged = {}
    for name in names:
        if name in conf_dict:
            merged.update(conf_dict[name])
    merged.update(**kwargs)
    return merged
128
+
129
+
130
+ set_conf_files(conf_dir, conf)
131
+ set_conf_env(conf)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/conftest.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import subprocess
4
+ import sys
5
+ import time
6
+
7
+ import pytest
8
+
9
+ import fsspec
10
+ from fsspec.implementations.cached import CachingFileSystem
11
+
12
+
13
@pytest.fixture()
def m():
    """
    Fixture providing a memory filesystem.

    The filesystem's store and pseudo-directory list are reset (leaving only
    the root ``""`` entry) both before the test and after it finishes.
    """
    memfs = fsspec.filesystem("memory")

    def _reset():
        memfs.store.clear()
        memfs.pseudo_dirs.clear()
        memfs.pseudo_dirs.append("")

    _reset()
    try:
        yield memfs
    finally:
        _reset()
28
+
29
+
30
@pytest.fixture
def ftp_writable(tmpdir):
    """
    Fixture providing a writable FTP filesystem.

    Starts ``pyftpdlib`` in a subprocess serving ``tmpdir`` (pre-populated
    with a file named ``out``) and yields ``(host, port, user, password)``.
    The test is skipped when ``pyftpdlib`` is not importable. On teardown the
    server is terminated and the directory removed on a best-effort basis.
    """
    pytest.importorskip("pyftpdlib")
    from fsspec.implementations.ftp import FTPFileSystem

    FTPFileSystem.clear_instance_cache()  # remove lingering connections
    CachingFileSystem.clear_instance_cache()

    root = str(tmpdir)
    with open(os.path.join(root, "out"), "wb") as handle:
        handle.write(b"hello" * 10000)

    command = [
        sys.executable,
        "-m",
        "pyftpdlib",
        "-d",
        root,
        "-u",
        "user",
        "-P",
        "pass",
        "-w",
    ]
    server = subprocess.Popen(command)
    try:
        # fixed pause before handing the server details to the test
        time.sleep(1)
        yield "localhost", 2121, "user", "pass"
    finally:
        server.terminate()
        server.wait()
        try:
            shutil.rmtree(tmpdir)
        except Exception:
            # cleanup is best-effort; a failure here must not fail the test
            pass
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/core.py ADDED
@@ -0,0 +1,738 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import logging
5
+ import os
6
+ import re
7
+ from glob import has_magic
8
+ from pathlib import Path
9
+
10
+ # for backwards compat, we export cache things from here too
11
+ from fsspec.caching import ( # noqa: F401
12
+ BaseCache,
13
+ BlockCache,
14
+ BytesCache,
15
+ MMapCache,
16
+ ReadAheadCache,
17
+ caches,
18
+ )
19
+ from fsspec.compression import compr
20
+ from fsspec.config import conf
21
+ from fsspec.registry import filesystem, get_filesystem_class
22
+ from fsspec.utils import (
23
+ _unstrip_protocol,
24
+ build_name_function,
25
+ infer_compression,
26
+ stringify_path,
27
+ )
28
+
29
+ logger = logging.getLogger("fsspec")
30
+
31
+
32
class OpenFile:
    """
    File-like object to be used in a context

    Can layer (buffered) text-mode and compression over any file-system, which
    are typically binary-only.

    These instances are safe to serialize, as the low-level file object
    is not created until invoked using ``with``.

    Parameters
    ----------
    fs: FileSystem
        The file system to use for opening the file. Should be a subclass or duck-type
        with ``fsspec.spec.AbstractFileSystem``
    path: str
        Location to open
    mode: str like 'rb', optional
        Mode of the opened file
    compression: str or None, optional
        Compression to apply
    encoding: str or None, optional
        The encoding to use if opened in text mode.
    errors: str or None, optional
        How to handle encoding errors if opened in text mode.
    newline: None or str
        Passed to TextIOWrapper in text mode, how to handle line endings.
    """

    def __init__(
        self,
        fs,
        path,
        mode="rb",
        compression=None,
        encoding=None,
        errors=None,
        newline=None,
    ):
        self.fs = fs
        self.path = path
        self.mode = mode
        # resolves "infer" and rejects unknown codec names up front
        self.compression = get_compression(path, compression)
        self.encoding = encoding
        self.errors = errors
        self.newline = newline
        # stack of file objects created by __enter__, innermost first
        self.fobjects = []

    def __reduce__(self):
        # Pickle as constructor arguments only: open file objects are never
        # serialized, they are recreated on the next ``with``/``open()``.
        return (
            OpenFile,
            (
                self.fs,
                self.path,
                self.mode,
                self.compression,
                self.encoding,
                self.errors,
                self.newline,
            ),
        )

    def __repr__(self):
        return f"<OpenFile '{self.path}'>"

    def __enter__(self):
        """Open the file and return the outermost file-like layer.

        The filesystem is always asked for a binary file; compression and
        text decoding are stacked on top as requested.

        Raises
        ------
        FileNotFoundError
            If the filesystem cannot find the path. When the path contains
            glob characters the message hints at the ``expand`` option.
        """
        mode = self.mode.replace("t", "").replace("b", "") + "b"

        try:
            f = self.fs.open(self.path, mode=mode)
        except FileNotFoundError as e:
            if has_magic(self.path):
                # The path is interpolated into the message here: passing it
                # as a second exception argument would leave "%s" unformatted
                # and turn the template into the OSError errno.
                raise FileNotFoundError(
                    f"{self.path} not found. The URL contains glob characters: you maybe needed\n"
                    "to pass expand=True in fsspec.open() or the storage_options of \n"
                    "your library. You can also set the config value 'open_expand'\n"
                    "before import, or fsspec.core.DEFAULT_EXPAND at runtime, to True."
                ) from e
            raise

        self.fobjects = [f]

        if self.compression is not None:
            compress = compr[self.compression]
            # codecs only get the bare direction character ("r", "w", ...)
            f = compress(f, mode=mode[0])
            self.fobjects.append(f)

        if "b" not in self.mode:
            # assume, for example, that 'r' is equivalent to 'rt' as in builtin
            f = PickleableTextIOWrapper(
                f, encoding=self.encoding, errors=self.errors, newline=self.newline
            )
            self.fobjects.append(f)

        return self.fobjects[-1]

    def __exit__(self, *args):
        self.close()

    @property
    def full_name(self):
        """The path with the filesystem's protocol re-attached."""
        return _unstrip_protocol(self.path, self.fs)

    def open(self):
        """Materialise this as a real open file without context

        The OpenFile object should be explicitly closed to avoid enclosed file
        instances persisting. You must, therefore, keep a reference to the OpenFile
        during the life of the file-like it generates.
        """
        return self.__enter__()

    def close(self):
        """Close all encapsulated file objects"""
        # outermost layer first, so wrappers push their data down the stack
        for f in reversed(self.fobjects):
            if "r" not in self.mode and not f.closed:
                f.flush()
            f.close()
        self.fobjects.clear()
156
+
157
+
158
class OpenFiles(list):
    """List of OpenFile instances

    Entering the list as a context opens every contained file and leaving it
    closes them all; ordinary list indexing still returns the elements, and
    slicing returns another ``OpenFiles`` with the same mode and filesystem.

    If the filesystem, or any filesystem reachable through its chain of
    ``.fs`` attributes, provides ``open_many``, opening is delegated to it in
    one call (and, for non-read modes, ``commit_many`` is called on exit).
    This is the hook caching filesystems use to transfer files together.
    """

    def __init__(self, *args, mode="rb", fs=None):
        self.mode = mode
        self.fs = fs
        self.files = []
        super().__init__(*args)

    def __enter__(self):
        if self.fs is None:
            raise ValueError("Context has already been used")

        # walk down the wrapped filesystems looking for a bulk opener
        layer = self.fs
        while not hasattr(layer, "open_many"):
            layer = getattr(layer, "fs", None)
            if layer is None:
                # no bulk opener anywhere: open each file individually
                return [entry.__enter__() for entry in self]

        # check for concurrent cache download; or set up for upload
        self.files = layer.open_many(self)
        return self.files

    def __exit__(self, *args):
        for entry in self:
            entry.__exit__(*args)
        if "r" in self.mode:
            return

        layer = self.fs
        while not hasattr(layer, "open_many"):
            layer = getattr(layer, "fs", None)
            if layer is None:
                return
        # check for concurrent cache upload
        layer.commit_many(self.files)

    def __getitem__(self, item):
        picked = super().__getitem__(item)
        if not isinstance(item, slice):
            return picked
        return OpenFiles(picked, mode=self.mode, fs=self.fs)

    def __repr__(self):
        return f"<List of {len(self)} OpenFile instances>"
214
+
215
+
216
def open_files(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    name_function=None,
    num=1,
    protocol=None,
    newline=None,
    auto_mkdir=True,
    expand=True,
    **kwargs,
):
    """Given a path or paths, return a list of ``OpenFile`` objects.

    For writing, a str path must contain the "*" character, which will be filled
    in by increasing numbers, e.g., "part*" -> "part1", "part2" if num=2.

    For either reading or writing, can instead provide explicit list of paths.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
        to read from alternative filesystems. To read from multiple files you
        can pass a globstring or a list of paths, with the caveat that they
        must all have the same protocol.
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    name_function: function or None
        if opening a set of files for writing, those files do not yet exist,
        so we need to generate their names by formatting the urlpath for
        each sequence number
    num: int [1]
        if writing mode, number of files we expect to create (passed to
        name_function)
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    auto_mkdir: bool (True)
        If in write mode, this will ensure the target directory exists before
        writing, by calling ``fs.makedirs(exist_ok=True)``.
    expand: bool
        Forwarded unchanged to ``get_fs_token_paths``.
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> files = open_files('2015-*-*.csv')  # doctest: +SKIP
    >>> files = open_files(
    ...     's3://bucket/2015-*-*.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP

    Returns
    -------
    An ``OpenFiles`` instance, which is a list of ``OpenFile`` objects that can
    be used as a single context

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    fs, _token, paths = get_fs_token_paths(
        urlpath,
        mode,
        num=num,
        name_function=name_function,
        storage_options=kwargs,
        protocol=protocol,
        expand=expand,
    )

    if fs.protocol == "file":
        fs.auto_mkdir = auto_mkdir
    elif auto_mkdir and "r" not in mode:
        # create each distinct parent directory once before writing
        for directory in {fs._parent(p) for p in paths}:
            try:
                fs.makedirs(directory, exist_ok=True)
            except PermissionError:
                # lacking permission to create a parent is tolerated here
                pass

    wrapped = [
        OpenFile(
            fs,
            target,
            mode=mode,
            compression=compression,
            encoding=encoding,
            errors=errors,
            newline=newline,
        )
        for target in paths
    ]
    return OpenFiles(wrapped, mode=mode, fs=fs)
329
+
330
+
331
def _un_chain(path, kwargs):
    """Split a ``::``-chained URL into per-filesystem ``(path, protocol, kwargs)``.

    Parameters
    ----------
    path : str
        URL, optionally made of several segments joined by ``::``.
    kwargs : dict
        Options for the whole chain; copied, so the caller's dict is untouched.
        An entry keyed by a protocol name supplies the options for that
        segment, and ``"protocol"`` overrides the detected protocol.

    Returns
    -------
    list of (str, str, dict)
        One tuple per segment, in the same order as they appear in ``path``.
    """
    x = re.compile(".*[^a-z]+.*")  # test for non protocol-like single word
    # A segment made only of lowercase letters (no "://", no regex match) is
    # taken to be a bare protocol name and gets "://" appended.
    bits = (
        [p if "://" in p or x.match(p) else p + "://" for p in path.split("::")]
        if "::" in path
        else [path]
    )
    # [[url, protocol, kwargs], ...]
    out = []
    previous_bit = None
    kwargs = kwargs.copy()
    # Segments are processed from the last (innermost) to the first.
    for bit in reversed(bits):
        # "protocol" is popped, so an override only applies to the first
        # segment processed here (the last one in the chain).
        protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file"
        cls = get_filesystem_class(protocol)
        extra_kwargs = cls._get_kwargs_from_urls(bit)
        kws = kwargs.pop(protocol, {})
        if bit is bits[0]:
            # whatever options are left over go to the first segment
            kws.update(kwargs)
        # dict(**a, **b) raises TypeError if both define the same key
        kw = dict(**extra_kwargs, **kws)
        bit = cls._strip_protocol(bit)
        if (
            protocol in {"blockcache", "filecache", "simplecache"}
            and "target_protocol" not in kw
        ):
            # a caching layer without its own target reuses the path of the
            # segment processed just before it (the next one in the chain)
            bit = previous_bit
        out.append((bit, protocol, kw))
        previous_bit = bit
    out.reverse()
    return out
360
+
361
+
362
def url_to_fs(url, **kwargs):
    """
    Turn fully-qualified and potentially chained URL into filesystem instance

    Keyword arguments that belong to ``fsspec.open()`` rather than to a
    filesystem (``compression``, ``encoding``, ``errors``, ``expand``,
    ``mode``, ``name_function``, ``newline``, ``num``) are dropped before the
    filesystem is created.

    Parameters
    ----------
    url : str
        The fsspec-compatible URL
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Returns
    -------
    filesystem : FileSystem
        The new filesystem discovered from ``url`` and created with
        ``**kwargs``.
    urlpath : str
        The file-systems-specific URL for ``url``.
    """
    url = stringify_path(url)
    # non-FS arguments that appear in fsspec.open()
    # inspect could keep this in sync with open()'s signature
    non_fs_options = {
        "compression",
        "encoding",
        "errors",
        "expand",
        "mode",
        "name_function",
        "newline",
        "num",
    }
    fs_options = {
        key: value for key, value in kwargs.items() if key not in non_fs_options
    }
    chain = _un_chain(url, fs_options)
    outer_path, outer_protocol, outer_kw = chain[0]

    # Fold the inner links, innermost first, into nested target_* options.
    nested = {}
    for link_path, link_protocol, link_kw in reversed(chain[1:]):
        nested["target_options"] = dict(**link_kw, **nested)
        nested["target_protocol"] = link_protocol
        nested["fo"] = link_path
    # The outermost link's own options sit at the top level.
    nested = dict(**outer_kw, **nested)

    return filesystem(outer_protocol, **nested), outer_path
410
+
411
+
412
+ DEFAULT_EXPAND = conf.get("open_expand", False)
413
+
414
+
415
def open(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    protocol=None,
    newline=None,
    expand=None,
    **kwargs,
):
    """Given a path or paths, return one ``OpenFile`` object.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath. Prefix with a protocol like ``s3://``
        to read from alternative filesystems. Should not include glob
        character(s).
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    expand: bool or None
        Whether to regard file paths containing special glob characters as needing
        expansion (finding the first match) or absolute. Setting False allows using
        paths which do embed such characters. If None (default), this argument
        takes its value from the DEFAULT_EXPAND module variable, which takes
        its initial value from the "open_expand" config value at startup, which will
        be False if not set.
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> openfile = open('2015-01-01.csv')  # doctest: +SKIP
    >>> openfile = open(
    ...     's3://bucket/2015-01-01.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP
    >>> with openfile as f:
    ...     df = pd.read_csv(f)  # doctest: +SKIP
    ...

    Returns
    -------
    ``OpenFile`` object.

    Raises
    ------
    FileNotFoundError
        If no file results from ``urlpath``.

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    if expand is None:
        expand = DEFAULT_EXPAND

    # Delegate to the multi-file opener with a one-element list.
    candidates = open_files(
        urlpath=[urlpath],
        mode=mode,
        compression=compression,
        encoding=encoding,
        errors=errors,
        protocol=protocol,
        newline=newline,
        expand=expand,
        **kwargs,
    )
    if candidates:
        return candidates[0]
    raise FileNotFoundError(urlpath)
498
+
499
+
500
def open_local(
    url: str | list[str] | Path | list[Path],
    mode: str = "rb",
    **storage_options: dict,
) -> str | list[str]:
    """Open file(s) which can be resolved to local

    For files which either are local, or get downloaded upon open
    (e.g., by file caching)

    Parameters
    ----------
    url: str or list(str)
    mode: str
        Must be read mode
    storage_options:
        passed on to FS for or used by open_files (e.g., compression)

    Returns
    -------
    A single local path when ``url`` is a ``Path`` or a string without glob
    characters; otherwise the list of local paths.

    Raises
    ------
    ValueError
        If ``mode`` is not a read mode, or the filesystem does not declare
        ``local_file=True``.
    FileNotFoundError
        If ``url`` resolves to no files at all.
    """
    if "r" not in mode:
        raise ValueError("Can only ensure local files when reading")
    of = open_files(url, mode=mode, **storage_options)
    if not of:
        # Nothing matched (e.g. a glob with no hits): fail the same way
        # ``open`` does instead of with a bare IndexError from ``of[0]``.
        raise FileNotFoundError(url)
    if not getattr(of[0].fs, "local_file", False):
        raise ValueError(
            "open_local can only be used on a filesystem which"
            " has attribute local_file=True"
        )
    # Entering the context opens the files; only the local names of the
    # opened files are kept.
    with of as files:
        paths = [f.name for f in files]
    if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
        return paths[0]
    return paths
531
+
532
+
533
def get_compression(urlpath, compression):
    """Resolve and validate a compression name.

    ``"infer"`` is replaced by whatever ``infer_compression`` derives from
    ``urlpath``. The result is returned if it is ``None`` or a registered
    codec name; anything else raises ``ValueError``.
    """
    codec = infer_compression(urlpath) if compression == "infer" else compression
    if codec is None or codec in compr:
        return codec
    raise ValueError(f"Compression type {codec} not supported")
539
+
540
+
541
def split_protocol(urlpath):
    """Return protocol, path pair

    A ``<protocol>://<path>`` URL yields ``(protocol, path)`` as long as the
    protocol is longer than one character. A ``data:`` URL is split at its
    first colon (that branch returns a two-item list). Anything else yields
    ``(None, urlpath)``.
    """
    urlpath = stringify_path(urlpath)
    protocol, separator, remainder = urlpath.partition("://")
    # a one-letter "protocol" is a Windows drive letter, not a scheme
    if separator and len(protocol) > 1:
        return protocol, remainder
    if urlpath.startswith("data:"):
        return urlpath.split(":", 1)
    return None, urlpath
552
+
553
+
554
def strip_protocol(urlpath):
    """Return only path part of full URL, according to appropriate backend

    The protocol found in ``urlpath`` selects the filesystem class, whose own
    ``_strip_protocol`` then does the stripping.
    """
    protocol = split_protocol(urlpath)[0]
    backend = get_filesystem_class(protocol)
    return backend._strip_protocol(urlpath)
559
+
560
+
561
def expand_paths_if_needed(paths, mode, num, fs, name_function):
    """Expand paths if they have a ``*`` in them (write mode) or any of ``*?[]``
    in them (read mode).

    Parameters
    ----------
    paths: iterable of str
        Paths to expand.
    mode: str
        Mode in which to open files.
    num: int
        If opening in writing mode, number of files we expect to create.
    fs: filesystem object
        Only used in read mode, for its ``glob`` method.
    name_function: callable
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.

    Returns
    -------
    list of str
        The expanded paths.

    Raises
    ------
    ValueError
        In write mode, if more than one path contains ``*``.
    """
    expanded_paths = []
    paths = list(paths)

    if "w" in mode:  # write mode
        if sum(1 for p in paths if "*" in p) > 1:
            raise ValueError(
                "When writing data, only one filename mask can be specified."
            )
        # never produce fewer names than there are explicit paths
        num = max(num, len(paths))

        for curr_path in paths:
            if "*" in curr_path:
                # expand using name_function
                expanded_paths.extend(_expand_paths(curr_path, name_function, num))
            else:
                expanded_paths.append(curr_path)
        # if we generated more paths than asked for, trim the list
        if len(expanded_paths) > num:
            expanded_paths = expanded_paths[:num]

    else:  # read mode
        for curr_path in paths:
            if has_magic(curr_path):
                # expand using glob
                expanded_paths.extend(fs.glob(curr_path))
            else:
                expanded_paths.append(curr_path)

    return expanded_paths
606
+
607
+
608
def get_fs_token_paths(
    urlpath,
    mode="rb",
    num=1,
    name_function=None,
    storage_options=None,
    protocol=None,
    expand=True,
):
    """Filesystem, deterministic token, and paths from a urlpath and options.

    Parameters
    ----------
    urlpath: string or iterable
        Absolute or relative filepath, URL (may include protocols like
        ``s3://``), or globstring pointing to data.
    mode: str, optional
        Mode in which to open files.
    num: int, optional
        If opening in writing mode, number of files we expect to create.
    name_function: callable, optional
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.
    storage_options: dict, optional
        Additional keywords to pass to the filesystem class. The dict passed
        in is not modified.
    protocol: str or None
        To override the protocol specifier in the URL
    expand: bool
        Expand string paths for writing, assuming the path is a directory

    Returns
    -------
    (fs, token, paths): the filesystem instance, its ``_fs_token`` and the
    list of resolved paths.

    Raises
    ------
    ValueError
        If ``urlpath`` is an empty sequence, or if the URLs in a sequence do
        not all resolve to the same protocol.
    """
    is_sequence = isinstance(urlpath, (list, tuple, set))
    if is_sequence:
        if not urlpath:
            raise ValueError("empty urlpath sequence")
        urlpath0 = stringify_path(list(urlpath)[0])
    else:
        urlpath0 = stringify_path(urlpath)
    # Work on a copy so that injecting ``protocol`` below never leaks into the
    # dict owned by the caller.
    storage_options = dict(storage_options or {})
    if protocol:
        storage_options["protocol"] = protocol
    chain = _un_chain(urlpath0, storage_options)
    inkwargs = {}
    # Reverse iterate the chain, creating a nested target_* structure
    for i, ch in enumerate(reversed(chain)):
        urls, nested_protocol, kw = ch
        if i == len(chain) - 1:
            inkwargs = dict(**kw, **inkwargs)
            continue
        inkwargs["target_options"] = dict(**kw, **inkwargs)
        inkwargs["target_protocol"] = nested_protocol
        inkwargs["fo"] = urls
    paths, protocol, _ = chain[0]
    fs = filesystem(protocol, **inkwargs)
    if is_sequence:
        pchains = [_un_chain(stringify_path(u), storage_options)[0] for u in urlpath]
        if len({pc[1] for pc in pchains}) > 1:
            # The original passed logging-style arguments to ValueError, which
            # left the "%s" placeholder unformatted in the message.
            raise ValueError(f"Protocol mismatch getting fs from {urlpath}")
        paths = [pc[0] for pc in pchains]
    else:
        paths = fs._strip_protocol(paths)
    if isinstance(paths, (list, tuple, set)):
        if expand:
            paths = expand_paths_if_needed(paths, mode, num, fs, name_function)
        elif not isinstance(paths, list):
            paths = list(paths)
    else:
        if ("w" in mode or "x" in mode) and expand:
            paths = _expand_paths(paths, name_function, num)
        elif "*" in paths:
            paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
        else:
            paths = [paths]

    return fs, fs._fs_token, paths
686
+
687
+
688
+ def _expand_paths(path, name_function, num):
689
+ if isinstance(path, str):
690
+ if path.count("*") > 1:
691
+ raise ValueError("Output path spec must contain exactly one '*'.")
692
+ elif "*" not in path:
693
+ path = os.path.join(path, "*.part")
694
+
695
+ if name_function is None:
696
+ name_function = build_name_function(num - 1)
697
+
698
+ paths = [path.replace("*", name_function(i)) for i in range(num)]
699
+ if paths != sorted(paths):
700
+ logger.warning(
701
+ "In order to preserve order between partitions"
702
+ " paths created with ``name_function`` should "
703
+ "sort to partition order"
704
+ )
705
+ elif isinstance(path, (tuple, list)):
706
+ assert len(path) == num
707
+ paths = list(path)
708
+ else:
709
+ raise ValueError(
710
+ "Path should be either\n"
711
+ "1. A list of paths: ['foo.json', 'bar.json', ...]\n"
712
+ "2. A directory: 'foo/\n"
713
+ "3. A path with a '*' in it: 'foo.*.json'"
714
+ )
715
+ return paths
716
+
717
+
718
class PickleableTextIOWrapper(io.TextIOWrapper):
    """A ``TextIOWrapper`` that remembers its constructor arguments for pickling.

    The plain ``io.TextIOWrapper`` cannot be pickled. This subclass stores the
    arguments it was built with and has ``__reduce__`` rebuild the wrapper
    from them, which requires that ``buffer`` itself be pickleable.
    """

    def __init__(
        self,
        buffer,
        encoding=None,
        errors=None,
        newline=None,
        line_buffering=False,
        write_through=False,
    ):
        # Keep the exact positional argument tuple; __reduce__ replays it.
        ctor_args = (
            buffer,
            encoding,
            errors,
            newline,
            line_buffering,
            write_through,
        )
        self.args = ctor_args
        super().__init__(*ctor_args)

    def __reduce__(self):
        # (callable, args) pair: unpickling calls the class with saved args.
        return (PickleableTextIOWrapper, self.args)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/exceptions.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ fsspec user-defined exception classes
3
+ """
4
+
5
+ import asyncio
6
+
7
+
8
class BlocksizeMismatchError(ValueError):
    """
    Raised when a cached file is opened with a blocksize that differs from
    the blocksize it was written with.
    """
13
+
14
+
15
class FSTimeoutError(asyncio.TimeoutError):
    """
    Raised when an fsspec function times out.
    """
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/fuse.py ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import logging
3
+ import os
4
+ import stat
5
+ import threading
6
+ import time
7
+ from errno import EIO, ENOENT
8
+
9
+ from fuse import FUSE, FuseOSError, LoggingMixIn, Operations
10
+
11
+ from fsspec import __version__
12
+ from fsspec.core import url_to_fs
13
+
14
+ logger = logging.getLogger("fsspec.fuse")
15
+
16
+
17
class FUSEr(Operations):
    """FUSE ``Operations`` implementation that forwards calls to an fsspec filesystem.

    Paths received from FUSE are mapped below ``root`` on the wrapped
    filesystem. Open files are kept in ``self.cache`` keyed by an integer
    handle taken from ``self.counter``.
    """

    def __init__(self, fs, path, ready_file=False):
        """
        Parameters
        ----------
        fs: file-system instance
            Filesystem that all operations are forwarded to.
        path: str
            Location on ``fs`` regarded as the root of the mount.
        ready_file: bool
            If True, a synthetic ``.fuse_ready`` file is reported by
            ``getattr`` and readable through ``read``.
        """
        self.fs = fs
        self.cache = {}
        self.root = path.rstrip("/") + "/"
        self.counter = 0
        logger.info("Starting FUSE at %s", path)
        self._ready_file = ready_file

    def _full_path(self, path):
        """Map a FUSE-relative path onto the wrapped filesystem below ``root``."""
        return self.root + path.lstrip("/")

    def _register(self, f):
        """Store an open file in the handle table and return its new handle."""
        fh = self.counter
        self.cache[fh] = f
        self.counter += 1
        return fh

    def getattr(self, path, fh=None):
        """Return a stat-like dict for ``path``; raise ``FuseOSError(ENOENT)`` if missing."""
        logger.debug("getattr %s", path)
        if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]:
            return {"type": "file", "st_size": 5}

        path = self._full_path(path).rstrip("/")
        try:
            info = self.fs.info(path)
        except FileNotFoundError as exc:
            raise FuseOSError(ENOENT) from exc

        data = {"st_uid": info.get("uid", 1000), "st_gid": info.get("gid", 1000)}
        perm = info.get("mode", 0o777)

        if info["type"] != "file":
            data["st_mode"] = stat.S_IFDIR | perm
            data["st_size"] = 0
            data["st_blksize"] = 0
        else:
            data["st_mode"] = stat.S_IFREG | perm
            data["st_size"] = info["size"]
            data["st_blksize"] = 5 * 2**20
            data["st_nlink"] = 1
        # Fall back to "now" for any timestamp the backend does not report.
        data["st_atime"] = info["atime"] if "atime" in info else time.time()
        data["st_ctime"] = info["ctime"] if "ctime" in info else time.time()
        data["st_mtime"] = info["mtime"] if "mtime" in info else time.time()
        return data

    def readdir(self, path, fh):
        """List the base names of the entries in ``path``, plus ``.`` and ``..``."""
        logger.debug("readdir %s", path)
        path = self._full_path(path)
        files = self.fs.ls(path, False)
        files = [os.path.basename(f.rstrip("/")) for f in files]
        return [".", ".."] + files

    def mkdir(self, path, mode):
        """Create a directory on the wrapped filesystem; ``mode`` is ignored."""
        self.fs.mkdir(self._full_path(path))
        return 0

    def rmdir(self, path):
        """Remove a directory on the wrapped filesystem."""
        self.fs.rmdir(self._full_path(path))
        return 0

    def read(self, path, size, offset, fh):
        """Read ``size`` bytes at ``offset`` from the open handle ``fh``."""
        logger.debug("read %s", (path, size, offset))
        if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]:
            # status indicator
            return b"ready"

        f = self.cache[fh]
        f.seek(offset)
        return f.read(size)

    def write(self, path, data, offset, fh):
        """Write ``data`` at ``offset`` to the open handle ``fh``; return bytes written."""
        logger.debug("write %s", (path, offset))
        f = self.cache[fh]
        f.seek(offset)
        f.write(data)
        return len(data)

    def create(self, path, flags, fi=None):
        """Create ``path``, open it for binary writing and return the new handle."""
        logger.debug("create %s", (path, flags))
        fn = self._full_path(path)
        self.fs.touch(fn)  # OS will want to get attributes immediately
        return self._register(self.fs.open(fn, "wb"))

    def open(self, path, flags):
        """Open ``path`` and return a handle; even ``flags`` mean read, odd mean write."""
        logger.debug("open %s", (path, flags))
        fn = self._full_path(path)
        if flags % 2 == 0:
            # read
            mode = "rb"
        else:
            # write/create
            mode = "wb"
        return self._register(self.fs.open(fn, mode))

    def truncate(self, path, length, fh=None):
        """Truncate ``path`` to zero length; any other length is not implemented."""
        fn = self._full_path(path)
        if length != 0:
            raise NotImplementedError
        # maybe should be no-op since open with write sets size to zero anyway
        self.fs.touch(fn)

    def unlink(self, path):
        """Remove the file at ``path``; failures are reported as ``FuseOSError(EIO)``."""
        fn = self._full_path(path)
        try:
            self.fs.rm(fn, False)
        except OSError as exc:  # FileNotFoundError is a subclass of OSError
            raise FuseOSError(EIO) from exc

    def release(self, path, fh):
        """Close and forget the handle ``fh``. Always returns 0."""
        try:
            # Remove the entry first so that a failing close() cannot leave a
            # stale handle behind in the table.
            f = self.cache.pop(fh, None)
            if f is not None:
                f.close()
        except Exception as e:
            # Best effort: report through the module logger instead of stdout.
            logger.warning("release %s (handle %s) failed: %s", path, fh, e)
        return 0

    def chmod(self, path, mode):
        """Forward ``chmod`` if the wrapped filesystem has one, else ``NotImplementedError``."""
        if hasattr(self.fs, "chmod"):
            return self.fs.chmod(self._full_path(path), mode)
        raise NotImplementedError
140
+
141
+
142
def run(
    fs,
    path,
    mount_point,
    foreground=True,
    threads=False,
    ready_file=False,
    ops_class=FUSEr,
):
    """Mount a path of an fsspec filesystem in a local directory.

    fusepy is used to make ``path`` on ``fs`` appear under ``mount_point``
    in the local file-system. fusepy must be installed and FUSE must be
    available on the system (typically via apt, yum, brew, etc.).

    Parameters
    ----------
    fs: file-system instance
        From one of the compatible implementations
    path: str
        Location on that file-system to regard as the root directory to
        mount. Note that you typically should include the terminating "/"
        character.
    mount_point: str
        An empty directory on the local file-system where the contents of
        the remote path will appear.
    foreground: bool
        Whether or not calling this function will block. Operation will
        typically be more stable if True.
    threads: bool
        Whether or not to create threads when responding to file operations
        within the mounted directory. Operation will typically be more
        stable if False.
    ready_file: bool
        Whether the FUSE process is ready. The ``.fuse_ready`` file will
        exist in the ``mount_point`` directory if True. Debugging purpose.
    ops_class: FUSEr or Subclass of FUSEr
        To override the default behavior of FUSEr. For Example, logging
        to file.

    Returns
    -------
    The started daemon ``threading.Thread`` when ``foreground`` is False,
    otherwise None once the blocking mount ends.
    """

    def _mount():
        return FUSE(
            ops_class(fs, path, ready_file=ready_file),
            mount_point,
            nothreads=not threads,
            foreground=foreground,
        )

    if foreground:  # pragma: no cover
        # Blocking mount; Ctrl-C ends it quietly.
        try:
            _mount()
        except KeyboardInterrupt:
            pass
        return None

    worker = threading.Thread(target=_mount)
    worker.daemon = True
    worker.start()
    return worker
202
+
203
+
204
def main(args):
    """Mount filesystem from chained URL to MOUNT_POINT.

    Examples:

    python3 -m fsspec.fuse memory /usr/share /tmp/mem

    python3 -m fsspec.fuse local /tmp/source /tmp/local \\
            -l /tmp/fsspecfuse.log

    You can also mount chained-URLs and use special settings:

    python3 -m fsspec.fuse 'filecache::zip::file://data.zip' \\
            / /tmp/zip \\
            -o 'filecache-cache_storage=/tmp/simplecache'

    You can specify the type of the setting by using `[int]` or `[bool]`,
    (`true`, `yes`, `1` represents the Boolean value `True`):

    python3 -m fsspec.fuse 'simplecache::ftp://ftp1.at.proftpd.org' \\
            /historic/packages/RPMS /tmp/ftp \\
            -o 'simplecache-cache_storage=/tmp/simplecache' \\
            -o 'simplecache-check_files=false[bool]' \\
            -o 'ftp-listings_expiry_time=60[int]' \\
            -o 'ftp-username=anonymous' \\
            -o 'ftp-password=xieyanbo'
    """
    # NOTE: the docstring above is runtime text. It is passed to the parser as
    # ``description`` and printed verbatim by ``format_help`` below.

    class RawDescriptionArgumentParser(argparse.ArgumentParser):
        def format_help(self):
            # Swap the second blank-line-separated section of the generated
            # help for the unwrapped description text.
            usage = super().format_help()
            parts = usage.split("\n\n")
            parts[1] = self.description.rstrip()
            return "\n\n".join(parts)

    parser = RawDescriptionArgumentParser(prog="fsspec.fuse", description=main.__doc__)
    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument("url", type=str, help="fs url")
    parser.add_argument("source_path", type=str, help="source directory in fs")
    parser.add_argument("mount_point", type=str, help="local directory")
    parser.add_argument(
        "-o",
        "--option",
        action="append",
        help="Any options of protocol included in the chained URL",
    )
    parser.add_argument(
        "-l", "--log-file", type=str, help="Logging FUSE debug info (Default: '')"
    )
    # NOTE(review): the three flags below use ``store_false``, so each value is
    # True unless the flag is given, while the help text says "Default: False".
    # One of the two is presumably wrong -- confirm intended CLI behavior
    # before changing either.
    parser.add_argument(
        "-f",
        "--foreground",
        action="store_false",
        help="Running in foreground or not (Default: False)",
    )
    parser.add_argument(
        "-t",
        "--threads",
        action="store_false",
        help="Running with threads support (Default: False)",
    )
    parser.add_argument(
        "-r",
        "--ready-file",
        action="store_false",
        help="The `.fuse_ready` file will exist after FUSE is ready. "
        "(Debugging purpose, Default: False)",
    )
    args = parser.parse_args(args)

    # Turn repeated ``-o key=value`` options into keyword arguments for
    # ``url_to_fs``.
    kwargs = {}
    for item in args.option or []:
        key, sep, value = item.partition("=")
        if not sep:
            # no "=" present: report through the parser
            parser.error(message=f"Wrong option: {item!r}")
        val = value.lower()
        # Optional type suffix: "[int]" converts to int, "[bool]" to a bool
        # that is True only for "1", "yes" or "true"; otherwise stay a string.
        if val.endswith("[int]"):
            value = int(value[: -len("[int]")])
        elif val.endswith("[bool]"):
            value = val[: -len("[bool]")] in ["1", "yes", "true"]

        if "-" in key:
            # "<name>-<setting>" is nested under ``kwargs[<name>]``
            fs_name, setting_name = key.split("-", 1)
            if fs_name in kwargs:
                kwargs[fs_name][setting_name] = value
            else:
                kwargs[fs_name] = {setting_name: value}
        else:
            kwargs[key] = value

    if args.log_file:
        logging.basicConfig(
            level=logging.DEBUG,
            filename=args.log_file,
            format="%(asctime)s %(message)s",
        )

        # With a log file requested, mix fusepy's LoggingMixIn into the
        # operations class.
        class LoggingFUSEr(FUSEr, LoggingMixIn):
            pass

        fuser = LoggingFUSEr
    else:
        fuser = FUSEr

    fs, url_path = url_to_fs(args.url, **kwargs)
    logger.debug("Mounting %s to %s", url_path, str(args.mount_point))
    run(
        fs,
        args.source_path,
        args.mount_point,
        foreground=args.foreground,
        threads=args.threads,
        ready_file=args.ready_file,
        ops_class=fuser,
    )
319
+
320
+
321
+ if __name__ == "__main__":
322
+ import sys
323
+
324
+ main(sys.argv[1:])
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/generic.py ADDED
@@ -0,0 +1,408 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ import logging
5
+ import os
6
+ import shutil
7
+ import uuid
8
+ from typing import Optional
9
+
10
+ from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper
11
+ from .callbacks import DEFAULT_CALLBACK
12
+ from .core import filesystem, get_filesystem_class, split_protocol, url_to_fs
13
+
14
+ _generic_fs = {}
15
+ logger = logging.getLogger("fsspec.generic")
16
+
17
+
18
def set_generic_fs(protocol, **storage_options):
    """Register the filesystem instance used for ``protocol`` by the "generic" method.

    The instance is created with ``filesystem(protocol, **storage_options)``
    and stored in the module-level ``_generic_fs`` dict, replacing any
    previous entry for the same protocol.
    """
    instance = filesystem(protocol, **storage_options)
    _generic_fs[protocol] = instance
20
+
21
+
22
+ default_method = "default"
23
+
24
+
25
+ def _resolve_fs(url, method=None, protocol=None, storage_options=None):
26
+ """Pick instance of backend FS"""
27
+ method = method or default_method
28
+ protocol = protocol or split_protocol(url)[0]
29
+ storage_options = storage_options or {}
30
+ if method == "default":
31
+ return filesystem(protocol)
32
+ if method == "generic":
33
+ return _generic_fs[protocol]
34
+ if method == "current":
35
+ cls = get_filesystem_class(protocol)
36
+ return cls.current()
37
+ if method == "options":
38
+ fs, _ = url_to_fs(url, **storage_options.get(protocol, {}))
39
+ return fs
40
+ raise ValueError(f"Unknown FS resolution method: {method}")
41
+
42
+
43
def rsync(
    source,
    destination,
    delete_missing=False,
    source_field="size",
    dest_field="size",
    update_cond="different",
    inst_kwargs=None,
    fs=None,
    **kwargs,
):
    """Sync files between two directory trees

    (experimental)

    Parameters
    ----------
    source: str
        Root of the directory tree to take files from. This must be a directory, but
        do not include any terminating "/" character
    destination: str
        Root path to copy into. The contents of this location should be
        identical to the contents of ``source`` when done. This will be made a
        directory, and the terminal "/" should not be included.
    delete_missing: bool
        If there are paths in the destination that don't exist in the
        source and this is True, delete them. Otherwise, leave them alone.
    source_field: str | callable
        If ``update_cond`` is "different", this is the key in the info
        of source files to consider for difference. May be a function of the
        info dict.
    dest_field: str | callable
        If ``update_cond`` is "different", this is the key in the info
        of destination files to consider for difference. May be a function of
        the info dict.
    update_cond: "different"|"always"|"never"
        If "always", every file is copied, regardless of whether it exists in
        the destination. If "never", files that exist in the destination are
        not copied again. If "different" (default), only copy if the info
        fields given by ``source_field`` and ``dest_field`` (usually "size")
        are different. Other comparisons may be added in the future.
    inst_kwargs: dict|None
        If ``fs`` is None, use this set of keyword arguments to make a
        GenericFileSystem instance
    fs: GenericFileSystem|None
        Instance to use if explicitly given. The instance defines how to
        make downstream file system instances from paths.
    **kwargs:
        Passed on to ``fs.cp``.

    Returns
    -------
    dict of the copy operations that were performed, {source: destination}

    Raises
    ------
    ValueError
        If ``source`` is not a directory.
    """
    fs = fs or GenericFileSystem(**(inst_kwargs or {}))
    source = fs._strip_protocol(source)
    destination = fs._strip_protocol(destination)
    allfiles = fs.find(source, withdirs=True, detail=True)
    if not fs.isdir(source):
        raise ValueError("Can only rsync on a directory")
    otherfiles = fs.find(destination, withdirs=True, detail=True)

    # Directories present in the source but missing from the destination.
    dirs = [
        a
        for a, v in allfiles.items()
        if v["type"] == "directory" and a.replace(source, destination) not in otherfiles
    ]
    logger.debug("%s directories to create", len(dirs))
    if dirs:
        fs.make_many_dirs(
            [dirn.replace(source, destination) for dirn in dirs], exist_ok=True
        )

    source_files = {a: v for a, v in allfiles.items() if v["type"] == "file"}
    logger.debug("%s files to consider for copy", len(source_files))
    to_delete = [
        o
        for o, v in otherfiles.items()
        if o.replace(destination, source) not in source_files and v["type"] == "file"
    ]

    # Build the {source: destination} mapping in a fresh dict. The original
    # rewrote ``allfiles`` in place and had no branch for "never", so a file
    # that already existed kept its info dict as the "destination" and was
    # handed to ``fs.cp``.
    to_copy = {}
    for k, v in source_files.items():
        otherfile = k.replace(source, destination)
        if otherfile in otherfiles:
            if update_cond == "different":
                inf1 = source_field(v) if callable(source_field) else v[source_field]
                v2 = otherfiles[otherfile]
                inf2 = dest_field(v2) if callable(dest_field) else v2[dest_field]
                if inf1 == inf2:
                    # details match, don't copy
                    continue
            elif update_cond != "always":
                # "never" (and any unrecognised value): keep the existing file
                continue
        # either not in target yet, or selected for (re)copy above
        to_copy[k] = otherfile

    logger.debug("%s files to copy", len(to_copy))
    if to_copy:
        source_paths, target_paths = zip(*to_copy.items())
        fs.cp(source_paths, target_paths, **kwargs)
    logger.debug("%s files to delete", len(to_delete))
    if delete_missing and to_delete:
        fs.rm(to_delete)
    return to_copy
145
+
146
+
147
class GenericFileSystem(AsyncFileSystem):
    """Wrapper over all other FS types

    <experimental!>

    This implementation is a single unified interface to be able to run FS operations
    over generic URLs, and dispatch to the specific implementations using the URL
    protocol prefix.

    Note: instances of this FS are always async, even if you never use it with any async
    backend.
    """

    protocol = "generic"  # there is no real reason to ever use a protocol with this FS

    def __init__(self, default_method="default", **kwargs):
        """

        Parameters
        ----------
        default_method: str (optional)
            Defines how to configure backend FS instances. Options are:
            - "default": instantiate like FSClass(), with no
              extra arguments; this is the default instance of that FS, and can be
              configured via the config system
            - "generic": takes instances from the `_generic_fs` dict in this module,
              which you must populate before use. Keys are by protocol
            - "current": takes the most recently instantiated version of each FS
        """
        self.method = default_method
        super().__init__(**kwargs)

    def _parent(self, path):
        """Return the parent of ``path`` as a full URL including its protocol."""
        fs = _resolve_fs(path, self.method)
        return fs.unstrip_protocol(fs._parent(path))

    def _strip_protocol(self, path):
        """Normalise ``path`` via its backend; the protocol prefix is kept."""
        # normalization only
        fs = _resolve_fs(path, self.method)
        return fs.unstrip_protocol(fs._strip_protocol(path))

    async def _find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
        """Find entries below ``path``; names in the result are full URLs."""
        fs = _resolve_fs(path, self.method)
        # Always ask the backend for details so names can be rewritten below.
        if fs.async_impl:
            out = await fs._find(
                path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
            )
        else:
            out = fs.find(
                path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
            )
        result = {}
        for k, v in out.items():
            name = fs.unstrip_protocol(k)
            v["name"] = name
            result[name] = v
        if detail:
            return result
        return list(result)

    async def _info(self, url, **kwargs):
        """Return the backend's info dict for ``url`` with a full-URL ``name``."""
        fs = _resolve_fs(url, self.method)
        if fs.async_impl:
            out = await fs._info(url, **kwargs)
        else:
            out = fs.info(url, **kwargs)
        out["name"] = fs.unstrip_protocol(out["name"])
        return out

    async def _ls(
        self,
        url,
        detail=True,
        **kwargs,
    ):
        """List ``url``; returns info dicts, or only full-URL names if not ``detail``."""
        fs = _resolve_fs(url, self.method)
        if fs.async_impl:
            out = await fs._ls(url, detail=True, **kwargs)
        else:
            out = fs.ls(url, detail=True, **kwargs)
        for o in out:
            o["name"] = fs.unstrip_protocol(o["name"])
        if detail:
            return out
        else:
            return [o["name"] for o in out]

    async def _cat_file(
        self,
        url,
        **kwargs,
    ):
        """Return the contents of ``url`` from its backend."""
        fs = _resolve_fs(url, self.method)
        if fs.async_impl:
            return await fs._cat_file(url, **kwargs)
        else:
            return fs.cat_file(url, **kwargs)

    async def _pipe_file(
        self,
        path,
        value,
        **kwargs,
    ):
        """Write ``value`` to ``path`` on its backend."""
        fs = _resolve_fs(path, self.method)
        if fs.async_impl:
            return await fs._pipe_file(path, value, **kwargs)
        else:
            return fs.pipe_file(path, value, **kwargs)

    async def _rm(self, url, **kwargs):
        """Remove one URL or a list of URLs.

        The backend is chosen from the first URL only.
        """
        urls = url
        if isinstance(urls, str):
            urls = [urls]
        fs = _resolve_fs(urls[0], self.method)
        # Both branches receive the normalised list, so a single string is
        # handled the same way on sync and async backends.
        if fs.async_impl:
            await fs._rm(urls, **kwargs)
        else:
            fs.rm(urls, **kwargs)

    async def _makedirs(self, path, exist_ok=False):
        """Create ``path`` (and parents) on its backend."""
        logger.debug("Make dir %s", path)
        fs = _resolve_fs(path, self.method)
        if fs.async_impl:
            await fs._makedirs(path, exist_ok=exist_ok)
        else:
            fs.makedirs(path, exist_ok=exist_ok)

    def rsync(self, source, destination, **kwargs):
        """Sync files between two directory trees

        See `func:rsync` for more details.

        Returns
        -------
        The dict returned by the module-level ``rsync``.
        """
        return rsync(source, destination, fs=self, **kwargs)

    async def _cp_file(
        self,
        url,
        url2,
        blocksize=2**20,
        callback=DEFAULT_CALLBACK,
        **kwargs,
    ):
        """Copy one file, streaming in ``blocksize`` chunks across backends.

        If both URLs resolve to the same filesystem instance, the copy is
        delegated to that backend. Otherwise the source is read and the
        target written chunk by chunk, reporting progress to ``callback``.
        """
        fs = _resolve_fs(url, self.method)
        fs2 = _resolve_fs(url2, self.method)
        if fs is fs2:
            # pure remote
            if fs.async_impl:
                return await fs._cp_file(url, url2, **kwargs)
            else:
                return fs.cp_file(url, url2, **kwargs)
        kw = {"blocksize": 0, "cache_type": "none"}
        # Track the handles explicitly. The original relied on NameError in
        # the cleanup, which skipped closing f1 whenever opening f2 failed.
        f1 = f2 = None
        try:
            f1 = (
                await fs.open_async(url, "rb")
                if hasattr(fs, "open_async")
                else fs.open(url, "rb", **kw)
            )
            callback.set_size(await maybe_await(f1.size))
            f2 = (
                await fs2.open_async(url2, "wb")
                if hasattr(fs2, "open_async")
                else fs2.open(url2, "wb", **kw)
            )
            while f1.size is None or f2.tell() < f1.size:
                data = await maybe_await(f1.read(blocksize))
                if f1.size is None and not data:
                    break
                await maybe_await(f2.write(data))
                callback.absolute_update(f2.tell())
        finally:
            # Close the target first, and the source even if that fails.
            try:
                if f2 is not None:
                    await maybe_await(f2.close())
            finally:
                if f1 is not None:
                    await maybe_await(f1.close())

    async def _make_many_dirs(self, urls, exist_ok=True):
        """Create every directory in ``urls``; the backend comes from the first URL."""
        fs = _resolve_fs(urls[0], self.method)
        if fs.async_impl:
            coros = [fs._makedirs(u, exist_ok=exist_ok) for u in urls]
            await _run_coros_in_chunks(coros)
        else:
            for u in urls:
                fs.makedirs(u, exist_ok=exist_ok)

    make_many_dirs = sync_wrapper(_make_many_dirs)

    async def _copy(
        self,
        path1: list[str],
        path2: list[str],
        recursive: bool = False,
        on_error: str = "ignore",
        maxdepth: Optional[int] = None,
        batch_size: Optional[int] = None,
        tempdir: Optional[str] = None,
        **kwargs,
    ):
        """Copy the files in ``path1`` to the matching entries of ``path2``.

        Backends are chosen from the first element of each list. Copies
        between different backends go through ``copy_file_op``.

        Raises
        ------
        NotImplementedError
            If ``recursive`` is True.
        """
        if recursive:
            raise NotImplementedError
        fs = _resolve_fs(path1[0], self.method)
        fs2 = _resolve_fs(path2[0], self.method)
        # not expanding paths atm., assume call is from rsync()
        if fs is fs2:
            # pure remote
            if fs.async_impl:
                return await fs._copy(path1, path2, **kwargs)
            else:
                return fs.copy(path1, path2, **kwargs)
        await copy_file_op(
            fs, path1, fs2, path2, tempdir, batch_size, on_error=on_error
        )
361
+
362
+
363
async def copy_file_op(
    fs1, url1, fs2, url2, tempdir=None, batch_size=20, on_error="ignore"
):
    """Copy files from ``fs1`` to ``fs2`` through local temporary files.

    ``url1`` and ``url2`` are zipped pairwise; each pair is copied by
    ``_copy_file_op`` via a uniquely named file inside ``tempdir``, and the
    copies are run in batches of ``batch_size``.

    ``tempdir`` is created with ``tempfile.mkdtemp`` when falsy. The
    directory is removed afterwards in either case.
    NOTE(review): this also deletes a caller-supplied ``tempdir`` -- confirm
    that is intended.
    """
    import tempfile

    if not tempdir:
        tempdir = tempfile.mkdtemp()
    try:
        pending = []
        for src, dst in zip(url1, url2):
            scratch = os.path.join(tempdir, uuid.uuid4().hex)
            pending.append(
                _copy_file_op(fs1, src, fs2, dst, scratch, on_error=on_error)
            )
        await _run_coros_in_chunks(pending, batch_size=batch_size)
    finally:
        shutil.rmtree(tempdir)
384
+
385
+
386
async def _copy_file_op(fs1, url1, fs2, url2, local, on_error="ignore"):
    """Copy one file from ``fs1`` to ``fs2`` via the local path ``local``.

    The file is downloaded to ``local``, uploaded to ``url2`` and the local
    copy is then removed. Unless ``on_error`` is "raise", any ``Exception``
    raised on the way is logged at debug level and swallowed.
    """
    # An empty tuple in ``except`` matches nothing, so errors propagate.
    swallowed = Exception if on_error != "raise" else ()
    logger.debug("Copy %s -> %s", url1, url2)
    try:
        # download
        if not fs1.async_impl:
            fs1.get_file(url1, local)
        else:
            await fs1._get_file(url1, local)
        # upload
        if not fs2.async_impl:
            fs2.put_file(local, url2)
        else:
            await fs2._put_file(local, url2)
        os.unlink(local)
        logger.debug("Copy %s -> %s; done", url1, url2)
    except swallowed as e:
        logger.debug("ignoring cp exception for %s: %s", url1, e)
402
+
403
+
404
async def maybe_await(cor):
    """Await ``cor`` if it is a coroutine object; otherwise return it unchanged."""
    return (await cor) if inspect.iscoroutine(cor) else cor
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/mapping.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import array
2
+ import logging
3
+ import posixpath
4
+ import warnings
5
+ from collections.abc import MutableMapping
6
+ from functools import cached_property
7
+
8
+ from fsspec.core import url_to_fs
9
+
10
+ logger = logging.getLogger("fsspec.mapping")
11
+
12
+
13
class FSMap(MutableMapping):
    """Wrap a FileSystem instance as a mutable mapping.

    The keys of the mapping become files under the given root, and the
    values (which must be bytes) the contents of those files.

    Parameters
    ----------
    root: string
        prefix for all the files
    fs: FileSystem instance
    check: bool (=False)
        if True, require that the root exists and touch/remove a temporary
        file at the location, to check for write access.
    create: bool (=False)
        if True, make the root directory when it does not exist yet.
    missing_exceptions: None or tuple
        exception types which are reported as ``KeyError`` when reading; by
        default (FileNotFoundError, IsADirectoryError, NotADirectoryError)

    Examples
    --------
    >>> fs = FileSystem(**parameters)  # doctest: +SKIP
    >>> d = FSMap('my-data/path/', fs)  # doctest: +SKIP
    or, more likely
    >>> d = fs.get_mapper('my-data/path/')

    >>> d['loc1'] = b'Hello World'  # doctest: +SKIP
    >>> list(d.keys())  # doctest: +SKIP
    ['loc1']
    >>> d['loc1']  # doctest: +SKIP
    b'Hello World'
    """

    def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
        self.fs = fs
        self.root = fs._strip_protocol(root)
        # Joining a dummy key and dropping its last character yields the
        # stripped root *including* the trailing separator.
        self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
        if missing_exceptions is None:
            missing_exceptions = (
                FileNotFoundError,
                IsADirectoryError,
                NotADirectoryError,
            )
        self.missing_exceptions = missing_exceptions
        self.check = check
        self.create = create
        if create:
            if not self.fs.exists(root):
                self.fs.mkdir(root)
        if check:
            if not self.fs.exists(root):
                raise ValueError(
                    f"Path {root} does not exist. Create "
                    f" with the ``create=True`` keyword"
                )
            self.fs.touch(root + "/a")
            self.fs.rm(root + "/a")

    @cached_property
    def dirfs(self):
        """dirfs instance that can be used with the same keys as the mapper"""
        from .implementations.dirfs import DirFileSystem

        return DirFileSystem(path=self._root_key_to_str, fs=self.fs)

    def clear(self):
        """Remove all keys below root - empties out mapping

        This is best-effort: ordinary failures of the backend are ignored.
        """
        logger.info("Clear mapping at %s", self.root)
        try:
            self.fs.rm(self.root, True)
            self.fs.mkdir(self.root)
        except Exception:
            # Only swallow ordinary errors; KeyboardInterrupt/SystemExit must
            # still propagate to the caller.
            logger.debug("Could not clear mapping at %s", self.root, exc_info=True)

    def getitems(self, keys, on_error="raise"):
        """Fetch multiple items from the store

        If the backend is async-able, this might proceed concurrently

        Parameters
        ----------
        keys: iterable(str)
            The keys to be fetched
        on_error : "raise", "omit", "return"
            If raise, an underlying exception will be raised (converted to KeyError
            if the type is in self.missing_exceptions); if omit, keys with exception
            will simply not be included in the output; if "return", all keys are
            included in the output, but the value will be bytes or an exception
            instance.

        Returns
        -------
        dict(key, bytes|exception)
        """
        # ``keys`` is walked twice below; materialise it so that one-shot
        # iterables (generators, iterators) do not yield an empty result.
        keys = list(keys)
        keys2 = [self._key_to_str(k) for k in keys]
        oe = on_error if on_error == "raise" else "return"
        try:
            out = self.fs.cat(keys2, on_error=oe)
            if isinstance(out, bytes):
                # a single path may come back as bare bytes rather than a dict
                out = {keys2[0]: out}
        except self.missing_exceptions as e:
            raise KeyError from e
        out = {
            k: (KeyError() if isinstance(v, self.missing_exceptions) else v)
            for k, v in out.items()
        }
        return {
            key: out[k2]
            for key, k2 in zip(keys, keys2)
            if on_error == "return" or not isinstance(out[k2], BaseException)
        }

    def setitems(self, values_dict):
        """Set the values of multiple items in the store

        Parameters
        ----------
        values_dict: dict(str, bytes)
        """
        values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()}
        self.fs.pipe(values)

    def delitems(self, keys):
        """Remove multiple keys from the store"""
        self.fs.rm([self._key_to_str(k) for k in keys])

    def _key_to_str(self, key):
        """Generate full path for the key"""
        if not isinstance(key, str):
            # Non-str keys are deprecated and are slated to become a TypeError.
            warnings.warn(
                "from fsspec 2023.5 onward FSMap non-str keys will raise TypeError",
                DeprecationWarning,
            )
            if isinstance(key, list):
                key = tuple(key)
            key = str(key)
        return f"{self._root_key_to_str}{key}".rstrip("/")

    def _str_to_key(self, s):
        """Strip the root prefix from a path to leave the key name"""
        return s[len(self.root) :].lstrip("/")

    def __getitem__(self, key, default=None):
        """Retrieve data

        Raises KeyError for a missing key unless a non-None ``default`` is
        supplied, in which case ``default`` is returned.
        """
        k = self._key_to_str(key)
        try:
            result = self.fs.cat(k)
        except self.missing_exceptions:
            if default is not None:
                return default
            raise KeyError(key)
        return result

    def pop(self, key, default=None):
        """Pop data: return the value (or non-None ``default``) and remove the key"""
        result = self.__getitem__(key, default)
        try:
            del self[key]
        except KeyError:
            pass
        return result

    def __setitem__(self, key, value):
        """Store value in key"""
        key = self._key_to_str(key)
        self.fs.mkdirs(self.fs._parent(key), exist_ok=True)
        self.fs.pipe_file(key, maybe_convert(value))

    def __iter__(self):
        return (self._str_to_key(x) for x in self.fs.find(self.root))

    def __len__(self):
        return len(self.fs.find(self.root))

    def __delitem__(self, key):
        """Remove key

        Any ordinary failure to remove the file is reported as ``KeyError(key)``
        with the backend error attached as the cause.
        """
        try:
            self.fs.rm(self._key_to_str(key))
        except Exception as exc:
            # Do not convert KeyboardInterrupt/SystemExit into KeyError.
            raise KeyError(key) from exc

    def __contains__(self, key):
        """Does key exist in mapping?"""
        path = self._key_to_str(key)
        return self.fs.isfile(path)

    def __reduce__(self):
        # check/create are not repeated when the mapper is unpickled
        return FSMap, (self.root, self.fs, False, False, self.missing_exceptions)
197
+
198
+
199
def maybe_convert(value):
    """Return ``value`` as bytes when it is an array-like, else unchanged.

    ``array.array`` instances and objects exposing ``__array__`` are copied
    out through the buffer interface; everything else is passed through.
    """
    array_like = isinstance(value, array.array) or hasattr(value, "__array__")
    if not array_like:
        return value
    if hasattr(value, "dtype") and value.dtype.kind in "Mm":
        # datetime64/timedelta64 numpy arrays are not supported by the
        # buffer interface, so reinterpret them as 64-bit integers first
        value = value.view("int64")
    return bytes(memoryview(value))
208
+
209
+
210
def get_mapper(
    url="",
    check=False,
    create=False,
    missing_exceptions=None,
    alternate_root=None,
    **kwargs,
):
    """Create key-value interface for given URL and options

    The URL will be of the form "protocol://location" and point to the root
    of the mapper required. All keys will be file-names below this location,
    and their values the contents of each key.

    Also accepts compound URLs like zip::s3://bucket/file.zip , see ``fsspec.open``.

    Parameters
    ----------
    url: str
        Root URL of mapping
    check: bool
        Passed to ``FSMap``: whether to verify the location before returning
        the mapping
    create: bool
        Whether to make the directory corresponding to the root before
        instantiating
    missing_exceptions: None or tuple
        If given, these exception types will be regarded as missing keys and
        return KeyError when trying to read data. By default, you get
        (FileNotFoundError, IsADirectoryError, NotADirectoryError)
    alternate_root: None or str
        In cases of complex URLs, the parser may fail to pick the correct part
        for the mapper root, so this arg can override
    **kwargs:
        Forwarded to ``url_to_fs`` when resolving the filesystem

    Returns
    -------
    ``FSMap`` instance, the dict-like key-value store.
    """
    # The protocol is removed here - could defer to each open() on the backend
    filesystem, stripped_path = url_to_fs(url, **kwargs)
    if alternate_root is None:
        root = stripped_path
    else:
        root = alternate_root
    return FSMap(
        root,
        filesystem,
        check,
        create,
        missing_exceptions=missing_exceptions,
    )
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/parquet.py ADDED
@@ -0,0 +1,541 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import json
3
+ import warnings
4
+
5
+ from .core import url_to_fs
6
+ from .utils import merge_offset_ranges
7
+
8
+ # Parquet-Specific Utilities for fsspec
9
+ #
10
+ # Most of the functions defined in this module are NOT
11
+ # intended for public consumption. The only exception
12
+ # to this is `open_parquet_file`, which should be used
13
+ # in place of `fs.open()` to open parquet-formatted files
14
+ # on remote file systems.
15
+
16
+
17
def open_parquet_file(
    path,
    mode="rb",
    fs=None,
    metadata=None,
    columns=None,
    row_groups=None,
    storage_options=None,
    strict=False,
    engine="auto",
    max_gap=64_000,
    max_block=256_000_000,
    footer_sample_size=1_000_000,
    **kwargs,
):
    """
    Return a file-like object for a single Parquet file.

    The specified parquet `engine` will be used to parse the
    footer metadata, and determine the required byte ranges
    from the file. The target path will then be opened with
    the "parts" (`KnownPartsOfAFile`) caching strategy.

    Note that this method is intended for usage with remote
    file systems, and is unlikely to improve parquet-read
    performance on local file systems.

    Parameters
    ----------
    path: str
        Target file path.
    mode: str, optional
        Mode option to be passed through to `fs.open`. Default is "rb".
    metadata: Any, optional
        Parquet metadata object. Object type must be supported
        by the backend parquet engine. For now, only the "fastparquet"
        engine supports an explicit `ParquetFile` metadata object.
        If a metadata object is supplied, the remote footer metadata
        will not need to be transferred into local memory.
    fs: AbstractFileSystem, optional
        Filesystem object to use for opening the file. If nothing is
        specified, an `AbstractFileSystem` object will be inferred.
    engine : str, default "auto"
        Parquet engine to use for metadata parsing. Allowed options
        include "fastparquet", "pyarrow", and "auto". The specified
        engine must be installed in the current environment. If
        "auto" is specified, and both engines are installed,
        "fastparquet" will take precedence over "pyarrow".
    columns: list, optional
        List of all column names that may be read from the file.
        An empty list falls back to a plain `fs.open(path, mode=mode)`.
    row_groups : list, optional
        List of all row-groups that may be read from the file. This
        may be a list of row-group indices (integers), or it may be
        a list of `RowGroup` metadata objects (if the "fastparquet"
        engine is used).
    storage_options : dict, optional
        Used to generate an `AbstractFileSystem` object if `fs` was
        not specified.
    strict : bool, optional
        Whether the resulting `KnownPartsOfAFile` cache should
        fetch reads that go beyond a known byte-range boundary.
        If `False` (the default), any read that ends outside a
        known part will be zero padded. Note that using
        `strict=True` may be useful for debugging.
    max_gap : int, optional
        Neighboring byte ranges will only be merged when their
        inter-range gap is <= `max_gap`. Default is 64KB.
    max_block : int, optional
        Neighboring byte ranges will only be merged when the size of
        the aggregated range is <= `max_block`. Default is 256MB.
    footer_sample_size : int, optional
        Number of bytes to read from the end of the path to look
        for the footer metadata. If the sampled bytes do not contain
        the footer, a second read request will be required, and
        performance will suffer. Default is 1MB.
    **kwargs :
        Optional key-word arguments to pass to `fs.open`. A
        `cache_options` entry (dict or None) is merged with the
        "data" and "strict" options generated here.
    """

    # Make sure we have an `AbstractFileSystem` object
    # to work with
    if fs is None:
        fs = url_to_fs(path, **(storage_options or {}))[0]

    # For now, `columns == []` not supported. Just use
    # default `open` command with `path` input
    if columns is not None and len(columns) == 0:
        return fs.open(path, mode=mode)

    # Set the engine
    engine = _set_engine(engine)

    # Fetch the known byte ranges needed to read
    # `columns` and/or `row_groups`
    data = _get_parquet_byte_ranges(
        [path],
        fs,
        metadata=metadata,
        columns=columns,
        row_groups=row_groups,
        engine=engine,
        max_gap=max_gap,
        max_block=max_block,
        footer_sample_size=footer_sample_size,
    )

    # Extract file name from `data`
    fn = next(iter(data)) if data else path

    # Call self.open with "parts" caching. An explicit
    # `cache_options=None` is treated like an absent value, and the
    # caller's dict is never mutated because a fresh dict is built.
    options = dict(kwargs.pop("cache_options", None) or {})
    return fs.open(
        fn,
        mode=mode,
        cache_type="parts",
        cache_options={
            **options,
            "data": data.get(fn, {}),
            "strict": strict,
        },
        **kwargs,
    )
139
+
140
+
141
def _get_parquet_byte_ranges(
    paths,
    fs,
    metadata=None,
    columns=None,
    row_groups=None,
    max_gap=64_000,
    max_block=256_000_000,
    footer_sample_size=1_000_000,
    engine="auto",
):
    """Collect the known byte ranges required to read a given
    column/row-group selection from a Parquet dataset.

    The returned dictionary is keyed by path; each value is meant to
    be passed as the `data` argument of the `KnownPartsOfAFile`
    caching strategy for that single path.
    """

    # Resolve the engine when only its name was given
    if isinstance(engine, str):
        engine = _set_engine(engine)

    # A user-supplied metadata object lets us skip
    # transferring/parsing the remote footers entirely
    if metadata is not None:
        return _get_parquet_byte_ranges_from_metadata(
            metadata,
            fs,
            engine,
            columns=columns,
            row_groups=row_groups,
            max_gap=max_gap,
            max_block=max_block,
        )

    # Sizes of all files
    file_sizes = fs.sizes(paths)

    # Global accumulators for paths, starts, & ends
    result = {}
    data_paths, data_starts, data_ends = [], [], []

    # The header "magic" only has to be patched in when we fetch a
    # selection; whole-file transfers already contain it
    add_header_magic = not (columns is None and row_groups is None)

    if not add_header_magic:
        # No column or row-group selection: skip the footer sampling
        # and transfer every file completely in `max_block` pieces
        for i, path in enumerate(paths):
            result[path] = {}
            size = file_sizes[i]
            for offset in range(0, size, max_block):
                data_paths.append(path)
                data_starts.append(offset)
                data_ends.append(min(offset + max_block, size))
    else:
        # Specific columns or row-groups were requested.
        #
        # Sample the footers: the last `footer_sample_size` bytes of
        # each file (or the whole file when it is smaller than that)
        footer_ends = [file_sizes[i] for i, _ in enumerate(paths)]
        footer_starts = [max(0, end - footer_sample_size) for end in footer_ends]
        footer_samples = fs.cat_ranges(paths, footer_starts, footer_ends)

        # Verify that each sample holds the full footer; remember
        # where to extend it when it does not
        missing_footer_starts = footer_starts.copy()
        large_footer = 0
        for i, _ in enumerate(paths):
            footer_size = int.from_bytes(footer_samples[i][-8:-4], "little")
            real_footer_start = file_sizes[i] - (footer_size + 8)
            if real_footer_start < footer_starts[i]:
                missing_footer_starts[i] = real_footer_start
                large_footer = max(large_footer, (footer_size + 8))
        if large_footer:
            warnings.warn(
                f"Not enough data was used to sample the parquet footer. "
                f"Try setting footer_sample_size >= {large_footer}."
            )
            extra_blocks = fs.cat_ranges(
                paths,
                missing_footer_starts,
                footer_starts,
            )
            for i, block in enumerate(extra_blocks):
                footer_samples[i] = block + footer_samples[i]
                footer_starts[i] = missing_footer_starts[i]

        # Work out the byte ranges needed for every path
        for i, path in enumerate(paths):
            if file_sizes[i] < max_block:
                # Small file: fetch everything in front of the footer
                # as one range, unless the footer sample already
                # covers the whole file
                if footer_starts[i] > 0:
                    data_paths.append(path)
                    data_starts.append(0)
                    data_ends.append(footer_starts[i])
                continue

            # Let the engine pick the data byte ranges
            path_data_starts, path_data_ends = engine._parquet_byte_ranges(
                columns,
                row_groups=row_groups,
                footer=footer_samples[i],
                footer_start=footer_starts[i],
            )

            data_paths += [path] * len(path_data_starts)
            data_starts += path_data_starts
            data_ends += path_data_ends

        # Coalesce neighbouring offset ranges
        data_paths, data_starts, data_ends = merge_offset_ranges(
            data_paths,
            data_starts,
            data_ends,
            max_gap=max_gap,
            max_block=max_block,
            sort=False,  # Should already be sorted
        )

        # Seed `result` with the footer samples
        for i, path in enumerate(paths):
            footer_range = (footer_starts[i], footer_ends[i])
            result[path] = {footer_range: footer_samples[i]}

    # Pull the data byte-ranges into local memory
    _transfer_ranges(fs, result, data_paths, data_starts, data_ends)

    # Patch b"PAR1" into the header when required
    if add_header_magic:
        _add_header_magic(result)

    return result
284
+
285
+
286
def _get_parquet_byte_ranges_from_metadata(
    metadata,
    fs,
    engine,
    columns=None,
    row_groups=None,
    max_gap=64_000,
    max_block=256_000_000,
):
    """Variant of `_get_parquet_byte_ranges` for the case where an
    engine-specific `metadata` object is supplied, so that no remote
    footer has to be transferred before the byte ranges are computed.
    """

    # The engine derives paths and byte ranges from the metadata
    paths, starts, ends = engine._parquet_byte_ranges(
        columns,
        row_groups=row_groups,
        metadata=metadata,
    )

    # Coalesce neighbouring offset ranges
    merged = merge_offset_ranges(
        paths,
        starts,
        ends,
        max_gap=max_gap,
        max_block=max_block,
        sort=False,  # Should be sorted
    )
    paths, starts, ends = merged

    # One (initially empty) block mapping per distinct file, then
    # pull the data byte-ranges into local memory
    result = {fn: {} for fn in set(paths)}
    _transfer_ranges(fs, result, paths, starts, ends)

    # Patch b"PAR1" into the header
    _add_header_magic(result)

    return result
326
+
327
+
328
+ def _transfer_ranges(fs, blocks, paths, starts, ends):
329
+ # Use cat_ranges to gather the data byte_ranges
330
+ ranges = (paths, starts, ends)
331
+ for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges)):
332
+ blocks[path][(start, stop)] = data
333
+
334
+
335
+ def _add_header_magic(data):
336
+ # Add b"PAR1" to file headers
337
+ for path in list(data.keys()):
338
+ add_magic = True
339
+ for k in data[path].keys():
340
+ if k[0] == 0 and k[1] >= 4:
341
+ add_magic = False
342
+ break
343
+ if add_magic:
344
+ data[path][(0, 4)] = b"PAR1"
345
+
346
+
347
+ def _set_engine(engine_str):
348
+ # Define a list of parquet engines to try
349
+ if engine_str == "auto":
350
+ try_engines = ("fastparquet", "pyarrow")
351
+ elif not isinstance(engine_str, str):
352
+ raise ValueError(
353
+ "Failed to set parquet engine! "
354
+ "Please pass 'fastparquet', 'pyarrow', or 'auto'"
355
+ )
356
+ elif engine_str not in ("fastparquet", "pyarrow"):
357
+ raise ValueError(f"{engine_str} engine not supported by `fsspec.parquet`")
358
+ else:
359
+ try_engines = [engine_str]
360
+
361
+ # Try importing the engines in `try_engines`,
362
+ # and choose the first one that succeeds
363
+ for engine in try_engines:
364
+ try:
365
+ if engine == "fastparquet":
366
+ return FastparquetEngine()
367
+ elif engine == "pyarrow":
368
+ return PyarrowEngine()
369
+ except ImportError:
370
+ pass
371
+
372
+ # Raise an error if a supported parquet engine
373
+ # was not found
374
+ raise ImportError(
375
+ f"The following parquet engines are not installed "
376
+ f"in your python environment: {try_engines}."
377
+ f"Please install 'fastparquert' or 'pyarrow' to "
378
+ f"utilize the `fsspec.parquet` module."
379
+ )
380
+
381
+
382
class FastparquetEngine:
    # The purpose of the FastparquetEngine class is
    # to check if fastparquet can be imported (on initialization)
    # and to define a `_parquet_byte_ranges` method. In the
    # future, this class may also be used to define other
    # methods/logic that are specific to fastparquet.

    def __init__(self):
        import fastparquet as fp

        self.fp = fp

    def _row_group_filename(self, row_group, pf):
        """Return the file path that `pf` reports for `row_group`."""
        return pf.row_group_filename(row_group)

    def _parquet_byte_ranges(
        self,
        columns,
        row_groups=None,
        metadata=None,
        footer=None,
        footer_start=None,
    ):
        """Compute the byte ranges of the selected column chunks.

        Parameters
        ----------
        columns: list or None
            Column names to select; None selects every column.
        row_groups: list or None
            Row-group indices, or row-group metadata objects.
        metadata: ParquetFile-like or None
            Pre-parsed metadata; when None, `footer` is parsed instead.
        footer: bytes or None
            Footer sample, used only when `metadata` is None.
        footer_start: int or None
            Offset where `footer` begins; ranges are clipped to it.

        Returns
        -------
        (data_paths, data_starts, data_ends) when `metadata` was given,
        otherwise (data_starts, data_ends).
        """
        # Initialize offset ranges and define ParquetFile metadata
        pf = metadata
        data_paths, data_starts, data_ends = [], [], []
        if pf is None:
            pf = self.fp.ParquetFile(io.BytesIO(footer))

        # Convert columns to a set and add any index columns
        # specified in the pandas metadata (just in case)
        column_set = None if columns is None else set(columns)
        if column_set is not None and hasattr(pf, "pandas_metadata"):
            md_index = [
                ind
                for ind in pf.pandas_metadata.get("index_columns", [])
                # Ignore RangeIndex information
                if not isinstance(ind, dict)
            ]
            column_set |= set(md_index)

        # Check if row_groups is a list of integers
        # or a list of row-group metadata
        if row_groups and not isinstance(row_groups[0], int):
            # Input row_groups contains row-group metadata
            row_group_indices = None
        else:
            # Input row_groups contains row-group indices
            row_group_indices = row_groups
            row_groups = pf.row_groups

        # Loop through column chunks to add required byte ranges
        for r, row_group in enumerate(row_groups):
            # Skip this row-group if we are targeting
            # specific row-groups
            if row_group_indices is None or r in row_group_indices:
                # Find the target parquet-file path for `row_group`
                fn = self._row_group_filename(row_group, pf)

                for column in row_group.columns:
                    name = column.meta_data.path_in_schema[0]
                    # Skip this column if we are targeting
                    # specific columns
                    if column_set is None or name in column_set:
                        file_offset0 = column.meta_data.dictionary_page_offset
                        if file_offset0 is None:
                            file_offset0 = column.meta_data.data_page_offset
                        num_bytes = column.meta_data.total_compressed_size
                        if footer_start is None or file_offset0 < footer_start:
                            data_paths.append(fn)
                            data_starts.append(file_offset0)
                            data_ends.append(
                                min(
                                    file_offset0 + num_bytes,
                                    footer_start or (file_offset0 + num_bytes),
                                )
                            )

        # Test against None (as done for `pf` above) rather than truthiness:
        # a metadata object that evaluates as falsy must still yield the
        # three-element result its caller unpacks.
        if metadata is not None:
            # The metadata in this call may map to multiple
            # file paths. Need to include `data_paths`
            return data_paths, data_starts, data_ends
        return data_starts, data_ends
465
+
466
+
467
class PyarrowEngine:
    # PyarrowEngine verifies on construction that pyarrow can be
    # imported and provides a `_parquet_byte_ranges` method. Other
    # pyarrow-specific methods/logic may be added here in the future.

    def __init__(self):
        import pyarrow.parquet as pq

        self.pq = pq

    def _row_group_filename(self, row_group, metadata):
        raise NotImplementedError

    def _parquet_byte_ranges(
        self,
        columns,
        row_groups=None,
        metadata=None,
        footer=None,
        footer_start=None,
    ):
        """Return `(data_starts, data_ends)` for the selected column
        chunks described by `footer`, clipped at `footer_start`.

        A `metadata` object is not supported and raises ValueError.
        """
        if metadata is not None:
            raise ValueError("metadata input not supported for PyarrowEngine")

        starts, ends = [], []
        file_md = self.pq.ParquetFile(io.BytesIO(footer)).metadata

        # Build the set of wanted columns, extended by any index
        # columns named in the pandas metadata (just in case)
        wanted = None
        if columns is not None:
            wanted = set(columns)
            schema_md = file_md.schema.to_arrow_schema().metadata
            if schema_md is not None and b"pandas" in schema_md:
                pandas_md = json.loads(schema_md[b"pandas"].decode("utf8"))
                wanted |= {
                    ind
                    for ind in pandas_md.get("index_columns", [])
                    # RangeIndex information is stored as a dict; ignore it
                    if not isinstance(ind, dict)
                }

        # Visit every column chunk and record the ranges we need
        for rg_index in range(file_md.num_row_groups):
            # Row-group filtering
            if row_groups is not None and rg_index not in row_groups:
                continue
            rg = file_md.row_group(rg_index)
            for col_index in range(rg.num_columns):
                chunk = rg.column(col_index)
                full_name = chunk.path_in_schema
                top_level = full_name.split(".")[0]
                # Column filtering: match on the full dotted path or
                # on its first component
                if (
                    wanted is not None
                    and full_name not in wanted
                    and top_level not in wanted
                ):
                    continue
                offset = chunk.dictionary_page_offset
                if offset is None:
                    offset = chunk.data_page_offset
                size = chunk.total_compressed_size
                if offset < footer_start:
                    starts.append(offset)
                    ends.append(min(offset + size, footer_start))
        return starts, ends
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/registry.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import importlib
4
+ import types
5
+ import warnings
6
+
7
+ __all__ = ["registry", "get_filesystem_class", "default"]
8
+
9
+ # internal, mutable
10
+ _registry: dict[str, type] = {}
11
+
12
+ # external, immutable
13
+ registry = types.MappingProxyType(_registry)
14
+ default = "file"
15
+
16
+
17
def register_implementation(name, cls, clobber=False, errtxt=None):
    """Add implementation class to the registry

    Parameters
    ----------
    name: str
        Protocol name to associate with the class
    cls: class or str
        if a class: fsspec-compliant implementation class (normally inherits from
        ``fsspec.AbstractFileSystem``), which gets added straight to the registry.
        If a str, the full path to an implementation class like
        package.module.class, which gets added to known_implementations,
        so the import is deferred until the filesystem is actually used.
    clobber: bool (optional)
        Whether to overwrite a protocol with the same name; if False, will raise
        instead (unless the very same class/path is being registered again).
    errtxt: str (optional)
        If given, then a failure to import the given class will result in this
        text being given.
    """
    if isinstance(cls, str):
        # Deferred registration: only the dotted path is recorded
        if name not in known_implementations or clobber is not False:
            known_implementations[name] = {
                "class": cls,
                "err": errtxt or f"{cls} import failed for protocol {name}",
            }
        elif cls != known_implementations[name]["class"]:
            raise ValueError(
                f"Name ({name}) already in the known_implementations and clobber "
                f"is False"
            )
        return

    # Direct registration of an implementation class
    if name not in registry or clobber is not False:
        _registry[name] = cls
    elif _registry[name] is not cls:
        raise ValueError(
            f"Name ({name}) already in the registry and clobber is False"
        )
58
+
59
+
60
+ # protocols mapped to the class which implements them. This dict can be
61
+ # updated with register_implementation
62
+ known_implementations = {
63
+ "abfs": {
64
+ "class": "adlfs.AzureBlobFileSystem",
65
+ "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
66
+ },
67
+ "adl": {
68
+ "class": "adlfs.AzureDatalakeFileSystem",
69
+ "err": "Install adlfs to access Azure Datalake Gen1",
70
+ },
71
+ "arrow_hdfs": {
72
+ "class": "fsspec.implementations.arrow.HadoopFileSystem",
73
+ "err": "pyarrow and local java libraries required for HDFS",
74
+ },
75
+ "asynclocal": {
76
+ "class": "morefs.asyn_local.AsyncLocalFileSystem",
77
+ "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
78
+ },
79
+ "az": {
80
+ "class": "adlfs.AzureBlobFileSystem",
81
+ "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
82
+ },
83
+ "blockcache": {"class": "fsspec.implementations.cached.CachingFileSystem"},
84
+ "box": {
85
+ "class": "boxfs.BoxFileSystem",
86
+ "err": "Please install boxfs to access BoxFileSystem",
87
+ },
88
+ "cached": {"class": "fsspec.implementations.cached.CachingFileSystem"},
89
+ "dask": {
90
+ "class": "fsspec.implementations.dask.DaskWorkerFileSystem",
91
+ "err": "Install dask distributed to access worker file system",
92
+ },
93
+ "data": {"class": "fsspec.implementations.data.DataFileSystem"},
94
+ "dbfs": {
95
+ "class": "fsspec.implementations.dbfs.DatabricksFileSystem",
96
+ "err": "Install the requests package to use the DatabricksFileSystem",
97
+ },
98
+ "dir": {"class": "fsspec.implementations.dirfs.DirFileSystem"},
99
+ "dropbox": {
100
+ "class": "dropboxdrivefs.DropboxDriveFileSystem",
101
+ "err": (
102
+ 'DropboxFileSystem requires "dropboxdrivefs","requests" and "'
103
+ '"dropbox" to be installed'
104
+ ),
105
+ },
106
+ "dvc": {
107
+ "class": "dvc.api.DVCFileSystem",
108
+ "err": "Install dvc to access DVCFileSystem",
109
+ },
110
+ "file": {"class": "fsspec.implementations.local.LocalFileSystem"},
111
+ "filecache": {"class": "fsspec.implementations.cached.WholeFileCacheFileSystem"},
112
+ "ftp": {"class": "fsspec.implementations.ftp.FTPFileSystem"},
113
+ "gcs": {
114
+ "class": "gcsfs.GCSFileSystem",
115
+ "err": "Please install gcsfs to access Google Storage",
116
+ },
117
+ "gdrive": {
118
+ "class": "gdrivefs.GoogleDriveFileSystem",
119
+ "err": "Please install gdrivefs for access to Google Drive",
120
+ },
121
+ "generic": {"class": "fsspec.generic.GenericFileSystem"},
122
+ "git": {
123
+ "class": "fsspec.implementations.git.GitFileSystem",
124
+ "err": "Install pygit2 to browse local git repos",
125
+ },
126
+ "github": {
127
+ "class": "fsspec.implementations.github.GithubFileSystem",
128
+ "err": "Install the requests package to use the github FS",
129
+ },
130
+ "gs": {
131
+ "class": "gcsfs.GCSFileSystem",
132
+ "err": "Please install gcsfs to access Google Storage",
133
+ },
134
+ "hdfs": {
135
+ "class": "fsspec.implementations.arrow.HadoopFileSystem",
136
+ "err": "pyarrow and local java libraries required for HDFS",
137
+ },
138
+ "hf": {
139
+ "class": "huggingface_hub.HfFileSystem",
140
+ "err": "Install huggingface_hub to access HfFileSystem",
141
+ },
142
+ "http": {
143
+ "class": "fsspec.implementations.http.HTTPFileSystem",
144
+ "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
145
+ },
146
+ "https": {
147
+ "class": "fsspec.implementations.http.HTTPFileSystem",
148
+ "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
149
+ },
150
+ "jlab": {
151
+ "class": "fsspec.implementations.jupyter.JupyterFileSystem",
152
+ "err": "Jupyter FS requires requests to be installed",
153
+ },
154
+ "jupyter": {
155
+ "class": "fsspec.implementations.jupyter.JupyterFileSystem",
156
+ "err": "Jupyter FS requires requests to be installed",
157
+ },
158
+ "lakefs": {
159
+ "class": "lakefs_spec.LakeFSFileSystem",
160
+ "err": "Please install lakefs-spec to access LakeFSFileSystem",
161
+ },
162
+ "libarchive": {
163
+ "class": "fsspec.implementations.libarchive.LibArchiveFileSystem",
164
+ "err": "LibArchive requires to be installed",
165
+ },
166
+ "local": {"class": "fsspec.implementations.local.LocalFileSystem"},
167
+ "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"},
168
+ "oci": {
169
+ "class": "ocifs.OCIFileSystem",
170
+ "err": "Install ocifs to access OCI Object Storage",
171
+ },
172
+ "ocilake": {
173
+ "class": "ocifs.OCIFileSystem",
174
+ "err": "Install ocifs to access OCI Data Lake",
175
+ },
176
+ "oss": {
177
+ "class": "ossfs.OSSFileSystem",
178
+ "err": "Install ossfs to access Alibaba Object Storage System",
179
+ },
180
+ "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"},
181
+ "root": {
182
+ "class": "fsspec_xrootd.XRootDFileSystem",
183
+ "err": (
184
+ "Install fsspec-xrootd to access xrootd storage system. "
185
+ "Note: 'root' is the protocol name for xrootd storage systems, "
186
+ "not referring to root directories"
187
+ ),
188
+ },
189
+ "s3": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
190
+ "s3a": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
191
+ "sftp": {
192
+ "class": "fsspec.implementations.sftp.SFTPFileSystem",
193
+ "err": 'SFTPFileSystem requires "paramiko" to be installed',
194
+ },
195
+ "simplecache": {"class": "fsspec.implementations.cached.SimpleCacheFileSystem"},
196
+ "smb": {
197
+ "class": "fsspec.implementations.smb.SMBFileSystem",
198
+ "err": 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed',
199
+ },
200
+ "ssh": {
201
+ "class": "fsspec.implementations.sftp.SFTPFileSystem",
202
+ "err": 'SFTPFileSystem requires "paramiko" to be installed',
203
+ },
204
+ "tar": {"class": "fsspec.implementations.tar.TarFileSystem"},
205
+ "wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"},
206
+ "webdav": {
207
+ "class": "webdav4.fsspec.WebdavFileSystem",
208
+ "err": "Install webdav4 to access WebDAV",
209
+ },
210
+ "webhdfs": {
211
+ "class": "fsspec.implementations.webhdfs.WebHDFS",
212
+ "err": 'webHDFS access requires "requests" to be installed',
213
+ },
214
+ "zip": {"class": "fsspec.implementations.zip.ZipFileSystem"},
215
+ }
216
+
217
+ assert list(known_implementations) == sorted(
218
+ known_implementations
219
+ ), "Not in alphabetical order"
220
+
221
+
222
def get_filesystem_class(protocol):
    """Fetch named protocol implementation from the registry

    The dict ``known_implementations`` maps protocol names to the locations
    of classes implementing the corresponding file-system. When used for the
    first time, appropriate imports will happen and the class will be placed in
    the registry. All subsequent calls will fetch directly from the registry.

    Some protocol implementations require additional dependencies, and so the
    import may fail. In this case, the string in the "err" field of the
    ``known_implementations`` will be given as the error message. Entries
    without an "err" field fall back to a generic message that includes the
    underlying import error.

    Parameters
    ----------
    protocol: str or None
        Protocol name; a falsy value selects the module-level ``default``.

    Raises
    ------
    ValueError
        If the protocol is neither registered nor a known implementation.
    ImportError
        If the implementing class could not be imported.
    """
    if not protocol:
        protocol = default

    if protocol not in registry:
        if protocol not in known_implementations:
            raise ValueError(f"Protocol not known: {protocol}")
        bit = known_implementations[protocol]
        try:
            register_implementation(protocol, _import_class(bit["class"]))
        except ImportError as e:
            # Several entries (e.g. "ftp", "local", "memory") carry no "err"
            # hint; indexing it directly would raise KeyError and hide the
            # real import failure.
            message = bit.get("err") or (
                f"Could not import implementation for protocol {protocol!r}: {e}"
            )
            raise ImportError(message) from e
    cls = registry[protocol]
    # Classes that do not declare their own protocol adopt the requested one.
    if getattr(cls, "protocol", None) in ("abstract", None):
        cls.protocol = protocol

    return cls
250
+
251
+
252
+ s3_msg = """Your installed version of s3fs is very old and known to cause
253
+ severe performance issues, see also https://github.com/dask/dask/issues/10276
254
+
255
+ To fix, you should specify a lower version bound on s3fs, or
256
+ update the current installation.
257
+ """
258
+
259
+
260
+ def _import_class(cls, minv=None):
261
+ """Take a string FQP and return the imported class or identifier
262
+
263
+ cls is of the form "package.module.klass" or "package.module:subobject.klass"
264
+ """
265
+ if ":" in cls:
266
+ mod, name = cls.rsplit(":", 1)
267
+ s3 = mod == "s3fs"
268
+ mod = importlib.import_module(mod)
269
+ if s3 and mod.__version__.split(".") < ["0", "5"]:
270
+ warnings.warn(s3_msg)
271
+ for part in name.split("."):
272
+ mod = getattr(mod, part)
273
+ return mod
274
+ else:
275
+ mod, name = cls.rsplit(".", 1)
276
+ s3 = mod == "s3fs"
277
+ mod = importlib.import_module(mod)
278
+ if s3 and mod.__version__.split(".") < ["0", "5"]:
279
+ warnings.warn(s3_msg)
280
+ return getattr(mod, name)
281
+
282
+
283
def filesystem(protocol, **storage_options):
    """Create a filesystem instance for ``protocol``.

    The class is looked up with ``get_filesystem_class`` and called with
    ``storage_options`` as keyword arguments; their meaning is specific to
    the chosen protocol. Requesting ``"arrow_hdfs"`` emits a
    ``DeprecationWarning`` before the lookup.
    """
    uses_deprecated_name = protocol == "arrow_hdfs"
    if uses_deprecated_name:
        warnings.warn(
            "The 'arrow_hdfs' protocol has been deprecated and will be "
            "removed in the future. Specify it as 'hdfs'.",
            DeprecationWarning,
        )

    fs_class = get_filesystem_class(protocol)
    instance = fs_class(**storage_options)
    return instance
298
+
299
+
300
def available_protocols():
    """List the protocol names present in ``known_implementations``.

    A listed protocol may still need extra packages before it can be imported.
    """
    return [*known_implementations]
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/transaction.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import deque
2
+
3
+
4
class Transaction:
    """Filesystem transaction write context

    Gathers files for deferred commit or discard, so that several write
    operations can be finalized semi-atomically. This works by having this
    instance as the ``.transaction`` attribute of the given filesystem
    """

    def __init__(self, fs, **kwargs):
        """
        Parameters
        ----------
        fs: FileSystem instance
        kwargs:
            Accepted and ignored.
        """
        self.fs = fs
        self.files = deque()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """End transaction and commit, if exit is not due to exception"""
        # only commit if there was no exception
        self.complete(commit=exc_type is None)
        # ``complete`` normally detaches the filesystem already; this covers
        # overrides of ``complete`` that leave ``self.fs`` set.
        if self.fs:
            self.fs._intrans = False
            self.fs._transaction = None
            self.fs = None

    def start(self):
        """Start a transaction on this FileSystem"""
        self.files = deque()  # clean up after previous failed completions
        self.fs._intrans = True

    def complete(self, commit=True):
        """Finish transaction: commit or discard all deferred files

        Parameters
        ----------
        commit: bool
            If True call ``commit()`` on each deferred file, otherwise
            ``discard()``.

        Safe to call again after the transaction has been finalized: with no
        filesystem attached, only remaining files (if any) are processed.
        """
        while self.files:
            f = self.files.popleft()
            if commit:
                f.commit()
            else:
                f.discard()
        # Guard against a second call (e.g. an explicit complete() inside a
        # ``with`` block followed by __exit__), where ``self.fs`` is None.
        if self.fs is not None:
            self.fs._intrans = False
            self.fs._transaction = None
            self.fs = None
50
+
51
+
52
class FileActor:
    """Holds a list of deferred files and commits or discards them in bulk."""

    def __init__(self):
        self.files = []

    def commit(self):
        """Call ``commit()`` on every held file, then empty the list."""
        held = self.files
        for entry in held:
            entry.commit()
        held.clear()

    def discard(self):
        """Call ``discard()`` on every held file, then empty the list."""
        held = self.files
        for entry in held:
            entry.discard()
        held.clear()

    def append(self, f):
        """Add ``f`` to the held files."""
        self.files.append(f)
68
+
69
+
70
class DaskTransaction(Transaction):
    """Transaction whose deferred files are held by a ``FileActor`` created
    through the default ``distributed`` client."""

    def __init__(self, fs):
        """
        Parameters
        ----------
        fs: FileSystem instance
        """
        import distributed

        super().__init__(fs)
        client = distributed.default_client()
        # Replace the local deque with a FileActor submitted as an actor.
        self.files = client.submit(FileActor, actor=True).result()

    def complete(self, commit=True):
        """Finish transaction: commit or discard all deferred files"""
        if commit:
            self.files.commit().result()
        else:
            self.files.discard().result()
        self.fs._intrans = False
        # Match Transaction.complete: drop the filesystem's reference to this
        # finished transaction, which no longer has a filesystem attached.
        self.fs._transaction = None
        self.fs = None
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/fsspec/utils.py ADDED
@@ -0,0 +1,740 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import logging
5
+ import math
6
+ import os
7
+ import pathlib
8
+ import re
9
+ import sys
10
+ import tempfile
11
+ from functools import partial
12
+ from hashlib import md5
13
+ from importlib.metadata import version
14
+ from typing import (
15
+ IO,
16
+ TYPE_CHECKING,
17
+ Any,
18
+ Callable,
19
+ Iterable,
20
+ Iterator,
21
+ Sequence,
22
+ TypeVar,
23
+ )
24
+ from urllib.parse import urlsplit
25
+
26
+ if TYPE_CHECKING:
27
+ from typing_extensions import TypeGuard
28
+
29
+ from fsspec.spec import AbstractFileSystem
30
+
31
+
32
+ DEFAULT_BLOCK_SIZE = 5 * 2**20
33
+
34
+ T = TypeVar("T")
35
+
36
+
37
def infer_storage_options(
    urlpath: str, inherit_storage_options: dict[str, Any] | None = None
) -> dict[str, Any]:
    """Infer storage options from URL path and merge it with existing storage
    options.

    Parameters
    ----------
    urlpath: str or unicode
        Either local absolute file path or URL (hdfs://namenode:8020/file.csv)
    inherit_storage_options: dict (optional)
        Its contents will get merged with the inferred information from the
        given path

    Returns
    -------
    Storage options dict.

    Raises
    ------
    KeyError
        Via ``update_storage_options`` when an inherited option conflicts
        with an inferred one.

    Examples
    --------
    >>> infer_storage_options('/mnt/datasets/test.csv')  # doctest: +SKIP
    {"protocol": "file", "path": "/mnt/datasets/test.csv"}
    >>> infer_storage_options(
    ...     'hdfs://username:pwd@node:123/mnt/datasets/test.csv?q=1',
    ...     inherit_storage_options={'extra': 'value'},
    ... )  # doctest: +SKIP
    {"protocol": "hdfs", "username": "username", "password": "pwd",
    "host": "node", "port": 123, "path": "/mnt/datasets/test.csv",
    "url_query": "q=1", "extra": "value"}
    """
    # Handle Windows paths including disk name in this special case
    if (
        re.match(r"^[a-zA-Z]:[\\/]", urlpath)
        or re.match(r"^[a-zA-Z0-9]+://", urlpath) is None
    ):
        return {"protocol": "file", "path": urlpath}

    parsed_path = urlsplit(urlpath)
    protocol = parsed_path.scheme or "file"
    if parsed_path.fragment:
        # urlsplit strips the fragment from the path; put it back so the
        # path still contains the "#..." part.
        path = "#".join([parsed_path.path, parsed_path.fragment])
    else:
        path = parsed_path.path
    if protocol == "file":
        # Special case parsing file protocol URL on Windows according to:
        # https://msdn.microsoft.com/en-us/library/jj710207.aspx
        windows_path = re.match(r"^/([a-zA-Z])[:|]([\\/].*)$", path)
        if windows_path:
            path = "%s:%s" % windows_path.groups()

    if protocol in ["http", "https"]:
        # for HTTP, we don't want to parse, as requests will anyway
        return {"protocol": protocol, "path": urlpath}

    options: dict[str, Any] = {"protocol": protocol, "path": path}

    if parsed_path.netloc:
        # Parse `hostname` from netloc manually because `parsed_path.hostname`
        # lowercases the hostname which is not always desirable (e.g. in S3):
        # https://github.com/dask/dask/issues/1417
        options["host"] = parsed_path.netloc.rsplit("@", 1)[-1].rsplit(":", 1)[0]

        if protocol in ("s3", "s3a", "gcs", "gs"):
            # For these protocols the host is prepended to the path.
            options["path"] = options["host"] + options["path"]
        if parsed_path.port:
            options["port"] = parsed_path.port
        if parsed_path.username:
            options["username"] = parsed_path.username
        if parsed_path.password:
            options["password"] = parsed_path.password

    if parsed_path.query:
        options["url_query"] = parsed_path.query
    if parsed_path.fragment:
        options["url_fragment"] = parsed_path.fragment

    if inherit_storage_options:
        update_storage_options(options, inherit_storage_options)

    return options
119
+
120
+
121
def update_storage_options(
    options: dict[str, Any], inherited: dict[str, Any] | None = None
) -> None:
    """Merge ``inherited`` into ``options`` in place.

    A key present in both mappings must hold equal values in each; otherwise
    ``KeyError`` is raised and ``options`` is left untouched.
    """
    extra = inherited if inherited else {}
    shared_keys = set(options).intersection(set(extra))
    for key in shared_keys:
        if options.get(key) != extra.get(key):
            raise KeyError(
                f"Collision between inferred and specified storage "
                f"option:\n{key}"
            )
    options.update(extra)
135
+
136
+
137
+ # Compression extensions registered via fsspec.compression.register_compression
138
+ compressions: dict[str, str] = {}
139
+
140
+
141
def infer_compression(filename: str) -> str | None:
    """Look up a compression name from the extension of ``filename``.

    The final extension is lower-cased, stripped of dots and looked up in the
    module-level ``compressions`` mapping (populated through
    fsspec.compression.register_compression). Returns the registered name, or
    None when the extension is not registered.
    """
    _, suffix = os.path.splitext(filename)
    key = suffix.strip(".").lower()
    return compressions[key] if key in compressions else None
152
+
153
+
154
def build_name_function(max_int: float) -> Callable[[int], str]:
    """Build a formatter that zero-pads integers to a common width.

    The width is the number of digits needed to write ``max_int``, so every
    value up to that maximum renders with the same length.

    >>> name_f = build_name_function(57)

    >>> name_f(7)
    '07'
    >>> name_f(31)
    '31'
    >>> build_name_function(1000)(42)
    '0042'
    >>> build_name_function(999)(42)
    '042'
    >>> build_name_function(0)(0)
    '0'
    """
    # The small offset keeps log10 defined for 0 and pushes exact powers of
    # ten up to the next digit count.
    nudged = max_int + 1e-8
    width = int(math.ceil(math.log10(nudged)))

    def name_function(i: int) -> str:
        text = str(i)
        return text.zfill(width)

    return name_function
181
+
182
+
183
def seek_delimiter(file: IO[bytes], delimiter: bytes, blocksize: int) -> bool:
    r"""Seek current file to file start, file end, or byte after delimiter seq.

    Seeks file to next chunk delimiter, where chunks are defined on file start,
    a delimiting sequence, and file end. Use file.tell() to see location afterwards.
    Note that file start is a valid split, so must be at offset > 0 to seek for
    delimiter.

    Parameters
    ----------
    file: a file
    delimiter: bytes
        a delimiter like ``b'\n'`` or message sentinel, matching file .read() type
    blocksize: int
        Number of bytes to read from the file at once.


    Returns
    -------
    Returns True if a delimiter was found, False if at file start or end.

    """

    if file.tell() == 0:
        # beginning-of-file, return without seek
        return False

    # Interface is for binary IO, with delimiter as bytes, but initialize last
    # with result of file.read to preserve compatibility with text IO.
    last: bytes | None = None
    while True:
        current = file.read(blocksize)
        if not current:
            # end-of-file without delimiter
            return False
        # Prepend the tail kept from the previous block so a delimiter that
        # straddles two reads is still found.
        full = last + current if last else current
        try:
            if delimiter in full:
                i = full.index(delimiter)
                # file.tell() is at the end of `full`; step back to the start
                # of the delimiter, then forward past it.
                file.seek(file.tell() - (len(full) - i) + len(delimiter))
                return True
            elif len(current) < blocksize:
                # end-of-file without delimiter
                return False
        except (OSError, ValueError):
            # Errors raised inside the block above are ignored and scanning
            # continues with the next read.
            pass
        # Keep only the last len(delimiter) items as overlap for the next read.
        last = full[-len(delimiter) :]
230
+
231
+
232
def read_block(
    f: IO[bytes],
    offset: int,
    length: int | None,
    delimiter: bytes | None = None,
    split_before: bool = False,
) -> bytes:
    """Read a block of bytes from a file

    Parameters
    ----------
    f: File
        Open file
    offset: int
        Byte offset to start read
    length: int
        Number of bytes to read, read through end of file if None
    delimiter: bytes (optional)
        Ensure reading starts and stops at delimiter bytestring
    split_before: bool (optional)
        Start/stop read *before* delimiter bytestring.


    If using the ``delimiter=`` keyword argument we ensure that the read
    starts and stops at delimiter boundaries that follow the locations
    ``offset`` and ``offset + length``.  If ``offset`` is zero then we
    start at zero, regardless of delimiter.  The bytestring returned WILL
    include the terminating delimiter string.

    Returns
    -------
    bytes read from the file.

    Examples
    --------

    >>> from io import BytesIO  # doctest: +SKIP
    >>> f = BytesIO(b'Alice, 100\\nBob, 200\\nCharlie, 300')  # doctest: +SKIP
    >>> read_block(f, 0, 13)  # doctest: +SKIP
    b'Alice, 100\\nBo'

    >>> read_block(f, 0, 13, delimiter=b'\\n')  # doctest: +SKIP
    b'Alice, 100\\nBob, 200\\n'

    >>> read_block(f, 10, 10, delimiter=b'\\n')  # doctest: +SKIP
    b'Bob, 200\\nCharlie, 300'
    """
    if delimiter:
        f.seek(offset)
        found_start_delim = seek_delimiter(f, delimiter, 2**16)
        if length is None:
            return f.read()
        start = f.tell()
        # The start moved forward to a delimiter boundary; shrink the
        # requested length by the same amount.
        length -= start - offset

        f.seek(start + length)
        found_end_delim = seek_delimiter(f, delimiter, 2**16)
        end = f.tell()

        # Adjust split location to before delimiter if seek found the
        # delimiter sequence, not start or end of file.
        if found_start_delim and split_before:
            start -= len(delimiter)

        if found_end_delim and split_before:
            end -= len(delimiter)

        offset = start
        length = end - start

    f.seek(offset)

    # Honour the documented contract: no length means read to end of file.
    if length is None:
        return f.read()
    return f.read(length)
304
+
305
+
306
def tokenize(*args: Any, **kwargs: Any) -> str:
    """Return an md5 hex digest of the string form of the arguments.

    Keyword arguments, when present, are appended to the positional tuple as
    a single dict before hashing.

    (modified from dask.base)

    >>> tokenize([1, 2, '3'])
    '9d71491b50023b06fc76928e6eddb952'

    >>> tokenize('Hello') == tokenize('Hello')
    True
    """
    payload = args + (kwargs,) if kwargs else args
    try:
        digest = md5(str(payload).encode())
    except ValueError:
        # FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380
        digest = md5(str(payload).encode(), usedforsecurity=False)
    return digest.hexdigest()
325
+
326
+
327
def stringify_path(filepath: str | os.PathLike[str] | pathlib.Path) -> str:
    """Attempt to convert a path-like object to a string.

    Parameters
    ----------
    filepath: object to be converted

    Returns
    -------
    filepath_str: maybe a string version of the object

    Notes
    -----
    Checks are made in this order: a ``str`` is returned as is; an object
    with ``__fspath__`` returns the result of calling it; an object with a
    ``path`` attribute returns that attribute; anything else is returned
    unchanged.
    """
    if isinstance(filepath, str):
        return filepath
    if hasattr(filepath, "__fspath__"):
        return filepath.__fspath__()
    if hasattr(filepath, "path"):
        return filepath.path
    return filepath  # type: ignore[return-value]
357
+
358
+
359
def make_instance(
    cls: Callable[..., T], args: Sequence[Any], kwargs: dict[str, Any]
) -> T:
    """Build ``cls(*args, **kwargs)``, call ``_determine_worker()`` on the
    new object, and return it."""
    created = cls(*args, **kwargs)
    created._determine_worker()  # type: ignore[attr-defined]
    return created
365
+
366
+
367
def common_prefix(paths: Iterable[str]) -> str:
    """Return the leading "/"-separated components shared by every path.

    Paths are compared component by component; the result is the joined run
    of leading components on which all paths agree.
    """
    split_paths = [p.split("/") for p in paths]
    limit = min(len(p) for p in split_paths)
    reference = split_paths[0]
    depth = 0
    # Advance while every path agrees with the first at this position.
    while depth < limit and all(p[depth] == reference[depth] for p in split_paths):
        depth += 1
    return "/".join(reference[:depth])
378
+
379
+
380
def other_paths(
    paths: list[str],
    path2: str | list[str],
    exists: bool = False,
    flatten: bool = False,
) -> list[str]:
    """In bulk file operations, construct a new file tree from a list of files

    Parameters
    ----------
    paths: list of str
        The input file tree
    path2: str or list of str
        Root to construct the new list in. If this is already a list of str, we just
        assert it has the right number of elements.
    exists: bool (optional)
        For a str destination, if it already exists (and is a dir), files should
        end up inside.
    flatten: bool (optional)
        Whether to flatten the input directory tree structure so that the output files
        are in the same directory.

    Returns
    -------
    list of str
    """
    if not isinstance(path2, str):
        # Already a list of destinations: only check the length.
        assert len(paths) == len(path2)
        return path2

    root = path2.rstrip("/")

    if flatten:
        # Keep only the final component of each input path.
        return ["/".join((root, p.split("/")[-1])) for p in paths]

    cp = common_prefix(paths)
    if exists:
        # Drop the last component of the common prefix.
        cp = cp.rsplit("/", 1)[0]
    if not cp and all(not s.startswith("/") for s in paths):
        return ["/".join([root, p]) for p in paths]
    return [p.replace(cp, root, 1) for p in paths]
423
+
424
+
425
def is_exception(obj: Any) -> bool:
    """Return True if ``obj`` is an instance of ``BaseException``."""
    verdict = isinstance(obj, BaseException)
    return verdict
427
+
428
+
429
def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
    """Return True when ``f`` has ``read``, ``close`` and ``tell`` attributes."""
    required = ("read", "close", "tell")
    return all(hasattr(f, name) for name in required)
434
+
435
+
436
def get_protocol(url: str) -> str:
    """Return the protocol part of ``url``.

    The URL is first passed through ``stringify_path``. The text before the
    first ``::`` or ``://`` separator is returned; when neither separator is
    present the result is ``"file"``.
    """
    url = stringify_path(url)
    # Pass maxsplit by keyword: supplying it positionally to re.split is
    # deprecated as of Python 3.13.
    parts = re.split(r"(\:\:|\://)", url, maxsplit=1)
    if len(parts) > 1:
        return parts[0]
    return "file"
442
+
443
+
444
def can_be_local(path: str) -> bool:
    """Report whether the filesystem class for ``path`` sets ``local_file``.

    Returns the class's ``local_file`` attribute (False when absent), or
    False when the protocol is unknown or its implementation fails to import.
    """
    from fsspec import get_filesystem_class

    try:
        protocol = get_protocol(path)
        fs_class = get_filesystem_class(protocol)
        return getattr(fs_class, "local_file", False)
    except (ValueError, ImportError):
        # not in registry or import failed
        return False
453
+
454
+
455
def get_package_version_without_import(name: str) -> str | None:
    """For given package name, try to find the version without importing it

    Import and package.__version__ is still the backup here, so an import
    *might* happen.

    Lookup order: an already-imported module's ``__version__``, then
    ``importlib.metadata.version``, then importing the module.

    Returns either the version string, or None if the package
    or the version was not readily found.
    """
    if name in sys.modules:
        mod = sys.modules[name]
        if hasattr(mod, "__version__"):
            return mod.__version__
    try:
        return version(name)
    except Exception:
        # Best effort: any metadata failure falls through to the import
        # fallback. Catching Exception rather than using a bare ``except``
        # lets KeyboardInterrupt and SystemExit propagate.
        pass
    try:
        import importlib

        mod = importlib.import_module(name)
        return mod.__version__
    except (ImportError, AttributeError):
        return None
479
+
480
+
481
def setup_logging(
    logger: logging.Logger | None = None,
    logger_name: str | None = None,
    level: str = "DEBUG",
    clear: bool = True,
) -> logging.Logger:
    """Attach a formatted stream handler to a logger and set its level.

    Parameters
    ----------
    logger: logging.Logger (optional)
        Logger to configure; takes precedence over ``logger_name``.
    logger_name: str (optional)
        Name used with ``logging.getLogger`` when no logger is given.
    level: str
        Level passed to ``Logger.setLevel``.
    clear: bool
        If True, remove the logger's existing handlers first.

    Returns
    -------
    The configured logger.

    Raises
    ------
    ValueError
        If neither ``logger`` nor ``logger_name`` is provided.
    """
    if logger is None and logger_name is None:
        raise ValueError("Provide either logger object or logger name")

    target = logger or logging.getLogger(logger_name)

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s -- %(message)s"
        )
    )

    if clear:
        target.handlers.clear()
    target.addHandler(stream_handler)
    target.setLevel(level)
    return target
500
+
501
+
502
+ def _unstrip_protocol(name: str, fs: AbstractFileSystem) -> str:
503
+ return fs.unstrip_protocol(name)
504
+
505
+
506
def mirror_from(
    origin_name: str, methods: Iterable[str]
) -> Callable[[type[T]], type[T]]:
    """Class decorator exposing attributes of ``self.<origin_name>``.

    For each name in ``methods``, a read-only property is set on the
    decorated class that returns the attribute of the same name from the
    instance's ``origin_name`` attribute.
    """

    def forwarding_property(attr: str) -> property:
        # A separate function per name binds ``attr`` at creation time.
        def fetch(self: Any) -> Any:
            source = getattr(self, origin_name)
            return getattr(source, attr)

        return property(fetch)

    def wrapper(cls: type[T]) -> type[T]:
        for attr in methods:
            setattr(cls, attr, forwarding_property(attr))
        return cls

    return wrapper
524
+
525
+
526
@contextlib.contextmanager
def nullcontext(obj: T) -> Iterator[T]:
    """Context manager that yields ``obj`` and does nothing else."""
    yield obj
529
+
530
+
531
def merge_offset_ranges(
    paths: list[str],
    starts: list[int] | int,
    ends: list[int] | int,
    max_gap: int = 0,
    max_block: int | None = None,
    sort: bool = True,
) -> tuple[list[str], list[int], list[int]]:
    """Merge adjacent byte-offset ranges when the inter-range
    gap is <= `max_gap`, and when the merged byte range does not
    exceed `max_block` (if specified). By default, this function
    will re-order the input paths and byte ranges to ensure sorted
    order. If the user can guarantee that the inputs are already
    sorted, passing `sort=False` will skip the re-ordering.

    Parameters
    ----------
    paths: list of str
    starts, ends: list of int, or int
        A scalar is repeated once per path.
    max_gap: int
    max_block: int (optional)
    sort: bool

    Returns
    -------
    (paths, starts, ends) after merging.

    Raises
    ------
    TypeError
        If ``paths`` is not a list.
    ValueError
        If ``starts`` or ``ends`` does not have one entry per path.
    """
    # Check input
    if not isinstance(paths, list):
        raise TypeError(f"`paths` must be a list, got {type(paths).__name__}")
    if not isinstance(starts, list):
        starts = [starts] * len(paths)
    if not isinstance(ends, list):
        ends = [ends] * len(paths)
    if len(starts) != len(paths) or len(ends) != len(paths):
        raise ValueError(
            "`starts` and `ends` must each have one entry per path: got "
            f"{len(paths)} paths, {len(starts)} starts, {len(ends)} ends"
        )

    # Early Return
    if len(starts) <= 1:
        return paths, starts, ends

    # A falsy start (e.g. None) is treated as offset 0.
    starts = [s or 0 for s in starts]
    # Sort by paths and then ranges if `sort=True`
    if sort:
        paths, starts, ends = (
            list(v)
            for v in zip(
                *sorted(
                    zip(paths, starts, ends),
                )
            )
        )

    if paths:
        # Loop through the coupled `paths`, `starts`, and
        # `ends`, and merge adjacent blocks when appropriate
        new_paths = paths[:1]
        new_starts = starts[:1]
        new_ends = ends[:1]
        for i in range(1, len(paths)):
            if paths[i] == paths[i - 1] and new_ends[-1] is None:
                # The current block for this path has no end bound, so the
                # range is dropped rather than emitted separately.
                continue
            elif (
                paths[i] != paths[i - 1]
                or ((starts[i] - new_ends[-1]) > max_gap)
                or (max_block is not None and (ends[i] - new_starts[-1]) > max_block)
            ):
                # Cannot merge with previous block.
                # Add new `paths`, `starts`, and `ends` elements
                new_paths.append(paths[i])
                new_starts.append(starts[i])
                new_ends.append(ends[i])
            else:
                # Merge with previous block by updating the
                # last element of `ends`
                new_ends[-1] = ends[i]
        return new_paths, new_starts, new_ends

    # `paths` is empty. Just return input lists
    return paths, starts, ends
599
+
600
+
601
def file_size(filelike: IO[bytes]) -> int:
    """Return the size of an open file-like object.

    Seeks to the end to measure the length, then restores the position the
    file had on entry.
    """
    original_position = filelike.tell()
    try:
        size = filelike.seek(0, os.SEEK_END)
    finally:
        filelike.seek(original_position)
    return size
608
+
609
+
610
@contextlib.contextmanager
def atomic_write(path: str, mode: str = "wb"):
    """
    Context manager yielding a temporary file created in the same directory
    as `path`. On a clean exit the temporary file replaces `path`; if the
    body raises, the temporary file is removed and the exception propagates.
    """
    directory, basename = os.path.split(path)
    handle, temp_name = tempfile.mkstemp(dir=directory, prefix=basename + "-")
    try:
        with open(handle, mode) as stream:
            yield stream
    except BaseException:
        # Clean up the partial file; it may already be gone.
        try:
            os.unlink(temp_name)
        except FileNotFoundError:
            pass
        raise
    else:
        os.replace(temp_name, path)
629
+
630
+
631
def _translate(pat, STAR, QUESTION_MARK):
    """Translate one glob pattern segment into a list of regex fragments.

    ``STAR`` and ``QUESTION_MARK`` are the regex strings emitted for ``*``
    and ``?``. Bracket expressions are converted to regex character sets;
    every other character is escaped with ``re.escape``. The fragments are
    returned as a list for the caller to join.
    """
    # Copied from: https://github.com/python/cpython/pull/106703.
    res: list[str] = []
    add = res.append
    i, n = 0, len(pat)
    while i < n:
        c = pat[i]
        i = i + 1
        if c == "*":
            # compress consecutive `*` into one
            if (not res) or res[-1] is not STAR:
                add(STAR)
        elif c == "?":
            add(QUESTION_MARK)
        elif c == "[":
            # Scan for the closing "]". A leading "!" and a "]" directly
            # after the opening (or after "!") are part of the set contents.
            j = i
            if j < n and pat[j] == "!":
                j = j + 1
            if j < n and pat[j] == "]":
                j = j + 1
            while j < n and pat[j] != "]":
                j = j + 1
            if j >= n:
                # No closing bracket: emit a literal "[".
                add("\\[")
            else:
                stuff = pat[i:j]
                if "-" not in stuff:
                    stuff = stuff.replace("\\", r"\\")
                else:
                    # Split the set contents at the hyphens that form ranges.
                    chunks = []
                    k = i + 2 if pat[i] == "!" else i + 1
                    while True:
                        k = pat.find("-", k, j)
                        if k < 0:
                            break
                        chunks.append(pat[i:k])
                        i = k + 1
                        k = k + 3
                    chunk = pat[i:j]
                    if chunk:
                        chunks.append(chunk)
                    else:
                        # Trailing hyphen: keep it with the last chunk.
                        chunks[-1] += "-"
                    # Remove empty ranges -- invalid in RE.
                    for k in range(len(chunks) - 1, 0, -1):
                        if chunks[k - 1][-1] > chunks[k][0]:
                            chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:]
                            del chunks[k]
                    # Escape backslashes and hyphens for set difference (--).
                    # Hyphens that create ranges shouldn't be escaped.
                    stuff = "-".join(
                        s.replace("\\", r"\\").replace("-", r"\-") for s in chunks
                    )
                # Escape set operations (&&, ~~ and ||).
                stuff = re.sub(r"([&~|])", r"\\\1", stuff)
                # Continue scanning after the closing "]".
                i = j + 1
                if not stuff:
                    # Empty range: never match.
                    add("(?!)")
                elif stuff == "!":
                    # Negated empty range: match any character.
                    add(".")
                else:
                    if stuff[0] == "!":
                        # Glob negation "!" becomes regex negation "^".
                        stuff = "^" + stuff[1:]
                    elif stuff[0] in ("^", "["):
                        # Escape a leading "^" or "[" so it is taken literally.
                        stuff = "\\" + stuff
                    add(f"[{stuff}]")
        else:
            add(re.escape(c))
    assert i == n
    return res
703
+
704
+
705
def glob_translate(pat):
    # Copied from: https://github.com/python/cpython/pull/106703.
    # The keyword parameters' values are fixed to:
    # recursive=True, include_hidden=True, seps=None
    """Translate a pathname with shell wildcards to a regular expression."""
    seps = os.path.sep + os.path.altsep if os.path.altsep else os.path.sep
    escaped_seps = "".join(re.escape(sep) for sep in seps)
    any_sep = f"[{escaped_seps}]" if len(seps) > 1 else escaped_seps
    not_sep = f"[^{escaped_seps}]"
    one_last_segment = f"{not_sep}+"
    one_segment = f"{one_last_segment}{any_sep}"
    any_segments = f"(?:.+{any_sep})?"
    any_last_segments = ".*"

    pieces = []
    parts = re.split(any_sep, pat)
    final_idx = len(parts) - 1
    for idx, part in enumerate(parts):
        is_last = idx >= final_idx
        if part == "*":
            # A whole-component "*" matches exactly one non-empty segment.
            pieces.append(one_last_segment if is_last else one_segment)
        elif part == "**":
            # A whole-component "**" matches any number of segments.
            pieces.append(any_last_segments if is_last else any_segments)
        elif "**" in part:
            raise ValueError(
                "Invalid pattern: '**' can only be an entire path component"
            )
        else:
            if part:
                pieces.extend(_translate(part, f"{not_sep}*", not_sep))
            if not is_last:
                pieces.append(any_sep)
    res = "".join(pieces)
    return rf"(?s:{res})\Z"
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/METADATA ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.4
2
+ Name: huggingface_hub
3
+ Version: 0.36.2
4
+ Summary: Client library to download and publish models, datasets and other repos on the huggingface.co hub
5
+ Home-page: https://github.com/huggingface/huggingface_hub
6
+ Author: Hugging Face, Inc.
7
+ Author-email: julien@huggingface.co
8
+ License: Apache
9
+ Keywords: model-hub machine-learning models natural-language-processing deep-learning pytorch pretrained-models
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: Education
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: Apache Software License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
+ Requires-Python: >=3.8.0
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Requires-Dist: filelock
28
+ Requires-Dist: fsspec>=2023.5.0
29
+ Requires-Dist: hf-xet<2.0.0,>=1.1.3; platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "arm64" or platform_machine == "aarch64"
30
+ Requires-Dist: packaging>=20.9
31
+ Requires-Dist: pyyaml>=5.1
32
+ Requires-Dist: requests
33
+ Requires-Dist: tqdm>=4.42.1
34
+ Requires-Dist: typing-extensions>=3.7.4.3
35
+ Provides-Extra: cli
36
+ Requires-Dist: InquirerPy==0.3.4; extra == "cli"
37
+ Provides-Extra: inference
38
+ Requires-Dist: aiohttp; extra == "inference"
39
+ Provides-Extra: oauth
40
+ Requires-Dist: authlib>=1.3.2; extra == "oauth"
41
+ Requires-Dist: fastapi; extra == "oauth"
42
+ Requires-Dist: httpx; extra == "oauth"
43
+ Requires-Dist: itsdangerous; extra == "oauth"
44
+ Provides-Extra: torch
45
+ Requires-Dist: torch; extra == "torch"
46
+ Requires-Dist: safetensors[torch]; extra == "torch"
47
+ Provides-Extra: hf-transfer
48
+ Requires-Dist: hf_transfer>=0.1.4; extra == "hf-transfer"
49
+ Provides-Extra: fastai
50
+ Requires-Dist: toml; extra == "fastai"
51
+ Requires-Dist: fastai>=2.4; extra == "fastai"
52
+ Requires-Dist: fastcore>=1.3.27; extra == "fastai"
53
+ Provides-Extra: tensorflow
54
+ Requires-Dist: tensorflow; extra == "tensorflow"
55
+ Requires-Dist: pydot; extra == "tensorflow"
56
+ Requires-Dist: graphviz; extra == "tensorflow"
57
+ Provides-Extra: tensorflow-testing
58
+ Requires-Dist: tensorflow; extra == "tensorflow-testing"
59
+ Requires-Dist: keras<3.0; extra == "tensorflow-testing"
60
+ Provides-Extra: hf-xet
61
+ Requires-Dist: hf-xet<2.0.0,>=1.1.2; extra == "hf-xet"
62
+ Provides-Extra: mcp
63
+ Requires-Dist: mcp>=1.8.0; extra == "mcp"
64
+ Requires-Dist: typer; extra == "mcp"
65
+ Requires-Dist: aiohttp; extra == "mcp"
66
+ Provides-Extra: testing
67
+ Requires-Dist: InquirerPy==0.3.4; extra == "testing"
68
+ Requires-Dist: aiohttp; extra == "testing"
69
+ Requires-Dist: authlib>=1.3.2; extra == "testing"
70
+ Requires-Dist: fastapi; extra == "testing"
71
+ Requires-Dist: httpx; extra == "testing"
72
+ Requires-Dist: itsdangerous; extra == "testing"
73
+ Requires-Dist: jedi; extra == "testing"
74
+ Requires-Dist: Jinja2; extra == "testing"
75
+ Requires-Dist: pytest<8.2.2,>=8.1.1; extra == "testing"
76
+ Requires-Dist: pytest-cov; extra == "testing"
77
+ Requires-Dist: pytest-env; extra == "testing"
78
+ Requires-Dist: pytest-xdist; extra == "testing"
79
+ Requires-Dist: pytest-vcr; extra == "testing"
80
+ Requires-Dist: pytest-asyncio; extra == "testing"
81
+ Requires-Dist: pytest-rerunfailures<16.0; extra == "testing"
82
+ Requires-Dist: pytest-mock; extra == "testing"
83
+ Requires-Dist: urllib3<2.0; extra == "testing"
84
+ Requires-Dist: soundfile; extra == "testing"
85
+ Requires-Dist: Pillow; extra == "testing"
86
+ Requires-Dist: gradio>=4.0.0; extra == "testing"
87
+ Requires-Dist: numpy; extra == "testing"
88
+ Requires-Dist: fastapi; extra == "testing"
89
+ Provides-Extra: typing
90
+ Requires-Dist: typing-extensions>=4.8.0; extra == "typing"
91
+ Requires-Dist: types-PyYAML; extra == "typing"
92
+ Requires-Dist: types-requests; extra == "typing"
93
+ Requires-Dist: types-simplejson; extra == "typing"
94
+ Requires-Dist: types-toml; extra == "typing"
95
+ Requires-Dist: types-tqdm; extra == "typing"
96
+ Requires-Dist: types-urllib3; extra == "typing"
97
+ Provides-Extra: quality
98
+ Requires-Dist: ruff>=0.9.0; extra == "quality"
99
+ Requires-Dist: mypy<1.15.0,>=1.14.1; python_version == "3.8" and extra == "quality"
100
+ Requires-Dist: mypy==1.15.0; python_version >= "3.9" and extra == "quality"
101
+ Requires-Dist: libcst>=1.4.0; extra == "quality"
102
+ Requires-Dist: ty; extra == "quality"
103
+ Provides-Extra: all
104
+ Requires-Dist: InquirerPy==0.3.4; extra == "all"
105
+ Requires-Dist: aiohttp; extra == "all"
106
+ Requires-Dist: authlib>=1.3.2; extra == "all"
107
+ Requires-Dist: fastapi; extra == "all"
108
+ Requires-Dist: httpx; extra == "all"
109
+ Requires-Dist: itsdangerous; extra == "all"
110
+ Requires-Dist: jedi; extra == "all"
111
+ Requires-Dist: Jinja2; extra == "all"
112
+ Requires-Dist: pytest<8.2.2,>=8.1.1; extra == "all"
113
+ Requires-Dist: pytest-cov; extra == "all"
114
+ Requires-Dist: pytest-env; extra == "all"
115
+ Requires-Dist: pytest-xdist; extra == "all"
116
+ Requires-Dist: pytest-vcr; extra == "all"
117
+ Requires-Dist: pytest-asyncio; extra == "all"
118
+ Requires-Dist: pytest-rerunfailures<16.0; extra == "all"
119
+ Requires-Dist: pytest-mock; extra == "all"
120
+ Requires-Dist: urllib3<2.0; extra == "all"
121
+ Requires-Dist: soundfile; extra == "all"
122
+ Requires-Dist: Pillow; extra == "all"
123
+ Requires-Dist: gradio>=4.0.0; extra == "all"
124
+ Requires-Dist: numpy; extra == "all"
125
+ Requires-Dist: fastapi; extra == "all"
126
+ Requires-Dist: ruff>=0.9.0; extra == "all"
127
+ Requires-Dist: mypy<1.15.0,>=1.14.1; python_version == "3.8" and extra == "all"
128
+ Requires-Dist: mypy==1.15.0; python_version >= "3.9" and extra == "all"
129
+ Requires-Dist: libcst>=1.4.0; extra == "all"
130
+ Requires-Dist: ty; extra == "all"
131
+ Requires-Dist: typing-extensions>=4.8.0; extra == "all"
132
+ Requires-Dist: types-PyYAML; extra == "all"
133
+ Requires-Dist: types-requests; extra == "all"
134
+ Requires-Dist: types-simplejson; extra == "all"
135
+ Requires-Dist: types-toml; extra == "all"
136
+ Requires-Dist: types-tqdm; extra == "all"
137
+ Requires-Dist: types-urllib3; extra == "all"
138
+ Provides-Extra: dev
139
+ Requires-Dist: InquirerPy==0.3.4; extra == "dev"
140
+ Requires-Dist: aiohttp; extra == "dev"
141
+ Requires-Dist: authlib>=1.3.2; extra == "dev"
142
+ Requires-Dist: fastapi; extra == "dev"
143
+ Requires-Dist: httpx; extra == "dev"
144
+ Requires-Dist: itsdangerous; extra == "dev"
145
+ Requires-Dist: jedi; extra == "dev"
146
+ Requires-Dist: Jinja2; extra == "dev"
147
+ Requires-Dist: pytest<8.2.2,>=8.1.1; extra == "dev"
148
+ Requires-Dist: pytest-cov; extra == "dev"
149
+ Requires-Dist: pytest-env; extra == "dev"
150
+ Requires-Dist: pytest-xdist; extra == "dev"
151
+ Requires-Dist: pytest-vcr; extra == "dev"
152
+ Requires-Dist: pytest-asyncio; extra == "dev"
153
+ Requires-Dist: pytest-rerunfailures<16.0; extra == "dev"
154
+ Requires-Dist: pytest-mock; extra == "dev"
155
+ Requires-Dist: urllib3<2.0; extra == "dev"
156
+ Requires-Dist: soundfile; extra == "dev"
157
+ Requires-Dist: Pillow; extra == "dev"
158
+ Requires-Dist: gradio>=4.0.0; extra == "dev"
159
+ Requires-Dist: numpy; extra == "dev"
160
+ Requires-Dist: fastapi; extra == "dev"
161
+ Requires-Dist: ruff>=0.9.0; extra == "dev"
162
+ Requires-Dist: mypy<1.15.0,>=1.14.1; python_version == "3.8" and extra == "dev"
163
+ Requires-Dist: mypy==1.15.0; python_version >= "3.9" and extra == "dev"
164
+ Requires-Dist: libcst>=1.4.0; extra == "dev"
165
+ Requires-Dist: ty; extra == "dev"
166
+ Requires-Dist: typing-extensions>=4.8.0; extra == "dev"
167
+ Requires-Dist: types-PyYAML; extra == "dev"
168
+ Requires-Dist: types-requests; extra == "dev"
169
+ Requires-Dist: types-simplejson; extra == "dev"
170
+ Requires-Dist: types-toml; extra == "dev"
171
+ Requires-Dist: types-tqdm; extra == "dev"
172
+ Requires-Dist: types-urllib3; extra == "dev"
173
+ Dynamic: author
174
+ Dynamic: author-email
175
+ Dynamic: classifier
176
+ Dynamic: description
177
+ Dynamic: description-content-type
178
+ Dynamic: home-page
179
+ Dynamic: keywords
180
+ Dynamic: license
181
+ Dynamic: license-file
182
+ Dynamic: provides-extra
183
+ Dynamic: requires-dist
184
+ Dynamic: requires-python
185
+ Dynamic: summary
186
+
187
+ <p align="center">
188
+ <picture>
189
+ <source media="(prefers-color-scheme: dark)" srcset="https://huggingface.co/datasets/huggingface/documentation-images/raw/main/huggingface_hub-dark.svg">
190
+ <source media="(prefers-color-scheme: light)" srcset="https://huggingface.co/datasets/huggingface/documentation-images/raw/main/huggingface_hub.svg">
191
+ <img alt="huggingface_hub library logo" src="https://huggingface.co/datasets/huggingface/documentation-images/raw/main/huggingface_hub.svg" width="352" height="59" style="max-width: 100%;">
192
+ </picture>
193
+ <br/>
194
+ <br/>
195
+ </p>
196
+
197
+ <p align="center">
198
+ <i>The official Python client for the Hugging Face Hub.</i>
199
+ </p>
200
+
201
+ <p align="center">
202
+ <a href="https://huggingface.co/docs/huggingface_hub/en/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/huggingface_hub/index.svg?down_color=red&down_message=offline&up_message=online&label=doc"></a>
203
+ <a href="https://github.com/huggingface/huggingface_hub/releases"><img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/huggingface_hub.svg"></a>
204
+ <a href="https://github.com/huggingface/huggingface_hub"><img alt="PyPi version" src="https://img.shields.io/pypi/pyversions/huggingface_hub.svg"></a>
205
+ <a href="https://pypi.org/project/huggingface-hub"><img alt="PyPI - Downloads" src="https://img.shields.io/pypi/dm/huggingface_hub"></a>
206
+ <a href="https://codecov.io/gh/huggingface/huggingface_hub"><img alt="Code coverage" src="https://codecov.io/gh/huggingface/huggingface_hub/branch/main/graph/badge.svg?token=RXP95LE2XL"></a>
207
+ </p>
208
+
209
+ <h4 align="center">
210
+ <p>
211
+ <b>English</b> |
212
+ <a href="https://github.com/huggingface/huggingface_hub/blob/main/i18n/README_de.md">Deutsch</a> |
213
+ <a href="https://github.com/huggingface/huggingface_hub/blob/main/i18n/README_hi.md">हिंदी</a> |
214
+ <a href="https://github.com/huggingface/huggingface_hub/blob/main/i18n/README_ko.md">한국어</a> |
215
+ <a href="https://github.com/huggingface/huggingface_hub/blob/main/i18n/README_cn.md">中文(简体)</a>
216
+ </p>
217
+ </h4>
218
+
219
+ ---
220
+
221
+ **Documentation**: <a href="https://hf.co/docs/huggingface_hub" target="_blank">https://hf.co/docs/huggingface_hub</a>
222
+
223
+ **Source Code**: <a href="https://github.com/huggingface/huggingface_hub" target="_blank">https://github.com/huggingface/huggingface_hub</a>
224
+
225
+ ---
226
+
227
+ ## Welcome to the huggingface_hub library
228
+
229
+ The `huggingface_hub` library allows you to interact with the [Hugging Face Hub](https://huggingface.co/), a platform democratizing open-source Machine Learning for creators and collaborators. Discover pre-trained models and datasets for your projects or play with the thousands of machine learning apps hosted on the Hub. You can also create and share your own models, datasets and demos with the community. The `huggingface_hub` library provides a simple way to do all these things with Python.
230
+
231
+ ## Key features
232
+
233
+ - [Download files](https://huggingface.co/docs/huggingface_hub/en/guides/download) from the Hub.
234
+ - [Upload files](https://huggingface.co/docs/huggingface_hub/en/guides/upload) to the Hub.
235
+ - [Manage your repositories](https://huggingface.co/docs/huggingface_hub/en/guides/repository).
236
+ - [Run Inference](https://huggingface.co/docs/huggingface_hub/en/guides/inference) on deployed models.
237
+ - [Search](https://huggingface.co/docs/huggingface_hub/en/guides/search) for models, datasets and Spaces.
238
+ - [Share Model Cards](https://huggingface.co/docs/huggingface_hub/en/guides/model-cards) to document your models.
239
+ - [Engage with the community](https://huggingface.co/docs/huggingface_hub/en/guides/community) through PRs and comments.
240
+
241
+ ## Installation
242
+
243
+ Install the `huggingface_hub` package with [pip](https://pypi.org/project/huggingface-hub/):
244
+
245
+ ```bash
246
+ pip install huggingface_hub
247
+ ```
248
+
249
+ If you prefer, you can also install it with [conda](https://huggingface.co/docs/huggingface_hub/en/installation#install-with-conda).
250
+
251
+ In order to keep the package minimal by default, `huggingface_hub` comes with optional dependencies useful for some use cases. For example, if you want to have a complete experience for Inference, run:
252
+
253
+ ```bash
254
+ pip install "huggingface_hub[inference]"
255
+ ```
256
+
257
+ To learn more about installation and optional dependencies, check out the [installation guide](https://huggingface.co/docs/huggingface_hub/en/installation).
258
+
259
+ ## Quick start
260
+
261
+ ### Download files
262
+
263
+ Download a single file
264
+
265
+ ```py
266
+ from huggingface_hub import hf_hub_download
267
+
268
+ hf_hub_download(repo_id="tiiuae/falcon-7b-instruct", filename="config.json")
269
+ ```
270
+
271
+ Or an entire repository
272
+
273
+ ```py
274
+ from huggingface_hub import snapshot_download
275
+
276
+ snapshot_download("stabilityai/stable-diffusion-2-1")
277
+ ```
278
+
279
+ Files will be downloaded in a local cache folder. More details in [this guide](https://huggingface.co/docs/huggingface_hub/en/guides/manage-cache).
280
+
281
+ ### Login
282
+
283
+ The Hugging Face Hub uses tokens to authenticate applications (see [docs](https://huggingface.co/docs/hub/security-tokens)). To log in on your machine, run the following CLI command:
284
+
285
+ ```bash
286
+ hf auth login
287
+ # or using an environment variable
288
+ hf auth login --token $HUGGINGFACE_TOKEN
289
+ ```
290
+
291
+ ### Create a repository
292
+
293
+ ```py
294
+ from huggingface_hub import create_repo
295
+
296
+ create_repo(repo_id="super-cool-model")
297
+ ```
298
+
299
+ ### Upload files
300
+
301
+ Upload a single file
302
+
303
+ ```py
304
+ from huggingface_hub import upload_file
305
+
306
+ upload_file(
307
+ path_or_fileobj="/home/lysandre/dummy-test/README.md",
308
+ path_in_repo="README.md",
309
+ repo_id="lysandre/test-model",
310
+ )
311
+ ```
312
+
313
+ Or an entire folder
314
+
315
+ ```py
316
+ from huggingface_hub import upload_folder
317
+
318
+ upload_folder(
319
+ folder_path="/path/to/local/space",
320
+ repo_id="username/my-cool-space",
321
+ repo_type="space",
322
+ )
323
+ ```
324
+
325
+ For details, see the [upload guide](https://huggingface.co/docs/huggingface_hub/en/guides/upload).
326
+
327
+ ## Integrating with the Hub
328
+
329
+ We're partnering with cool open source ML libraries to provide free model hosting and versioning. You can find the existing integrations [here](https://huggingface.co/docs/hub/libraries).
330
+
331
+ The advantages are:
332
+
333
+ - Free model or dataset hosting for libraries and their users.
334
+ - Built-in file versioning, even with very large files, thanks to a git-based approach.
335
+ - In-browser widgets to play with the uploaded models.
336
+ - Anyone can upload a new model for your library; they just need to add the corresponding tag for the model to be discoverable.
337
+ - Fast downloads! We use Cloudfront (a CDN) to geo-replicate downloads so they're blazing fast from anywhere on the globe.
338
+ - Usage stats and more features to come.
339
+
340
+ If you would like to integrate your library, feel free to open an issue to begin the discussion. We wrote a [step-by-step guide](https://huggingface.co/docs/hub/adding-a-library) with ❤️ showing how to do this integration.
341
+
342
+ ## Contributions (feature requests, bugs, etc.) are super welcome 💙💚💛💜🧡❤️
343
+
344
+ Everyone is welcome to contribute, and we value everybody's contribution. Code is not the only way to help the community.
345
+ Answering questions, helping others, reaching out and improving the documentation are immensely valuable to the community.
346
+ We wrote a [contribution guide](https://github.com/huggingface/huggingface_hub/blob/main/CONTRIBUTING.md) to summarize
347
+ how to get started contributing to this repository.
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/RECORD ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ../../../bin/hf,sha256=9ozTW_4otW8kP3AUHvO8MGxkYj-SzMz1_GPKhDOsiro,283
2
+ ../../../bin/huggingface-cli,sha256=NKTBg-JNllcLHRYGmFlm5IuQ-2CHqu6StEwS5IBObDg,301
3
+ ../../../bin/tiny-agents,sha256=VjA5kJPuGbyFCRTRyYbs-nVl-hOSN1RzWwDBKKp4cn4,293
4
+ huggingface_hub-0.36.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
5
+ huggingface_hub-0.36.2.dist-info/METADATA,sha256=TcKwo_snvLqLCR-YvM_0uUNp0mnJdorq7NC5nZcGZdk,15201
6
+ huggingface_hub-0.36.2.dist-info/RECORD,,
7
+ huggingface_hub-0.36.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ huggingface_hub-0.36.2.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
9
+ huggingface_hub-0.36.2.dist-info/entry_points.txt,sha256=FGUdvu8z-x7lvoJ4udumhcg3AtzigPraCn_ZbjEhIto,218
10
+ huggingface_hub-0.36.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
11
+ huggingface_hub-0.36.2.dist-info/top_level.txt,sha256=8KzlQJAY4miUvjAssOAJodqKOw3harNzuiwGQ9qLSSk,16
12
+ huggingface_hub/__init__.py,sha256=5Ya2RhJIISj9JBGTZLxNxtypOEBxNORUpeh2NBs2TjU,52675
13
+ huggingface_hub/__pycache__/__init__.cpython-312.pyc,,
14
+ huggingface_hub/__pycache__/_commit_api.cpython-312.pyc,,
15
+ huggingface_hub/__pycache__/_commit_scheduler.cpython-312.pyc,,
16
+ huggingface_hub/__pycache__/_inference_endpoints.cpython-312.pyc,,
17
+ huggingface_hub/__pycache__/_jobs_api.cpython-312.pyc,,
18
+ huggingface_hub/__pycache__/_local_folder.cpython-312.pyc,,
19
+ huggingface_hub/__pycache__/_login.cpython-312.pyc,,
20
+ huggingface_hub/__pycache__/_oauth.cpython-312.pyc,,
21
+ huggingface_hub/__pycache__/_snapshot_download.cpython-312.pyc,,
22
+ huggingface_hub/__pycache__/_space_api.cpython-312.pyc,,
23
+ huggingface_hub/__pycache__/_tensorboard_logger.cpython-312.pyc,,
24
+ huggingface_hub/__pycache__/_upload_large_folder.cpython-312.pyc,,
25
+ huggingface_hub/__pycache__/_webhooks_payload.cpython-312.pyc,,
26
+ huggingface_hub/__pycache__/_webhooks_server.cpython-312.pyc,,
27
+ huggingface_hub/__pycache__/community.cpython-312.pyc,,
28
+ huggingface_hub/__pycache__/constants.cpython-312.pyc,,
29
+ huggingface_hub/__pycache__/dataclasses.cpython-312.pyc,,
30
+ huggingface_hub/__pycache__/errors.cpython-312.pyc,,
31
+ huggingface_hub/__pycache__/fastai_utils.cpython-312.pyc,,
32
+ huggingface_hub/__pycache__/file_download.cpython-312.pyc,,
33
+ huggingface_hub/__pycache__/hf_api.cpython-312.pyc,,
34
+ huggingface_hub/__pycache__/hf_file_system.cpython-312.pyc,,
35
+ huggingface_hub/__pycache__/hub_mixin.cpython-312.pyc,,
36
+ huggingface_hub/__pycache__/inference_api.cpython-312.pyc,,
37
+ huggingface_hub/__pycache__/keras_mixin.cpython-312.pyc,,
38
+ huggingface_hub/__pycache__/lfs.cpython-312.pyc,,
39
+ huggingface_hub/__pycache__/repocard.cpython-312.pyc,,
40
+ huggingface_hub/__pycache__/repocard_data.cpython-312.pyc,,
41
+ huggingface_hub/__pycache__/repository.cpython-312.pyc,,
42
+ huggingface_hub/_commit_api.py,sha256=pGESDsicpWMeZnct-71635KgTfvUoyok_hPl9ZgIIWI,41010
43
+ huggingface_hub/_commit_scheduler.py,sha256=P64poLZoTJnSyR39SN6w5s9bLyngKstWee03fpoVETQ,14660
44
+ huggingface_hub/_inference_endpoints.py,sha256=ahmbPcEXsJ_JcMb9TDgdkD8Z2z9uytkFG3_1o6dTm8g,17598
45
+ huggingface_hub/_jobs_api.py,sha256=OFcbChcXsLvaX4oGumsHscZKAzsueYIhh0Z6Y4ycpio,10883
46
+ huggingface_hub/_local_folder.py,sha256=2iHXNgIT3UdSt2PvCovd0NzgVxTRypKb-rvAFLK-gZU,17305
47
+ huggingface_hub/_login.py,sha256=TWNkZpMPkDuttQ36uoi-ozLQ1IcXVsZ42tbcQ-b-h0Q,20248
48
+ huggingface_hub/_oauth.py,sha256=75ya9toHxC0WRKsLOAI212CrssRjTSxs16mHWWNMb3w,18714
49
+ huggingface_hub/_snapshot_download.py,sha256=b-NzYQcvktsAirIfGQKgzQwu8w0S6lhBTvnJ5S6saw8,16166
50
+ huggingface_hub/_space_api.py,sha256=jb6rF8qLtjaNU12D-8ygAPM26xDiHCu8CHXHowhGTmg,5470
51
+ huggingface_hub/_tensorboard_logger.py,sha256=tUdQzx-wXF4yjoGJG2izqZrn-IPMflMBWMkl1sKYzo0,8420
52
+ huggingface_hub/_upload_large_folder.py,sha256=l2YWLZttOw69EGdihT3y_Nhr5mweLGooZG9L8smNoHY,30066
53
+ huggingface_hub/_webhooks_payload.py,sha256=Xm3KaK7tCOGBlXkuZvbym6zjHXrT1XCrbUFWuXiBmNY,3617
54
+ huggingface_hub/_webhooks_server.py,sha256=RLrQuCHlDH_qUQJQOm11fKFDEhIUR2IxwazuKy-T9Uo,15672
55
+ huggingface_hub/cli/__init__.py,sha256=xzX1qgAvrtAX4gP59WrPlvOZFLuzuTgcjvanQvcpgHc,928
56
+ huggingface_hub/cli/__pycache__/__init__.cpython-312.pyc,,
57
+ huggingface_hub/cli/__pycache__/_cli_utils.cpython-312.pyc,,
58
+ huggingface_hub/cli/__pycache__/auth.cpython-312.pyc,,
59
+ huggingface_hub/cli/__pycache__/cache.cpython-312.pyc,,
60
+ huggingface_hub/cli/__pycache__/download.cpython-312.pyc,,
61
+ huggingface_hub/cli/__pycache__/hf.cpython-312.pyc,,
62
+ huggingface_hub/cli/__pycache__/jobs.cpython-312.pyc,,
63
+ huggingface_hub/cli/__pycache__/lfs.cpython-312.pyc,,
64
+ huggingface_hub/cli/__pycache__/repo.cpython-312.pyc,,
65
+ huggingface_hub/cli/__pycache__/repo_files.cpython-312.pyc,,
66
+ huggingface_hub/cli/__pycache__/system.cpython-312.pyc,,
67
+ huggingface_hub/cli/__pycache__/upload.cpython-312.pyc,,
68
+ huggingface_hub/cli/__pycache__/upload_large_folder.cpython-312.pyc,,
69
+ huggingface_hub/cli/_cli_utils.py,sha256=Nt6CjbkYqQQRuh70bUXVA6rZpbZt_Sa1WqBUxjQLu6g,2095
70
+ huggingface_hub/cli/auth.py,sha256=XSsbU7-_TS5IXdASkgUCdQeoXVG82VUyGYvOS4oLLRs,7317
71
+ huggingface_hub/cli/cache.py,sha256=fQjYfbRUapeHsK10Y6w_Ixu9JKyuZyM7pJzExJGd_2c,15855
72
+ huggingface_hub/cli/download.py,sha256=8b5wqhMYg3X9tar9EEeWdPZk9um1kZTI_WgBqyiatqs,7141
73
+ huggingface_hub/cli/hf.py,sha256=SQ73_SXEQnWVJkhKT_6bwNQBHQXGOdI5qqlTTtI0XH0,2328
74
+ huggingface_hub/cli/jobs.py,sha256=eA6Q7iy_-7vjU4SjYPvn71b2aVo2qt3q-pVxLyXCWqg,44317
75
+ huggingface_hub/cli/lfs.py,sha256=J9MkKOGUW6GjBrKs2zZUCOaAGxpatxsEoSbBjuhDJV8,7230
76
+ huggingface_hub/cli/repo.py,sha256=CuOqQZ7WELLk9Raf3tnyXILt9e93OrlS8Dyxx3BqdQA,10618
77
+ huggingface_hub/cli/repo_files.py,sha256=9oeeQJx8Z0ygbTElw1o5T6dGtRbeolcXENt_ouEBvjk,4844
78
+ huggingface_hub/cli/system.py,sha256=eLSYME7ywt5Ae3tYQnS43Tai2pR2JLtA1KGImzPt5pM,1707
79
+ huggingface_hub/cli/upload.py,sha256=lOHR_JzfM2XL_pYK3Z1HlGnaAI-fw7xGY46Lccvbsy4,14362
80
+ huggingface_hub/cli/upload_large_folder.py,sha256=w4RIW0yZKTnNnhDOB6yISnIo_h_Hy13KwWVzrFzczpY,6164
81
+ huggingface_hub/commands/__init__.py,sha256=AkbM2a-iGh0Vq_xAWhK3mu3uZ44km8-X5uWjKcvcrUQ,928
82
+ huggingface_hub/commands/__pycache__/__init__.cpython-312.pyc,,
83
+ huggingface_hub/commands/__pycache__/_cli_utils.cpython-312.pyc,,
84
+ huggingface_hub/commands/__pycache__/delete_cache.cpython-312.pyc,,
85
+ huggingface_hub/commands/__pycache__/download.cpython-312.pyc,,
86
+ huggingface_hub/commands/__pycache__/env.cpython-312.pyc,,
87
+ huggingface_hub/commands/__pycache__/huggingface_cli.cpython-312.pyc,,
88
+ huggingface_hub/commands/__pycache__/lfs.cpython-312.pyc,,
89
+ huggingface_hub/commands/__pycache__/repo.cpython-312.pyc,,
90
+ huggingface_hub/commands/__pycache__/repo_files.cpython-312.pyc,,
91
+ huggingface_hub/commands/__pycache__/scan_cache.cpython-312.pyc,,
92
+ huggingface_hub/commands/__pycache__/tag.cpython-312.pyc,,
93
+ huggingface_hub/commands/__pycache__/upload.cpython-312.pyc,,
94
+ huggingface_hub/commands/__pycache__/upload_large_folder.cpython-312.pyc,,
95
+ huggingface_hub/commands/__pycache__/user.cpython-312.pyc,,
96
+ huggingface_hub/commands/__pycache__/version.cpython-312.pyc,,
97
+ huggingface_hub/commands/_cli_utils.py,sha256=ePYTIEWnU677nPvdNC5AdYcEB1400L6qYEUxMkVUzME,2329
98
+ huggingface_hub/commands/delete_cache.py,sha256=035yACUtVUIG8tEtc5vexDoFFphzdk5IXkFTlD4WMiw,17738
99
+ huggingface_hub/commands/download.py,sha256=0QY9ho7eiAPvFndBPttGtH6vXNk3r9AioltNwc8h1Z4,8310
100
+ huggingface_hub/commands/env.py,sha256=qv4SmjuzUz9exo4RDMY2HqabLCKE1oRb55cBA6LN9R4,1342
101
+ huggingface_hub/commands/huggingface_cli.py,sha256=gDi7JueyiLD0bGclTEYfHPQWpAY_WBdPfHT7vkqa5v0,2654
102
+ huggingface_hub/commands/lfs.py,sha256=xdbnNRO04UuQemEhUGT809jFgQn9Rj-SnyT_0Ph-VYg,7342
103
+ huggingface_hub/commands/repo.py,sha256=WcRDFqUYKB0Kz0zFopegiG614ot6VOYTAf6jht0BMss,6042
104
+ huggingface_hub/commands/repo_files.py,sha256=ftjLCC3XCY-AMmiYiZPIdRMmIqZbqVZw-BSjBLcZup4,5054
105
+ huggingface_hub/commands/scan_cache.py,sha256=gQlhBZgWkUzH4wrIYnvgV7CA4C7rvV2SuY0x2JCB7g0,8675
106
+ huggingface_hub/commands/tag.py,sha256=4fgQuXJHG59lTVyOjIUZjxdJDL4JZW4q10XDPSo-gss,6382
107
+ huggingface_hub/commands/upload.py,sha256=eAJIig4ljtO9FRyGjiz6HbHS-Q4MOQziRgzjQrl5Koo,14576
108
+ huggingface_hub/commands/upload_large_folder.py,sha256=_1id84BFtbL8HgFRKZ-el_uPrijamz1qWlzO16KbUAc,6254
109
+ huggingface_hub/commands/user.py,sha256=dDpi0mLYvTeYf0fhPVQyEJsn7Wrk6gWvR5YHC6RgebU,7516
110
+ huggingface_hub/commands/version.py,sha256=rGpCbvxImY9eQqXrshYt609Iws27R75WARmKQrIo6Ok,1390
111
+ huggingface_hub/community.py,sha256=exJxrySnXURAijkVOcreuwM5JAuuz2L1xTSDkd223wk,12365
112
+ huggingface_hub/constants.py,sha256=nILseAp4rqLu_KQTZDpPGOhepVAPanD7azbomAvovj0,10313
113
+ huggingface_hub/dataclasses.py,sha256=rjQfuX9MeTXZQrCQC8JvkjpARDehOiSluE7Kz1L7Ueg,17337
114
+ huggingface_hub/errors.py,sha256=HVqmnJODe1wy1cYsx7AfjrwE4DD-gdKVvMTYTBfLjpA,11265
115
+ huggingface_hub/fastai_utils.py,sha256=m7wwWk-TdhIB1CJMigAzzUBP4eLQALutEzwjWf9Ej-o,16755
116
+ huggingface_hub/file_download.py,sha256=C76FMg1Rg7401K9UpwOAnFd1UG2ko0bL9AES2mM7Ntg,79254
117
+ huggingface_hub/hf_api.py,sha256=REMm9AFgUtyizI6tkEy6glX2Aa7-TH7-uWhlhl0q0fE,487935
118
+ huggingface_hub/hf_file_system.py,sha256=uLeublBZhWd4309fE3eFHIN8G7RCrX2_6_gr0BYjuzQ,48338
119
+ huggingface_hub/hub_mixin.py,sha256=Ii3w9o7XgGbj6UNPnieW5IDfaCd8OEKpIH1hRkncRDQ,38208
120
+ huggingface_hub/inference/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
121
+ huggingface_hub/inference/__pycache__/__init__.cpython-312.pyc,,
122
+ huggingface_hub/inference/__pycache__/_client.cpython-312.pyc,,
123
+ huggingface_hub/inference/__pycache__/_common.cpython-312.pyc,,
124
+ huggingface_hub/inference/_client.py,sha256=9cAIkBFuzFC5f6jVp62MJNDSUcPqxsFluhQLi6FqXdc,157536
125
+ huggingface_hub/inference/_common.py,sha256=dI3OPg0320OOB0FRy_kqftW9F3ghEnBVA5Gi4VaSctg,15778
126
+ huggingface_hub/inference/_generated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
127
+ huggingface_hub/inference/_generated/__pycache__/__init__.cpython-312.pyc,,
128
+ huggingface_hub/inference/_generated/__pycache__/_async_client.cpython-312.pyc,,
129
+ huggingface_hub/inference/_generated/_async_client.py,sha256=DSOAXJ_TxRubPisWnVKzepXalDA7PcE-NG3oczo8iMw,163445
130
+ huggingface_hub/inference/_generated/types/__init__.py,sha256=9WvrGQ8aThtKSNzZF06j-CIE2ZuItne8FFnea1p1u38,6557
131
+ huggingface_hub/inference/_generated/types/__pycache__/__init__.cpython-312.pyc,,
132
+ huggingface_hub/inference/_generated/types/__pycache__/audio_classification.cpython-312.pyc,,
133
+ huggingface_hub/inference/_generated/types/__pycache__/audio_to_audio.cpython-312.pyc,,
134
+ huggingface_hub/inference/_generated/types/__pycache__/automatic_speech_recognition.cpython-312.pyc,,
135
+ huggingface_hub/inference/_generated/types/__pycache__/base.cpython-312.pyc,,
136
+ huggingface_hub/inference/_generated/types/__pycache__/chat_completion.cpython-312.pyc,,
137
+ huggingface_hub/inference/_generated/types/__pycache__/depth_estimation.cpython-312.pyc,,
138
+ huggingface_hub/inference/_generated/types/__pycache__/document_question_answering.cpython-312.pyc,,
139
+ huggingface_hub/inference/_generated/types/__pycache__/feature_extraction.cpython-312.pyc,,
140
+ huggingface_hub/inference/_generated/types/__pycache__/fill_mask.cpython-312.pyc,,
141
+ huggingface_hub/inference/_generated/types/__pycache__/image_classification.cpython-312.pyc,,
142
+ huggingface_hub/inference/_generated/types/__pycache__/image_segmentation.cpython-312.pyc,,
143
+ huggingface_hub/inference/_generated/types/__pycache__/image_to_image.cpython-312.pyc,,
144
+ huggingface_hub/inference/_generated/types/__pycache__/image_to_text.cpython-312.pyc,,
145
+ huggingface_hub/inference/_generated/types/__pycache__/image_to_video.cpython-312.pyc,,
146
+ huggingface_hub/inference/_generated/types/__pycache__/object_detection.cpython-312.pyc,,
147
+ huggingface_hub/inference/_generated/types/__pycache__/question_answering.cpython-312.pyc,,
148
+ huggingface_hub/inference/_generated/types/__pycache__/sentence_similarity.cpython-312.pyc,,
149
+ huggingface_hub/inference/_generated/types/__pycache__/summarization.cpython-312.pyc,,
150
+ huggingface_hub/inference/_generated/types/__pycache__/table_question_answering.cpython-312.pyc,,
151
+ huggingface_hub/inference/_generated/types/__pycache__/text2text_generation.cpython-312.pyc,,
152
+ huggingface_hub/inference/_generated/types/__pycache__/text_classification.cpython-312.pyc,,
153
+ huggingface_hub/inference/_generated/types/__pycache__/text_generation.cpython-312.pyc,,
154
+ huggingface_hub/inference/_generated/types/__pycache__/text_to_audio.cpython-312.pyc,,
155
+ huggingface_hub/inference/_generated/types/__pycache__/text_to_image.cpython-312.pyc,,
156
+ huggingface_hub/inference/_generated/types/__pycache__/text_to_speech.cpython-312.pyc,,
157
+ huggingface_hub/inference/_generated/types/__pycache__/text_to_video.cpython-312.pyc,,
158
+ huggingface_hub/inference/_generated/types/__pycache__/token_classification.cpython-312.pyc,,
159
+ huggingface_hub/inference/_generated/types/__pycache__/translation.cpython-312.pyc,,
160
+ huggingface_hub/inference/_generated/types/__pycache__/video_classification.cpython-312.pyc,,
161
+ huggingface_hub/inference/_generated/types/__pycache__/visual_question_answering.cpython-312.pyc,,
162
+ huggingface_hub/inference/_generated/types/__pycache__/zero_shot_classification.cpython-312.pyc,,
163
+ huggingface_hub/inference/_generated/types/__pycache__/zero_shot_image_classification.cpython-312.pyc,,
164
+ huggingface_hub/inference/_generated/types/__pycache__/zero_shot_object_detection.cpython-312.pyc,,
165
+ huggingface_hub/inference/_generated/types/audio_classification.py,sha256=Jg3mzfGhCSH6CfvVvgJSiFpkz6v4nNA0G4LJXacEgNc,1573
166
+ huggingface_hub/inference/_generated/types/audio_to_audio.py,sha256=2Ep4WkePL7oJwcp5nRJqApwviumGHbft9HhXE9XLHj4,891
167
+ huggingface_hub/inference/_generated/types/automatic_speech_recognition.py,sha256=8CEphr6rvRHgq1L5Md3tq14V0tEAmzJkemh1_7gSswo,5515
168
+ huggingface_hub/inference/_generated/types/base.py,sha256=4XG49q0-2SOftYQ8HXQnWLxiJktou-a7IoG3kdOv-kg,6751
169
+ huggingface_hub/inference/_generated/types/chat_completion.py,sha256=j1Y8G4g5yGs4g7N4sXWbipF8TwkQG0J-ftL9OxejkBw,11254
170
+ huggingface_hub/inference/_generated/types/depth_estimation.py,sha256=rcpe9MhYMeLjflOwBs3KMZPr6WjOH3FYEThStG-FJ3M,929
171
+ huggingface_hub/inference/_generated/types/document_question_answering.py,sha256=6BEYGwJcqGlah4RBJDAvWFTEXkO0mosBiMy82432nAM,3202
172
+ huggingface_hub/inference/_generated/types/feature_extraction.py,sha256=NMWVL_TLSG5SS5bdt1-fflkZ75UMlMKeTMtmdnUTADc,1537
173
+ huggingface_hub/inference/_generated/types/fill_mask.py,sha256=OrTgQ7Ndn0_dWK5thQhZwTOHbQni8j0iJcx9llyhRds,1708
174
+ huggingface_hub/inference/_generated/types/image_classification.py,sha256=A-Y024o8723_n8mGVos4TwdAkVL62McGeL1iIo4VzNs,1585
175
+ huggingface_hub/inference/_generated/types/image_segmentation.py,sha256=vrkI4SuP1Iq_iLXc-2pQhYY3SHN4gzvFBoZqbUHxU7o,1950
176
+ huggingface_hub/inference/_generated/types/image_to_image.py,sha256=snvGbmCdqchxGef25MceD7LSKAmVkIgnoX5t71rdlAQ,2290
177
+ huggingface_hub/inference/_generated/types/image_to_text.py,sha256=OaFEBAfgT-fOVzJ7xVermGf7VODhrc9-Jg38WrM7-2o,4810
178
+ huggingface_hub/inference/_generated/types/image_to_video.py,sha256=bC-L_cNsDhk4s_IdSiprJ9d1NeMGePLcUp7UPpco21w,2240
179
+ huggingface_hub/inference/_generated/types/object_detection.py,sha256=VuFlb1281qTXoSgJDmquGz-VNfEZLo2H0Rh_F6MF6ts,2000
180
+ huggingface_hub/inference/_generated/types/question_answering.py,sha256=zw38a9_9l2k1ifYZefjkioqZ4asfSRM9M4nU3gSCmAQ,2898
181
+ huggingface_hub/inference/_generated/types/sentence_similarity.py,sha256=w5Nj1g18eBzopZwxuDLI-fEsyaCK2KrHA5yf_XfSjgo,1052
182
+ huggingface_hub/inference/_generated/types/summarization.py,sha256=WGGr8uDLrZg8JQgF9ZMUP9euw6uZo6zwkVZ-IfvCFI0,1487
183
+ huggingface_hub/inference/_generated/types/table_question_answering.py,sha256=cJnIPA2fIbQP2Ejn7X_esY48qGWoXg30fnNOqCXiOVQ,2293
184
+ huggingface_hub/inference/_generated/types/text2text_generation.py,sha256=v-418w1JNNSZ2tuW9DUl6a36TQQCADa438A3ufvcbOw,1609
185
+ huggingface_hub/inference/_generated/types/text_classification.py,sha256=FarAjygLEfPofLfKeabzJ7PKEBItlHGoUNUOzyLRpL4,1445
186
+ huggingface_hub/inference/_generated/types/text_generation.py,sha256=28u-1zU7elk2teP3y4u1VAtDDHzY0JZ2KEEJe5d5uvg,5922
187
+ huggingface_hub/inference/_generated/types/text_to_audio.py,sha256=1HR9Q6s9MXqtKGTvHPLGVMum5-eg7O-Pgv6Nd0v8_HU,4741
188
+ huggingface_hub/inference/_generated/types/text_to_image.py,sha256=sGGi1Fa0n5Pmd6G3I-F2SBJcJ1M7Gmqnng6sfi0AVzs,1903
189
+ huggingface_hub/inference/_generated/types/text_to_speech.py,sha256=ROFuR32ijROCeqbv81Jos0lmaA8SRWyIUsWrdD4yWow,4760
190
+ huggingface_hub/inference/_generated/types/text_to_video.py,sha256=yHXVNs3t6aYO7visrBlB5cH7kjoysxF9510aofcf_18,1790
191
+ huggingface_hub/inference/_generated/types/token_classification.py,sha256=iblAcgfxXeaLYJ14NdiiCMIQuBlarUknLkXUklhvcLI,1915
192
+ huggingface_hub/inference/_generated/types/translation.py,sha256=xww4X5cfCYv_F0oINWLwqJRPCT6SV3VBAJuPjTs_j7o,1763
193
+ huggingface_hub/inference/_generated/types/video_classification.py,sha256=TyydjQw2NRLK9sDGzJUVnkDeo848ebmCx588Ur8I9q0,1680
194
+ huggingface_hub/inference/_generated/types/visual_question_answering.py,sha256=AWrQ6qo4gZa3PGedaNpzDFqx5yOYyjhnUB6iuZEj_uo,1673
195
+ huggingface_hub/inference/_generated/types/zero_shot_classification.py,sha256=BAiebPjsqoNa8EU35Dx0pfIv8W2c4GSl-TJckV1MaxQ,1738
196
+ huggingface_hub/inference/_generated/types/zero_shot_image_classification.py,sha256=8J9n6VqFARkWvPfAZNWEG70AlrMGldU95EGQQwn06zI,1487
197
+ huggingface_hub/inference/_generated/types/zero_shot_object_detection.py,sha256=GUd81LIV7oEbRWayDlAVgyLmY596r1M3AW0jXDp1yTA,1630
198
+ huggingface_hub/inference/_mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
199
+ huggingface_hub/inference/_mcp/__pycache__/__init__.cpython-312.pyc,,
200
+ huggingface_hub/inference/_mcp/__pycache__/_cli_hacks.cpython-312.pyc,,
201
+ huggingface_hub/inference/_mcp/__pycache__/agent.cpython-312.pyc,,
202
+ huggingface_hub/inference/_mcp/__pycache__/cli.cpython-312.pyc,,
203
+ huggingface_hub/inference/_mcp/__pycache__/constants.cpython-312.pyc,,
204
+ huggingface_hub/inference/_mcp/__pycache__/mcp_client.cpython-312.pyc,,
205
+ huggingface_hub/inference/_mcp/__pycache__/types.cpython-312.pyc,,
206
+ huggingface_hub/inference/_mcp/__pycache__/utils.cpython-312.pyc,,
207
+ huggingface_hub/inference/_mcp/_cli_hacks.py,sha256=KX9HZJPa1p8ngY3mtYGGlVUXfg4vYbbBRs-8HLToP04,3284
208
+ huggingface_hub/inference/_mcp/agent.py,sha256=jqvQwOajY41RIhCtD-XgVfuWbTouSYCQkIWJ1gHRrJQ,4262
209
+ huggingface_hub/inference/_mcp/cli.py,sha256=AmSUT6wXlE6EWmI0SfQgTWYnL07322zGwwk2yMZZlBc,9640
210
+ huggingface_hub/inference/_mcp/constants.py,sha256=kldRfaidXMdyMl_jLosaQomgWDv4shvnFe3dnQNwXSU,2511
211
+ huggingface_hub/inference/_mcp/mcp_client.py,sha256=9rcwOO7L2Ih0oGLkeY9o5gbkwEBmsDkHKf4XAmp4Mvc,16784
212
+ huggingface_hub/inference/_mcp/types.py,sha256=3gq-P_mrmvPI6KWBqjCxavtMPiGz10YXog7wg4oJYAo,941
213
+ huggingface_hub/inference/_mcp/utils.py,sha256=KFsGOC8dytS3VgaugBzibdteWasZ9CAnp83U2SyIlMw,4188
214
+ huggingface_hub/inference/_providers/__init__.py,sha256=UxPnzOdVcJgroPEatuahb4fsHaObUYPrwUCzv5ADCa4,9019
215
+ huggingface_hub/inference/_providers/__pycache__/__init__.cpython-312.pyc,,
216
+ huggingface_hub/inference/_providers/__pycache__/_common.cpython-312.pyc,,
217
+ huggingface_hub/inference/_providers/__pycache__/black_forest_labs.cpython-312.pyc,,
218
+ huggingface_hub/inference/_providers/__pycache__/cerebras.cpython-312.pyc,,
219
+ huggingface_hub/inference/_providers/__pycache__/clarifai.cpython-312.pyc,,
220
+ huggingface_hub/inference/_providers/__pycache__/cohere.cpython-312.pyc,,
221
+ huggingface_hub/inference/_providers/__pycache__/fal_ai.cpython-312.pyc,,
222
+ huggingface_hub/inference/_providers/__pycache__/featherless_ai.cpython-312.pyc,,
223
+ huggingface_hub/inference/_providers/__pycache__/fireworks_ai.cpython-312.pyc,,
224
+ huggingface_hub/inference/_providers/__pycache__/groq.cpython-312.pyc,,
225
+ huggingface_hub/inference/_providers/__pycache__/hf_inference.cpython-312.pyc,,
226
+ huggingface_hub/inference/_providers/__pycache__/hyperbolic.cpython-312.pyc,,
227
+ huggingface_hub/inference/_providers/__pycache__/nebius.cpython-312.pyc,,
228
+ huggingface_hub/inference/_providers/__pycache__/novita.cpython-312.pyc,,
229
+ huggingface_hub/inference/_providers/__pycache__/nscale.cpython-312.pyc,,
230
+ huggingface_hub/inference/_providers/__pycache__/openai.cpython-312.pyc,,
231
+ huggingface_hub/inference/_providers/__pycache__/publicai.cpython-312.pyc,,
232
+ huggingface_hub/inference/_providers/__pycache__/replicate.cpython-312.pyc,,
233
+ huggingface_hub/inference/_providers/__pycache__/sambanova.cpython-312.pyc,,
234
+ huggingface_hub/inference/_providers/__pycache__/scaleway.cpython-312.pyc,,
235
+ huggingface_hub/inference/_providers/__pycache__/together.cpython-312.pyc,,
236
+ huggingface_hub/inference/_providers/__pycache__/zai_org.cpython-312.pyc,,
237
+ huggingface_hub/inference/_providers/_common.py,sha256=brZJ1CUxDKooPdmVlm4cuKjvaW_refVY0Y7CbGQe7e4,12373
238
+ huggingface_hub/inference/_providers/black_forest_labs.py,sha256=FIukZoIFt_FDrTTDfpF-Vko5sXnmH0QvVIsMtV2Jzm8,2852
239
+ huggingface_hub/inference/_providers/cerebras.py,sha256=QOJ-1U-os7uE7p6eUnn_P_APq-yQhx28be7c3Tq2EuA,210
240
+ huggingface_hub/inference/_providers/clarifai.py,sha256=1cEXQwhGk4DRKiPCQUa5y-L6okTo4781EImQC8yJVOw,380
241
+ huggingface_hub/inference/_providers/cohere.py,sha256=O3tC-qIUL91mx_mE8bOHCtDWcQuKOUauhUoXSUBUCZ8,1253
242
+ huggingface_hub/inference/_providers/fal_ai.py,sha256=pCr5qP6R1W1CrEw-_nKdNuP3UqsUi58yL18w4r7mXRo,9989
243
+ huggingface_hub/inference/_providers/featherless_ai.py,sha256=QxBz-32O4PztxixrIjrfKuTOzvfqyUi-cVsw0Hf_zlY,1382
244
+ huggingface_hub/inference/_providers/fireworks_ai.py,sha256=Id226ITfPkOcFMFzly3MW9l-dZl9l4qizL4JEHWkBFk,1215
245
+ huggingface_hub/inference/_providers/groq.py,sha256=JTk2JV4ZOlaohho7zLAFQtk92kGVsPmLJ1hmzcwsqvQ,315
246
+ huggingface_hub/inference/_providers/hf_inference.py,sha256=0yi3cR-EJ4HYx3mSzOsMOTVmvVBkaajTzTfKB8JXQpk,9540
247
+ huggingface_hub/inference/_providers/hyperbolic.py,sha256=OQIBi2j3aNvuaSQ8BUK1K1PVeRXdrxc80G-6YmBa-ns,1985
248
+ huggingface_hub/inference/_providers/nebius.py,sha256=VJpTF2JZ58rznc9wxdk-57vwF8sV2vESw_WkXjXqCho,3580
249
+ huggingface_hub/inference/_providers/novita.py,sha256=HGVC8wPraRQUuI5uBoye1Y4Wqe4X116B71GhhbWy5yM,2514
250
+ huggingface_hub/inference/_providers/nscale.py,sha256=qWUsWinQmUbNUqehyKn34tVoWehu8gd-OZ2F4uj2SWM,1802
251
+ huggingface_hub/inference/_providers/openai.py,sha256=GCVYeNdjWIgpQQ7E_Xv8IebmdhTi0S6WfFosz3nLtps,1089
252
+ huggingface_hub/inference/_providers/publicai.py,sha256=1I2W6rORloB5QHSvky4njZO2XKLTwA-kPdNoauoT5rg,210
253
+ huggingface_hub/inference/_providers/replicate.py,sha256=otVfPkfBtlWrpjQub4V__t7g_w8Ewc7ZU3efiOauW-I,3820
254
+ huggingface_hub/inference/_providers/sambanova.py,sha256=Unt3H3jr_kgI9vzRjmmW1DFyoEuPkKCcgIIloiOj3j8,2037
255
+ huggingface_hub/inference/_providers/scaleway.py,sha256=Jy81kXWbXCHBpx6xmyzdEfXGSyhUfjKOLHuDSvhHWGo,1209
256
+ huggingface_hub/inference/_providers/together.py,sha256=KHF19CS3qXS7G1-CwcMiD8Z5wzPKEKi4F2DzqAthbBE,3439
257
+ huggingface_hub/inference/_providers/zai_org.py,sha256=plGzMZuLrChZvgpS3CCPqI6ImotZZxNLgfxnR7v6tw8,646
258
+ huggingface_hub/inference_api.py,sha256=b4-NhPSn9b44nYKV8tDKXodmE4JVdEymMWL4CVGkzlE,8323
259
+ huggingface_hub/keras_mixin.py,sha256=gDm8PBcTqYhfrEvhu1_ptxzxbVOF3h0wAArn90UyzRA,19547
260
+ huggingface_hub/lfs.py,sha256=v0mTThnULTmFv8MVWfrkQEwkiFXzWWx7xyp2VLf-EPo,17020
261
+ huggingface_hub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
262
+ huggingface_hub/repocard.py,sha256=8tmR7SYVQZ4iBFYCmOj0yl6Ohc9Vv136s-KQKkxBq7U,34865
263
+ huggingface_hub/repocard_data.py,sha256=hr4ReFpEQMNdh_9Dx-L-IJoI1ElHyk-h-8ZRqwVYYOE,34082
264
+ huggingface_hub/repository.py,sha256=axZcbAh4ufXEaMgPbrS1WWgvshd-mFvYnRZAZ_yYljQ,54541
265
+ huggingface_hub/serialization/__init__.py,sha256=kn-Fa-m4FzMnN8lNsF-SwFcfzug4CucexybGKyvZ8S0,1041
266
+ huggingface_hub/serialization/__pycache__/__init__.cpython-312.pyc,,
267
+ huggingface_hub/serialization/__pycache__/_base.cpython-312.pyc,,
268
+ huggingface_hub/serialization/__pycache__/_dduf.cpython-312.pyc,,
269
+ huggingface_hub/serialization/__pycache__/_tensorflow.cpython-312.pyc,,
270
+ huggingface_hub/serialization/__pycache__/_torch.cpython-312.pyc,,
271
+ huggingface_hub/serialization/_base.py,sha256=VGQ4Z9Abg2gsL_1rTGSS9p-3tkkG9eaERjlzBTLGkdU,8109
272
+ huggingface_hub/serialization/_dduf.py,sha256=s42239rLiHwaJE36QDEmS5GH7DSmQ__BffiHJO5RjIg,15424
273
+ huggingface_hub/serialization/_tensorflow.py,sha256=Ea3wN1bKgyb_9opj-FtH-WpIp0ptkovKimroZOudX5c,3608
274
+ huggingface_hub/serialization/_torch.py,sha256=dw3RMkr0CYAr_TwPG_rma-ueHBRTXpfEJtrVKAvvtN4,45143
275
+ huggingface_hub/templates/datasetcard_template.md,sha256=W-EMqR6wndbrnZorkVv56URWPG49l7MATGeI015kTvs,5503
276
+ huggingface_hub/templates/modelcard_template.md,sha256=4AqArS3cqdtbit5Bo-DhjcnDFR-pza5hErLLTPM4Yuc,6870
277
+ huggingface_hub/utils/__init__.py,sha256=ORfVkn5D0wuLIq12jjhTzn5_c4F8fRPxB7TG-iednuQ,3722
278
+ huggingface_hub/utils/__pycache__/__init__.cpython-312.pyc,,
279
+ huggingface_hub/utils/__pycache__/_auth.cpython-312.pyc,,
280
+ huggingface_hub/utils/__pycache__/_cache_assets.cpython-312.pyc,,
281
+ huggingface_hub/utils/__pycache__/_cache_manager.cpython-312.pyc,,
282
+ huggingface_hub/utils/__pycache__/_chunk_utils.cpython-312.pyc,,
283
+ huggingface_hub/utils/__pycache__/_datetime.cpython-312.pyc,,
284
+ huggingface_hub/utils/__pycache__/_deprecation.cpython-312.pyc,,
285
+ huggingface_hub/utils/__pycache__/_dotenv.cpython-312.pyc,,
286
+ huggingface_hub/utils/__pycache__/_experimental.cpython-312.pyc,,
287
+ huggingface_hub/utils/__pycache__/_fixes.cpython-312.pyc,,
288
+ huggingface_hub/utils/__pycache__/_git_credential.cpython-312.pyc,,
289
+ huggingface_hub/utils/__pycache__/_headers.cpython-312.pyc,,
290
+ huggingface_hub/utils/__pycache__/_hf_folder.cpython-312.pyc,,
291
+ huggingface_hub/utils/__pycache__/_http.cpython-312.pyc,,
292
+ huggingface_hub/utils/__pycache__/_lfs.cpython-312.pyc,,
293
+ huggingface_hub/utils/__pycache__/_pagination.cpython-312.pyc,,
294
+ huggingface_hub/utils/__pycache__/_paths.cpython-312.pyc,,
295
+ huggingface_hub/utils/__pycache__/_runtime.cpython-312.pyc,,
296
+ huggingface_hub/utils/__pycache__/_safetensors.cpython-312.pyc,,
297
+ huggingface_hub/utils/__pycache__/_subprocess.cpython-312.pyc,,
298
+ huggingface_hub/utils/__pycache__/_telemetry.cpython-312.pyc,,
299
+ huggingface_hub/utils/__pycache__/_typing.cpython-312.pyc,,
300
+ huggingface_hub/utils/__pycache__/_validators.cpython-312.pyc,,
301
+ huggingface_hub/utils/__pycache__/_xet.cpython-312.pyc,,
302
+ huggingface_hub/utils/__pycache__/_xet_progress_reporting.cpython-312.pyc,,
303
+ huggingface_hub/utils/__pycache__/endpoint_helpers.cpython-312.pyc,,
304
+ huggingface_hub/utils/__pycache__/insecure_hashlib.cpython-312.pyc,,
305
+ huggingface_hub/utils/__pycache__/logging.cpython-312.pyc,,
306
+ huggingface_hub/utils/__pycache__/sha.cpython-312.pyc,,
307
+ huggingface_hub/utils/__pycache__/tqdm.cpython-312.pyc,,
308
+ huggingface_hub/utils/_auth.py,sha256=Ixve2vxdftHXXk2R2vfyLzlVoDT39Tkq-Hrou9KCUvw,8286
309
+ huggingface_hub/utils/_cache_assets.py,sha256=kai77HPQMfYpROouMBQCr_gdBCaeTm996Sqj0dExbNg,5728
310
+ huggingface_hub/utils/_cache_manager.py,sha256=XbeYoZMj8_JCl6eqRviHO6DxGSS29r5Pj38xLlao96Y,34364
311
+ huggingface_hub/utils/_chunk_utils.py,sha256=MH7-6FwCDZ8noV6dGRytCOJGSfcZmDBvsvVotdI8TvQ,2109
312
+ huggingface_hub/utils/_datetime.py,sha256=kCS5jaKV25kOncX1xujbXsz5iDLcjLcLw85semGNzxQ,2770
313
+ huggingface_hub/utils/_deprecation.py,sha256=HZhRGGUX_QMKBBBwHHlffLtmCSK01TOpeXHefZbPfwI,4872
314
+ huggingface_hub/utils/_dotenv.py,sha256=RzHqC8HgzVxE-N4DFBcnemvX0NHmXcV0My2ASK0U1OQ,2017
315
+ huggingface_hub/utils/_experimental.py,sha256=3-c8irbn9sJr2CwWbzhGkIrdXKg8_x7BifhHFy32ei8,2470
316
+ huggingface_hub/utils/_fixes.py,sha256=xQV1QkUn2WpLqLjtXNiyn9gh-454K6AF-Q3kwkYAQD8,4437
317
+ huggingface_hub/utils/_git_credential.py,sha256=ao9rq-rVHn8lghSVZEjDAX4kIkNi7bayY361TDSgSpg,4619
318
+ huggingface_hub/utils/_headers.py,sha256=w4ayq4hLGaZ3B7nwdEi5Zu23SmmDuOwv58It78wkakk,8868
319
+ huggingface_hub/utils/_hf_folder.py,sha256=WNjTnu0Q7tqcSS9EsP4ssCJrrJMcCvAt8P_-LEtmOU8,2487
320
+ huggingface_hub/utils/_http.py,sha256=Cx8MxnXVvlOfg1w30RR03KcFSoIE0WjV1ZX2svwWmx4,25671
321
+ huggingface_hub/utils/_lfs.py,sha256=EC0Oz6Wiwl8foRNkUOzrETXzAWlbgpnpxo5a410ovFY,3957
322
+ huggingface_hub/utils/_pagination.py,sha256=EX5tRasSuQDaKbXuGYbInBK2odnSWNHgzw2tSgqeBRI,1906
323
+ huggingface_hub/utils/_paths.py,sha256=w1ZhFmmD5ykWjp_hAvhjtOoa2ZUcOXJrF4a6O3QpAWo,5042
324
+ huggingface_hub/utils/_runtime.py,sha256=L7SOYezdxKcwd4DovAY0UGY3qt27toXO-QjceIDwExk,11634
325
+ huggingface_hub/utils/_safetensors.py,sha256=GW3nyv7xQcuwObKYeYoT9VhURVzG1DZTbKBKho8Bbos,4458
326
+ huggingface_hub/utils/_subprocess.py,sha256=u9FFUDE7TrzQTiuEzlUnHx7S2P57GbYRV8u16GJwrFw,4625
327
+ huggingface_hub/utils/_telemetry.py,sha256=54LXeIJU5pEGghPAh06gqNAR-UoxOjVLvKqAQscwqZs,4890
328
+ huggingface_hub/utils/_typing.py,sha256=z-134-HG_qJc0cjdSXkmDm3vIRyF5aEfbZgJCB_Qp2Y,3628
329
+ huggingface_hub/utils/_validators.py,sha256=u8AacmA9xCCyer8efmzl1EpQUWTe3zVzsWSJSv3uxTU,9190
330
+ huggingface_hub/utils/_xet.py,sha256=f8qfk8YKePAeGUL6lQiQ1w_3bcs78oWwbeACYdUeg5k,7312
331
+ huggingface_hub/utils/_xet_progress_reporting.py,sha256=JK64hv8orABfNnk1_Wd0YyD_5FfeyVeBvelKpjaNIvs,6169
332
+ huggingface_hub/utils/endpoint_helpers.py,sha256=9VtIAlxQ5H_4y30sjCAgbu7XCqAtNLC7aRYxaNn0hLI,2366
333
+ huggingface_hub/utils/insecure_hashlib.py,sha256=iAaepavFZ5Dhfa5n8KozRfQprKmvcjSnt3X58OUl9fQ,1142
334
+ huggingface_hub/utils/logging.py,sha256=N6NXaCcbPbZSF-Oe-TY3ZnmkpmdFVyTOV8ASo-yVXLE,4916
335
+ huggingface_hub/utils/sha.py,sha256=OFnNGCba0sNcT2gUwaVCJnldxlltrHHe0DS_PCpV3C4,2134
336
+ huggingface_hub/utils/tqdm.py,sha256=xAKcyfnNHsZ7L09WuEM5Ew5-MDhiahLACbbN2zMmcLs,10671
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/REQUESTED ADDED
File without changes
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (79.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/entry_points.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ [console_scripts]
2
+ hf = huggingface_hub.cli.hf:main
3
+ huggingface-cli = huggingface_hub.commands.huggingface_cli:main
4
+ tiny-agents = huggingface_hub.inference._mcp.cli:app
5
+
6
+ [fsspec.specs]
7
+ hf = huggingface_hub.HfFileSystem
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub-0.36.2.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ huggingface_hub
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/debug.pxi ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @cython.final
2
+ @cython.internal
3
+ cdef class _MemDebug:
4
+ """Debugging support for the memory allocation in libxml2.
5
+ """
6
+ def bytes_used(self):
7
+ """bytes_used(self)
8
+
9
+ Returns the total amount of memory (in bytes) currently used by libxml2.
10
+ Note that libxml2 constrains this value to a C int, which limits
11
+ the accuracy on 64 bit systems.
12
+ """
13
+ return tree.xmlMemUsed()
14
+
15
+ def blocks_used(self):
16
+ """blocks_used(self)
17
+
18
+ Returns the total number of memory blocks currently allocated by libxml2.
19
+ Note that libxml2 constrains this value to a C int, which limits
20
+ the accuracy on 64 bit systems.
21
+ """
22
+ return tree.xmlMemBlocks()
23
+
24
+ def dict_size(self):
25
+ """dict_size(self)
26
+
27
+ Returns the current size of the global name dictionary used by libxml2
28
+ for the current thread. Each thread has its own dictionary.
29
+ """
30
+ c_dict = __GLOBAL_PARSER_CONTEXT._getThreadDict(NULL)
31
+ if c_dict is NULL:
32
+ raise MemoryError()
33
+ return tree.xmlDictSize(c_dict)
34
+
35
+
36
+ memory_debugger = _MemDebug()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/docloader.pxi ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Custom resolver API
2
+
3
+ ctypedef enum _InputDocumentDataType:
4
+ PARSER_DATA_INVALID
5
+ PARSER_DATA_EMPTY
6
+ PARSER_DATA_STRING
7
+ PARSER_DATA_FILENAME
8
+ PARSER_DATA_FILE
9
+
10
+ @cython.final
11
+ @cython.internal
12
+ cdef class _InputDocument:
13
+ cdef _InputDocumentDataType _type
14
+ cdef bytes _data_bytes
15
+ cdef object _filename
16
+ cdef object _file
17
+ cdef bint _close_file
18
+
19
+ def __cinit__(self):
20
+ self._type = PARSER_DATA_INVALID
21
+
22
+
23
+ cdef class Resolver:
24
+ "This is the base class of all resolvers."
25
+ def resolve(self, system_url, public_id, context):
26
+ """resolve(self, system_url, public_id, context)
27
+
28
+ Override this method to resolve an external source by
29
+ ``system_url`` and ``public_id``. The third argument is an
30
+ opaque context object.
31
+
32
+ Return the result of one of the ``resolve_*()`` methods.
33
+ """
34
+ return None
35
+
36
+ def resolve_empty(self, context):
37
+ """resolve_empty(self, context)
38
+
39
+ Return an empty input document.
40
+
41
+ Pass context as parameter.
42
+ """
43
+ cdef _InputDocument doc_ref
44
+ doc_ref = _InputDocument()
45
+ doc_ref._type = PARSER_DATA_EMPTY
46
+ return doc_ref
47
+
48
+ def resolve_string(self, string, context, *, base_url=None):
49
+ """resolve_string(self, string, context, base_url=None)
50
+
51
+ Return a parsable string as input document.
52
+
53
+ Pass data string and context as parameters. You can pass the
54
+ source URL or filename through the ``base_url`` keyword
55
+ argument.
56
+ """
57
+ cdef _InputDocument doc_ref
58
+ if isinstance(string, unicode):
59
+ string = (<unicode>string).encode('utf8')
60
+ elif not isinstance(string, bytes):
61
+ raise TypeError, "argument must be a byte string or unicode string"
62
+ doc_ref = _InputDocument()
63
+ doc_ref._type = PARSER_DATA_STRING
64
+ doc_ref._data_bytes = string
65
+ if base_url is not None:
66
+ doc_ref._filename = _encodeFilename(base_url)
67
+ return doc_ref
68
+
69
+ def resolve_filename(self, filename, context):
70
+ """resolve_filename(self, filename, context)
71
+
72
+ Return the name of a parsable file as input document.
73
+
74
+ Pass filename and context as parameters. You can also pass a
75
+ URL with an HTTP, FTP or file target.
76
+ """
77
+ cdef _InputDocument doc_ref
78
+ doc_ref = _InputDocument()
79
+ doc_ref._type = PARSER_DATA_FILENAME
80
+ doc_ref._filename = _encodeFilename(filename)
81
+ return doc_ref
82
+
83
+ def resolve_file(self, f, context, *, base_url=None, bint close=True):
84
+ """resolve_file(self, f, context, base_url=None, close=True)
85
+
86
+ Return an open file-like object as input document.
87
+
88
+ Pass open file and context as parameters. You can pass the
89
+ base URL or filename of the file through the ``base_url``
90
+ keyword argument. If the ``close`` flag is True (the
91
+ default), the file will be closed after reading.
92
+
93
+ Note that using ``.resolve_filename()`` is more efficient,
94
+ especially in threaded environments.
95
+ """
96
+ cdef _InputDocument doc_ref
97
+ try:
98
+ f.read
99
+ except AttributeError:
100
+ raise TypeError, "Argument is not a file-like object"
101
+ doc_ref = _InputDocument()
102
+ doc_ref._type = PARSER_DATA_FILE
103
+ if base_url is not None:
104
+ doc_ref._filename = _encodeFilename(base_url)
105
+ else:
106
+ doc_ref._filename = _getFilenameForFile(f)
107
+ doc_ref._close_file = close
108
+ doc_ref._file = f
109
+ return doc_ref
110
+
111
+ @cython.final
112
+ @cython.internal
113
+ cdef class _ResolverRegistry:
114
+ cdef object _resolvers
115
+ cdef Resolver _default_resolver
116
+ def __cinit__(self, Resolver default_resolver=None):
117
+ self._resolvers = set()
118
+ self._default_resolver = default_resolver
119
+
120
+ def add(self, Resolver resolver not None):
121
+ """add(self, resolver)
122
+
123
+ Register a resolver.
124
+
125
+ For each requested entity, the 'resolve' method of the resolver will
126
+ be called and the result will be passed to the parser. If this method
127
+ returns None, the request will be delegated to other resolvers or the
128
+ default resolver. The resolvers will be tested in an arbitrary order
129
+ until the first match is found.
130
+ """
131
+ self._resolvers.add(resolver)
132
+
133
+ def remove(self, resolver):
134
+ "remove(self, resolver)"
135
+ self._resolvers.discard(resolver)
136
+
137
+ cdef _ResolverRegistry _copy(self):
138
+ cdef _ResolverRegistry registry
139
+ registry = _ResolverRegistry(self._default_resolver)
140
+ registry._resolvers = self._resolvers.copy()
141
+ return registry
142
+
143
+ def copy(self):
144
+ "copy(self)"
145
+ return self._copy()
146
+
147
+ def resolve(self, system_url, public_id, context):
148
+ "resolve(self, system_url, public_id, context)"
149
+ for resolver in self._resolvers:
150
+ result = resolver.resolve(system_url, public_id, context)
151
+ if result is not None:
152
+ return result
153
+ if self._default_resolver is None:
154
+ return None
155
+ return self._default_resolver.resolve(system_url, public_id, context)
156
+
157
+ def __repr__(self):
158
+ return repr(self._resolvers)
159
+
160
+
161
+ @cython.internal
162
+ cdef class _ResolverContext(_ExceptionContext):
163
+ cdef _ResolverRegistry _resolvers
164
+ cdef _TempStore _storage
165
+
166
+ cdef int clear(self) except -1:
167
+ _ExceptionContext.clear(self)
168
+ self._storage.clear()
169
+ return 0
170
+
171
+
172
+ cdef _initResolverContext(_ResolverContext context,
173
+ _ResolverRegistry resolvers):
174
+ if resolvers is None:
175
+ context._resolvers = _ResolverRegistry()
176
+ else:
177
+ context._resolvers = resolvers
178
+ context._storage = _TempStore()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/dtd.pxi ADDED
@@ -0,0 +1,479 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # support for DTD validation
2
+ from lxml.includes cimport dtdvalid
3
+
4
+ cdef class DTDError(LxmlError):
5
+ """Base class for DTD errors.
6
+ """
7
+
8
+ cdef class DTDParseError(DTDError):
9
+ """Error while parsing a DTD.
10
+ """
11
+
12
+ cdef class DTDValidateError(DTDError):
13
+ """Error while validating an XML document with a DTD.
14
+ """
15
+
16
+
17
+ cdef inline int _assertValidDTDNode(node, void *c_node) except -1:
18
+ assert c_node is not NULL, "invalid DTD proxy at %s" % id(node)
19
+
20
+
21
+ @cython.final
22
+ @cython.internal
23
+ @cython.freelist(8)
24
+ cdef class _DTDElementContentDecl:
25
+ cdef DTD _dtd
26
+ cdef tree.xmlElementContent* _c_node
27
+
28
+ def __repr__(self):
29
+ return "<%s.%s object name=%r type=%r occur=%r at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, self.type, self.occur, id(self))
30
+
31
+ @property
32
+ def name(self):
33
+ _assertValidDTDNode(self, self._c_node)
34
+ return funicodeOrNone(self._c_node.name)
35
+
36
+ @property
37
+ def type(self):
38
+ _assertValidDTDNode(self, self._c_node)
39
+ cdef int type = self._c_node.type
40
+ if type == tree.XML_ELEMENT_CONTENT_PCDATA:
41
+ return "pcdata"
42
+ elif type == tree.XML_ELEMENT_CONTENT_ELEMENT:
43
+ return "element"
44
+ elif type == tree.XML_ELEMENT_CONTENT_SEQ:
45
+ return "seq"
46
+ elif type == tree.XML_ELEMENT_CONTENT_OR:
47
+ return "or"
48
+ else:
49
+ return None
50
+
51
+ @property
52
+ def occur(self):
53
+ _assertValidDTDNode(self, self._c_node)
54
+ cdef int occur = self._c_node.ocur
55
+ if occur == tree.XML_ELEMENT_CONTENT_ONCE:
56
+ return "once"
57
+ elif occur == tree.XML_ELEMENT_CONTENT_OPT:
58
+ return "opt"
59
+ elif occur == tree.XML_ELEMENT_CONTENT_MULT:
60
+ return "mult"
61
+ elif occur == tree.XML_ELEMENT_CONTENT_PLUS:
62
+ return "plus"
63
+ else:
64
+ return None
65
+
66
+ @property
67
+ def left(self):
68
+ _assertValidDTDNode(self, self._c_node)
69
+ c1 = self._c_node.c1
70
+ if c1:
71
+ node = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
72
+ node._dtd = self._dtd
73
+ node._c_node = <tree.xmlElementContent*>c1
74
+ return node
75
+ else:
76
+ return None
77
+
78
+ @property
79
+ def right(self):
80
+ _assertValidDTDNode(self, self._c_node)
81
+ c2 = self._c_node.c2
82
+ if c2:
83
+ node = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
84
+ node._dtd = self._dtd
85
+ node._c_node = <tree.xmlElementContent*>c2
86
+ return node
87
+ else:
88
+ return None
89
+
90
+
91
+ @cython.final
92
+ @cython.internal
93
+ @cython.freelist(8)
94
+ cdef class _DTDAttributeDecl:
95
+ cdef DTD _dtd
96
+ cdef tree.xmlAttribute* _c_node
97
+
98
+ def __repr__(self):
99
+ return "<%s.%s object name=%r elemname=%r prefix=%r type=%r default=%r default_value=%r at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, self.elemname, self.prefix, self.type, self.default, self.default_value, id(self))
100
+
101
+ @property
102
+ def name(self):
103
+ _assertValidDTDNode(self, self._c_node)
104
+ return funicodeOrNone(self._c_node.name)
105
+
106
+ @property
107
+ def elemname(self):
108
+ _assertValidDTDNode(self, self._c_node)
109
+ return funicodeOrNone(self._c_node.elem)
110
+
111
+ @property
112
+ def prefix(self):
113
+ _assertValidDTDNode(self, self._c_node)
114
+ return funicodeOrNone(self._c_node.prefix)
115
+
116
+ @property
117
+ def type(self):
118
+ _assertValidDTDNode(self, self._c_node)
119
+ cdef int type = self._c_node.atype
120
+ if type == tree.XML_ATTRIBUTE_CDATA:
121
+ return "cdata"
122
+ elif type == tree.XML_ATTRIBUTE_ID:
123
+ return "id"
124
+ elif type == tree.XML_ATTRIBUTE_IDREF:
125
+ return "idref"
126
+ elif type == tree.XML_ATTRIBUTE_IDREFS:
127
+ return "idrefs"
128
+ elif type == tree.XML_ATTRIBUTE_ENTITY:
129
+ return "entity"
130
+ elif type == tree.XML_ATTRIBUTE_ENTITIES:
131
+ return "entities"
132
+ elif type == tree.XML_ATTRIBUTE_NMTOKEN:
133
+ return "nmtoken"
134
+ elif type == tree.XML_ATTRIBUTE_NMTOKENS:
135
+ return "nmtokens"
136
+ elif type == tree.XML_ATTRIBUTE_ENUMERATION:
137
+ return "enumeration"
138
+ elif type == tree.XML_ATTRIBUTE_NOTATION:
139
+ return "notation"
140
+ else:
141
+ return None
142
+
143
+ @property
144
+ def default(self):
145
+ _assertValidDTDNode(self, self._c_node)
146
+ cdef int default = self._c_node.def_
147
+ if default == tree.XML_ATTRIBUTE_NONE:
148
+ return "none"
149
+ elif default == tree.XML_ATTRIBUTE_REQUIRED:
150
+ return "required"
151
+ elif default == tree.XML_ATTRIBUTE_IMPLIED:
152
+ return "implied"
153
+ elif default == tree.XML_ATTRIBUTE_FIXED:
154
+ return "fixed"
155
+ else:
156
+ return None
157
+
158
+ @property
159
+ def default_value(self):
160
+ _assertValidDTDNode(self, self._c_node)
161
+ return funicodeOrNone(self._c_node.defaultValue)
162
+
163
+ def itervalues(self):
164
+ _assertValidDTDNode(self, self._c_node)
165
+ cdef tree.xmlEnumeration *c_node = self._c_node.tree
166
+ while c_node is not NULL:
167
+ yield funicode(c_node.name)
168
+ c_node = c_node.next
169
+
170
+ def values(self):
171
+ return list(self.itervalues())
172
+
173
+
174
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDElementDecl:
    """Python-side view of one element declaration (``xmlElement``) of a DTD.

    Holds a reference to the owning ``DTD`` object in ``_dtd`` next to the
    raw C pointer in ``_c_node``.
    """
    cdef DTD _dtd
    cdef tree.xmlElement* _c_node

    def __repr__(self):
        cls = self.__class__
        return "<%s.%s object name=%r prefix=%r type=%r at 0x%x>" % (
            cls.__module__, cls.__name__,
            self.name, self.prefix, self.type, id(self))

    @property
    def name(self):
        """The ``name`` field of the declaration, via ``funicodeOrNone()``."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.name)

    @property
    def prefix(self):
        """The ``prefix`` field of the declaration, via ``funicodeOrNone()``."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.prefix)

    @property
    def type(self):
        """Name of the element content type stored in ``etype``.

        One of ``"undefined"``, ``"empty"``, ``"any"``, ``"mixed"`` or
        ``"element"``; ``None`` for any other constant.
        """
        _assertValidDTDNode(self, self._c_node)
        cdef int c_etype = self._c_node.etype
        if c_etype == tree.XML_ELEMENT_TYPE_UNDEFINED:
            return "undefined"
        if c_etype == tree.XML_ELEMENT_TYPE_EMPTY:
            return "empty"
        if c_etype == tree.XML_ELEMENT_TYPE_ANY:
            return "any"
        if c_etype == tree.XML_ELEMENT_TYPE_MIXED:
            return "mixed"
        if c_etype == tree.XML_ELEMENT_TYPE_ELEMENT:
            return "element"
        return None

    @property
    def content(self):
        """A ``_DTDElementContentDecl`` wrapping the ``content`` pointer, or
        ``None`` when that pointer is NULL.
        """
        _assertValidDTDNode(self, self._c_node)
        cdef tree.xmlElementContent* c_content = self._c_node.content
        if not c_content:
            return None
        decl = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
        decl._dtd = self._dtd
        decl._c_node = c_content
        return decl

    def iterattributes(self):
        """Yield a ``_DTDAttributeDecl`` for each attribute declaration that
        is chained to this element through the ``nexth`` links.
        """
        _assertValidDTDNode(self, self._c_node)
        cdef tree.xmlAttribute* c_attr = self._c_node.attributes
        while c_attr is not NULL:
            decl = <_DTDAttributeDecl>_DTDAttributeDecl.__new__(_DTDAttributeDecl)
            decl._dtd = self._dtd
            decl._c_node = c_attr
            yield decl
            c_attr = c_attr.nexth

    def attributes(self):
        """Return everything produced by ``iterattributes()`` as a list."""
        collected = list(self.iterattributes())
        return collected
235
+
236
+
237
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDEntityDecl:
    """Python-side view of one entity declaration (``xmlEntity``) of a DTD.

    Every property validates the node with ``_assertValidDTDNode()`` and then
    converts one C string field with ``funicodeOrNone()``.
    """
    cdef DTD _dtd
    cdef tree.xmlEntity* _c_node

    def __repr__(self):
        cls = self.__class__
        return "<%s.%s object name=%r at 0x%x>" % (
            cls.__module__, cls.__name__, self.name, id(self))

    @property
    def name(self):
        """The ``name`` field of the entity declaration."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.name)

    @property
    def orig(self):
        """The ``orig`` field of the entity declaration."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.orig)

    @property
    def content(self):
        """The ``content`` field of the entity declaration."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.content)

    @property
    def system_url(self):
        """The ``SystemID`` field of the entity declaration."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.SystemID)
265
+
266
+
267
+ ################################################################################
268
+ # DTD
269
+
270
cdef class DTD(_Validator):
    """DTD(self, file=None, external_id=None)
    A DTD validator.

    Can load from filesystem directly given a filename or file-like object.
    Alternatively, pass the keyword parameter ``external_id`` to load from a
    catalog.
    """
    cdef tree.xmlDtd* _c_dtd

    def __init__(self, file=None, *, external_id=None):
        """Parse a DTD from ``file`` or look it up by ``external_id``.

        ``file`` may be a filename, a path-like object or an object with a
        ``read`` attribute.  Raises ``DTDParseError`` when neither argument is
        given, when ``file`` has an unsupported type, or when parsing does not
        produce a DTD.
        """
        _Validator.__init__(self)
        if file is not None:
            file = _getFSPathOrObject(file)
            if _isString(file):
                file = _encodeFilename(file)
                with self._error_log:
                    orig_loader = _register_document_loader()
                    self._c_dtd = xmlparser.xmlParseDTD(NULL, _xcstr(file))
                    _reset_document_loader(orig_loader)
            elif hasattr(file, 'read'):
                orig_loader = _register_document_loader()
                try:
                    self._c_dtd = _parseDtdFromFilelike(file)
                finally:
                    # _parseDtdFromFilelike() raises on read/parse errors;
                    # restore the previous loader on that path as well.
                    _reset_document_loader(orig_loader)
            else:
                raise DTDParseError("file must be a filename, file-like or path-like object")
        elif external_id is not None:
            external_id_utf = _utf8(external_id)
            with self._error_log:
                orig_loader = _register_document_loader()
                self._c_dtd = xmlparser.xmlParseDTD(<const_xmlChar*>external_id_utf, NULL)
                _reset_document_loader(orig_loader)
        else:
            raise DTDParseError("either filename or external ID required")

        if self._c_dtd is NULL:
            raise DTDParseError(
                self._error_log._buildExceptionMessage("error parsing DTD"),
                self._error_log)

    @property
    def name(self):
        """The ``name`` field of the DTD, or ``None`` if no DTD is loaded."""
        if self._c_dtd is NULL:
            return None
        return funicodeOrNone(self._c_dtd.name)

    @property
    def external_id(self):
        """The ``ExternalID`` field of the DTD, or ``None`` if no DTD is loaded."""
        if self._c_dtd is NULL:
            return None
        return funicodeOrNone(self._c_dtd.ExternalID)

    @property
    def system_url(self):
        """The ``SystemID`` field of the DTD, or ``None`` if no DTD is loaded."""
        if self._c_dtd is NULL:
            return None
        return funicodeOrNone(self._c_dtd.SystemID)

    def iterelements(self):
        """Yield a ``_DTDElementDecl`` for each ``XML_ELEMENT_DECL`` child of
        the DTD.  Yields nothing when no DTD is loaded.
        """
        cdef tree.xmlNode *c_node = self._c_dtd.children if self._c_dtd is not NULL else NULL
        while c_node is not NULL:
            if c_node.type == tree.XML_ELEMENT_DECL:
                node = _DTDElementDecl()
                node._dtd = self
                node._c_node = <tree.xmlElement*>c_node
                yield node
            c_node = c_node.next

    def elements(self):
        """Return everything produced by ``iterelements()`` as a list."""
        return list(self.iterelements())

    def iterentities(self):
        """Yield a ``_DTDEntityDecl`` for each ``XML_ENTITY_DECL`` child of
        the DTD.  Yields nothing when no DTD is loaded.
        """
        cdef tree.xmlNode *c_node = self._c_dtd.children if self._c_dtd is not NULL else NULL
        while c_node is not NULL:
            if c_node.type == tree.XML_ENTITY_DECL:
                node = _DTDEntityDecl()
                node._dtd = self
                node._c_node = <tree.xmlEntity*>c_node
                yield node
            c_node = c_node.next

    def entities(self):
        """Return everything produced by ``iterentities()`` as a list."""
        return list(self.iterentities())

    def __dealloc__(self):
        tree.xmlFreeDtd(self._c_dtd)

    def __call__(self, etree):
        """__call__(self, etree)

        Validate doc using the DTD.

        Returns true if the document is valid, false if not.
        """
        cdef _Document doc
        cdef _Element root_node
        cdef xmlDoc* c_doc
        cdef dtdvalid.xmlValidCtxt* valid_ctxt
        cdef int ret = -1

        assert self._c_dtd is not NULL, "DTD not initialised"
        doc = _documentOrRaise(etree)
        root_node = _rootNodeOrRaise(etree)

        valid_ctxt = dtdvalid.xmlNewValidCtxt()
        if valid_ctxt is NULL:
            raise DTDError("Failed to create validation context")

        # work around error reporting bug in libxml2 <= 2.9.1 (and later?)
        # https://bugzilla.gnome.org/show_bug.cgi?id=724903
        valid_ctxt.error = <dtdvalid.xmlValidityErrorFunc>_nullGenericErrorFunc
        valid_ctxt.userData = NULL

        try:
            with self._error_log:
                c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
                ret = dtdvalid.xmlValidateDtd(valid_ctxt, c_doc, self._c_dtd)
                _destroyFakeDoc(doc._c_doc, c_doc)
        finally:
            # the validation context is released on every exit path
            dtdvalid.xmlFreeValidCtxt(valid_ctxt)

        # ret keeps its initial -1 if xmlValidateDtd() never assigned it
        if ret == -1:
            raise DTDValidateError("Internal error in DTD validation",
                                   self._error_log)
        return ret == 1
394
+
395
+
396
cdef tree.xmlDtd* _parseDtdFromFilelike(file) except NULL:
    """Read a DTD from a file-like object through a ``_FileReaderContext``.

    Re-raises any exception stored in the exception context during reading,
    and raises ``DTDParseError`` when the reader returns no DTD.
    """
    cdef tree.xmlDtd* c_parsed = NULL
    cdef _ExceptionContext stored_errors = _ExceptionContext()
    cdef _FileReaderContext reader = _FileReaderContext(file, stored_errors, None)
    cdef _ErrorLog parse_log = _ErrorLog()

    with parse_log:
        c_parsed = reader._readDtd()

    stored_errors._raise_if_stored()
    if c_parsed is NULL:
        raise DTDParseError("error parsing DTD", parse_log)
    return c_parsed
412
+
413
cdef DTD _dtdFactory(tree.xmlDtd* c_dtd):
    """Build a ``DTD`` object around a copy of ``c_dtd``.

    Returns ``None`` for a NULL pointer.  The object is created with
    ``DTD.__new__()`` so that ``DTD.__init__()`` is deliberately bypassed;
    only ``_Validator.__init__()`` is run on it.
    """
    if c_dtd is NULL:
        return None
    # do not run through DTD.__init__()!
    cdef DTD wrapper = DTD.__new__(DTD)
    wrapper._c_dtd = _copyDtd(c_dtd)
    _Validator.__init__(wrapper)
    return wrapper
422
+
423
+
424
cdef tree.xmlDtd* _copyDtd(tree.xmlDtd* c_orig_dtd) except NULL:
    """
    Duplicate a DTD with ``xmlCopyDtd()``.  Since libxml2 (currently) does
    not set up the element->attributes links in the copy, every attribute
    declaration found among the copy's children is re-linked here.

    Raises MemoryError when the copy could not be created.
    """
    cdef tree.xmlDtd* c_copy = tree.xmlCopyDtd(c_orig_dtd)
    if c_copy is NULL:
        raise MemoryError
    cdef tree.xmlNode* c_child = c_copy.children
    while c_child is not NULL:
        if c_child.type == tree.XML_ATTRIBUTE_DECL:
            _linkDtdAttribute(c_copy, <tree.xmlAttribute*>c_child)
        c_child = c_child.next
    return c_copy
438
+
439
+
440
cdef void _linkDtdAttribute(tree.xmlDtd* c_dtd, tree.xmlAttribute* c_attr) noexcept:
    """
    Hook an attribute declaration into the ``attributes``/``nexth`` chain of
    the element declaration it belongs to.

    Does nothing when the element is not declared in the DTD, or when the
    walk along the chain runs into ``c_attr`` itself.
    """
    c_owner = dtdvalid.xmlGetDtdElementDesc(c_dtd, c_attr.elem)
    if c_owner is NULL:
        # no such element? something is wrong with the DTD ...
        return

    c_cursor = c_owner.attributes
    if c_cursor is NULL:
        # first attribute of this element
        c_owner.attributes = c_attr
        c_attr.nexth = NULL
        return

    # libxml2 keeps namespace declarations first, and we need to make
    # sure we don't re-insert attributes that are already there
    if _isDtdNsDecl(c_attr):
        if not _isDtdNsDecl(c_cursor):
            # chain has no namespace declaration yet => becomes the new head
            c_owner.attributes = c_attr
            c_attr.nexth = c_cursor
            return
        # move to the last entry of the leading run of namespace declarations
        while c_cursor != c_attr and c_cursor.nexth and _isDtdNsDecl(c_cursor.nexth):
            c_cursor = c_cursor.nexth
    else:
        # append at end
        while c_cursor != c_attr and c_cursor.nexth:
            c_cursor = c_cursor.nexth

    if c_cursor == c_attr:
        # already linked
        return
    # splice c_attr in right behind the cursor
    c_attr.nexth = c_cursor.nexth
    c_cursor.nexth = c_attr
471
+
472
+
473
cdef bint _isDtdNsDecl(tree.xmlAttribute* c_attr) noexcept:
    """Tell whether an attribute declaration declares a namespace, i.e.
    whether its name or its (non-NULL) prefix is exactly "xmlns".
    """
    if cstring_h.strcmp(<const_char*>c_attr.name, "xmlns") == 0:
        return True
    if c_attr.prefix is NULL:
        return False
    return cstring_h.strcmp(<const_char*>c_attr.prefix, "xmlns") == 0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/iterparse.pxi ADDED
@@ -0,0 +1,438 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # iterparse -- event-driven parsing
2
+
3
+ DEF __ITERPARSE_CHUNK_SIZE = 32768  # size argument passed to each source.read() call in iterparse._read_more_events()
4
+
5
cdef class iterparse:
    """iterparse(self, source, events=("end",), tag=None, \
                  attribute_defaults=False, dtd_validation=False, \
                  load_dtd=False, no_network=True, remove_blank_text=False, \
                  compact=True, resolve_entities=True, remove_comments=False, \
                  remove_pis=False, strip_cdata=True, encoding=None, \
                  html=False, recover=None, huge_tree=False, \
                  collect_ids=True, schema=None)

    Incremental parser.

    Parses XML into a tree and generates tuples (event, element) in a
    SAX-like fashion. ``event`` is any of 'start', 'end', 'start-ns',
    'end-ns'.

    For 'start' and 'end', ``element`` is the Element that the parser just
    found opening or closing.  For 'start-ns', it is a tuple (prefix, URI) of
    a new namespace declaration.  For 'end-ns', it is simply None.  Note that
    all start and end events are guaranteed to be properly nested.

    The keyword argument ``events`` specifies a sequence of event type names
    that should be generated.  By default, only 'end' events will be
    generated.

    The additional ``tag`` argument restricts the 'start' and 'end' events to
    those elements that match the given tag.  The ``tag`` argument can also be
    a sequence of tags to allow matching more than one tag.  By default,
    events are generated for all elements.  Note that the 'start-ns' and
    'end-ns' events are not impacted by this restriction.

    The other keyword arguments in the constructor are mainly based on the
    libxml2 parser configuration.  A DTD will also be loaded if validation or
    attribute default values are requested.

    Available boolean keyword arguments:
     - attribute_defaults: read default attributes from DTD
     - dtd_validation: validate (if DTD is available)
     - load_dtd: use DTD for parsing
     - no_network: prevent network access for related files
     - remove_blank_text: discard blank text nodes
     - remove_comments: discard comments
     - remove_pis: discard processing instructions
     - strip_cdata: replace CDATA sections by normal text content (default:
                    True for XML, ignored otherwise)
     - compact: save memory for short text content (default: True)
     - resolve_entities: replace entities by their text value (default: True)
     - huge_tree: disable security restrictions and support very deep trees
                  and very long text content (only affects libxml2 2.7+)
     - collect_ids: passed through to the XML parser (default: True, not
                    used when ``html`` is set)
     - html: parse input as HTML (default: XML)
     - recover: try hard to parse through broken input (default: True for HTML,
                False otherwise)

    Other keyword arguments:
     - encoding: override the document encoding
     - schema: an XMLSchema to validate against
    """
    cdef _FeedParser _parser
    cdef object _tag
    cdef object _events
    cdef readonly object root
    cdef object _source
    cdef object _filename
    cdef object _error
    cdef bint _close_source_after_read

    def __init__(self, source, events=("end",), *, tag=None,
                 attribute_defaults=False, dtd_validation=False,
                 load_dtd=False, no_network=True, remove_blank_text=False,
                 compact=True, resolve_entities=True, remove_comments=False,
                 remove_pis=False, strip_cdata=True, encoding=None,
                 html=False, recover=None, huge_tree=False, collect_ids=True,
                 XMLSchema schema=None):
        if not hasattr(source, 'read'):
            # not file-like => treat as a path and open it ourselves
            source = _getFSPathOrObject(source)
            self._filename = source
            self._source = open(source, 'rb')
            self._close_source_after_read = True
        else:
            self._filename = _getFilenameForFile(source)
            self._source = source
            self._close_source_after_read = False

        if recover is None:
            recover = html

        try:
            if html:
                # make sure we're not looking for namespaces
                events = [event for event in events
                          if event not in ('start-ns', 'end-ns')]
                parser = HTMLPullParser(
                    events,
                    tag=tag,
                    recover=recover,
                    base_url=self._filename,
                    encoding=encoding,
                    remove_blank_text=remove_blank_text,
                    remove_comments=remove_comments,
                    remove_pis=remove_pis,
                    no_network=no_network,
                    target=None,  # TODO
                    schema=schema,
                    compact=compact)
            else:
                parser = XMLPullParser(
                    events,
                    tag=tag,
                    recover=recover,
                    base_url=self._filename,
                    encoding=encoding,
                    attribute_defaults=attribute_defaults,
                    dtd_validation=dtd_validation,
                    load_dtd=load_dtd,
                    no_network=no_network,
                    schema=schema,
                    huge_tree=huge_tree,
                    remove_blank_text=remove_blank_text,
                    resolve_entities=resolve_entities,
                    remove_comments=remove_comments,
                    remove_pis=remove_pis,
                    strip_cdata=strip_cdata,
                    # honour the caller's choice instead of forcing True
                    collect_ids=collect_ids,
                    target=None,  # TODO
                    compact=compact)
        except BaseException:
            # Parser setup failed: don't leak a file that was opened above.
            # A caller-provided source is only dropped, never closed.
            self._close_source()
            raise

        self._events = parser.read_events()
        self._parser = parser

    @property
    def error_log(self):
        """The error log of the last (or current) parser run.
        """
        return self._parser.feed_error_log

    @property
    def resolvers(self):
        """The custom resolver registry of the last (or current) parser run.
        """
        return self._parser.resolvers

    @property
    def version(self):
        """The version of the underlying XML parser."""
        return self._parser.version

    def set_element_class_lookup(self, ElementClassLookup lookup = None):
        """set_element_class_lookup(self, lookup = None)

        Set a lookup scheme for element classes generated from this parser.

        Reset it by passing None or nothing.
        """
        self._parser.set_element_class_lookup(lookup)

    def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
        """makeelement(self, _tag, attrib=None, nsmap=None, **_extra)

        Creates a new element associated with this parser.

        Forwards all arguments to the underlying parser and returns the
        element it creates.
        """
        return self._parser.makeelement(
            _tag, attrib=attrib, nsmap=nsmap, **_extra)

    @cython.final
    cdef _close_source(self):
        """Drop the reference to the source; close it only if we opened it."""
        if self._source is None:
            return
        if not self._close_source_after_read:
            self._source = None
            return
        try:
            close = self._source.close
        except AttributeError:
            close = None
        finally:
            self._source = None
        if close is not None:
            close()

    def __iter__(self):
        return self

    def __next__(self):
        # serve events that are already queued before reading more input
        try:
            return next(self._events)
        except StopIteration:
            pass
        context = <_SaxParserContext>self._parser._getPushParserContext()
        if self._source is not None:
            done = False
            while not done:
                try:
                    done = self._read_more_events(context)
                    return next(self._events)
                except StopIteration:
                    pass  # no events yet
                except Exception as e:
                    # remember the error, but deliver pending events first
                    self._error = e
                    self._close_source()
                    try:
                        return next(self._events)
                    except StopIteration:
                        break
        # nothing left to read or return
        if self._error is not None:
            error = self._error
            self._error = None
            raise error
        if (context._validator is not None
                and not context._validator.isvalid()):
            _raiseParseError(context._c_ctxt, self._filename,
                             context._error_log)
        # no errors => all done
        raise StopIteration

    @cython.final
    cdef bint _read_more_events(self, _SaxParserContext context) except -123:
        """Feed one chunk from the source into the parser.

        Returns True once the source is exhausted and the parser was closed,
        False if more data may follow.  Raises TypeError if ``read()`` does
        not return a bytes object.
        """
        data = self._source.read(__ITERPARSE_CHUNK_SIZE)
        if not isinstance(data, bytes):
            self._close_source()
            raise TypeError("reading file objects must return bytes objects")
        if not data:
            try:
                self.root = self._parser.close()
            finally:
                self._close_source()
            return True
        self._parser.feed(data)
        return False
230
+
231
+
232
+ cdef enum _IterwalkSkipStates:  # states of iterwalk's skip_subtree() handling
233
+ IWSKIP_NEXT_IS_START  # set by iterwalk._start_node() when it queued a 'start'/'start-ns' event
234
+ IWSKIP_SKIP_NEXT  # set by skip_subtree(); __next__ then does not descend into the children
235
+ IWSKIP_CAN_SKIP  # set by _next_event() when the event being returned is 'start'/'start-ns'
236
+ IWSKIP_CANNOT_SKIP  # default state: skip_subtree() has no effect
237
+
238
+
239
cdef class iterwalk:
    """iterwalk(self, element_or_tree, events=("end",), tag=None)

    A tree walker that generates events from an existing tree as if it
    was parsing XML data with ``iterparse()``.

    Just as for ``iterparse()``, the ``tag`` argument can be a single tag or a
    sequence of tags.

    After receiving a 'start' or 'start-ns' event, the children and
    descendants of the current element can be excluded from iteration
    by calling the ``skip_subtree()`` method.
    """
    cdef _MultiTagMatcher _matcher
    cdef list   _node_stack
    cdef list   _events
    cdef object _pop_event
    cdef object _include_siblings
    cdef int    _index
    cdef int    _event_filter
    cdef _IterwalkSkipStates _skip_state

    def __init__(self, element_or_tree, events=("end",), tag=None):
        cdef _Element root
        cdef int ns_count
        root = _rootNodeOrRaise(element_or_tree)
        self._event_filter = _buildParseEventFilter(events)
        if tag is None or tag == '*':
            self._matcher = None
        else:
            self._matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag)
        self._node_stack = []
        self._events = []
        self._pop_event = self._events.pop
        self._skip_state = IWSKIP_CANNOT_SKIP  # ignore all skip requests by default

        if not self._event_filter:
            # no events requested => nothing to walk
            self._index = -1
            return

        self._index = 0
        if self._matcher is not None and self._event_filter & PARSE_EVENT_FILTER_START:
            self._matcher.cacheTags(root._doc)

        # When processing an ElementTree, add events for the preceding comments/PIs.
        if self._event_filter & (PARSE_EVENT_FILTER_COMMENT | PARSE_EVENT_FILTER_PI):
            if isinstance(element_or_tree, _ElementTree):
                self._include_siblings = root
            for sibling in reversed(list(root.itersiblings(preceding=True))):
                if self._event_filter & PARSE_EVENT_FILTER_COMMENT and sibling.tag is Comment:
                    self._events.append(('comment', sibling))
                elif self._event_filter & PARSE_EVENT_FILTER_PI and sibling.tag is PI:
                    self._events.append(('pi', sibling))

        ns_count = self._start_node(root)
        self._node_stack.append( (root, ns_count) )

    def __iter__(self):
        return self

    def __next__(self):
        cdef xmlNode* c_next
        cdef _Element current
        cdef _Element child
        cdef int ns_count = 0
        if self._events:
            return self._next_event()
        if self._matcher is not None and self._index >= 0:
            current = self._node_stack[self._index][0]
            self._matcher.cacheTags(current._doc)

        # find next node
        while self._index >= 0:
            current = self._node_stack[self._index][0]

            if self._skip_state == IWSKIP_SKIP_NEXT:
                # subtree was skipped => pretend there are no children
                c_next = NULL
            else:
                c_next = self._process_non_elements(
                    current._doc, _findChildForwards(current._c_node, 0))
            self._skip_state = IWSKIP_CANNOT_SKIP

            while c_next is NULL:
                # back off through parents
                self._index -= 1
                current = self._end_node()
                if self._index < 0:
                    break
                c_next = self._process_non_elements(
                    current._doc, _nextElement(current._c_node))

            if c_next is not NULL:
                child = _elementFactory(current._doc, c_next)
                if self._event_filter & (PARSE_EVENT_FILTER_START |
                                         PARSE_EVENT_FILTER_START_NS):
                    ns_count = self._start_node(child)
                elif self._event_filter & PARSE_EVENT_FILTER_END_NS:
                    ns_count = _countNsDefs(child._c_node)
                self._node_stack.append( (child, ns_count) )
                self._index += 1
            if self._events:
                return self._next_event()

        # the walk is over; emit trailing comments/PIs of the tree root once
        if self._include_siblings is not None:
            current, self._include_siblings = self._include_siblings, None
            self._process_non_elements(current._doc, _nextElement(current._c_node))
            if self._events:
                return self._next_event()

        raise StopIteration

    @cython.final
    cdef xmlNode* _process_non_elements(self, _Document doc, xmlNode* c_node):
        """Step over comment and PI nodes starting at ``c_node``, queueing
        'comment'/'pi' events for them when the event filter asks for them.

        Returns the node at which stepping stopped (possibly NULL).
        """
        while c_node is not NULL and c_node.type != tree.XML_ELEMENT_NODE:
            if c_node.type == tree.XML_COMMENT_NODE:
                event_name = "comment"
                wanted = self._event_filter & PARSE_EVENT_FILTER_COMMENT
            elif c_node.type == tree.XML_PI_NODE:
                event_name = "pi"
                wanted = self._event_filter & PARSE_EVENT_FILTER_PI
            else:
                break
            if wanted:
                self._events.append(
                    (event_name, _elementFactory(doc, c_node)))
            c_node = _nextElement(c_node)
        return c_node

    @cython.final
    cdef _next_event(self):
        """Pop and return the oldest queued event, enabling ``skip_subtree()``
        if that event is a pending 'start'/'start-ns'.
        """
        if (self._skip_state == IWSKIP_NEXT_IS_START
                and self._events[0][0] in ('start', 'start-ns')):
            self._skip_state = IWSKIP_CAN_SKIP
        return self._pop_event(0)

    def skip_subtree(self):
        """Prevent descending into the current subtree.
        Instead, the next returned event will be the 'end' event of the current element
        (if included), ignoring any children or descendants.

        This has no effect right after an 'end' or 'end-ns' event.
        """
        if self._skip_state != IWSKIP_CAN_SKIP:
            return
        self._skip_state = IWSKIP_SKIP_NEXT

    @cython.final
    cdef int _start_node(self, _Element node) except -1:
        """Queue the 'start-ns'/'start' events for ``node`` as far as the
        event filter (and tag matcher) allows and return the namespace count
        that is stored with the node on the stack.
        """
        cdef int ns_count = 0
        if self._event_filter & PARSE_EVENT_FILTER_START_NS:
            ns_count = _appendStartNsEvents(node._c_node, self._events)
            if self._events:
                self._skip_state = IWSKIP_NEXT_IS_START
        elif self._event_filter & PARSE_EVENT_FILTER_END_NS:
            ns_count = _countNsDefs(node._c_node)
        if self._event_filter & PARSE_EVENT_FILTER_START:
            if self._matcher is None or self._matcher.matches(node._c_node):
                self._events.append( ("start", node) )
                self._skip_state = IWSKIP_NEXT_IS_START
        return ns_count

    @cython.final
    cdef _Element _end_node(self):
        """Pop the top of the node stack, queue its 'end' event and one
        'end-ns' event per counted namespace (subject to the event filter),
        and return the popped element.
        """
        cdef _Element node
        cdef int ns_count
        node, ns_count = self._node_stack.pop()
        if self._event_filter & PARSE_EVENT_FILTER_END:
            if self._matcher is None or self._matcher.matches(node._c_node):
                self._events.append( ("end", node) )
        if self._event_filter & PARSE_EVENT_FILTER_END_NS and ns_count:
            self._events.extend([("end-ns", None)] * ns_count)
        return node
413
+
414
+
415
cdef int _countNsDefs(xmlNode* c_node) noexcept:
    """Count the entries of the node's ``nsDef`` list that have a non-NULL
    ``href``.
    """
    cdef int found = 0
    cdef xmlNs* c_ns = c_node.nsDef
    while c_ns is not NULL:
        if c_ns.href is not NULL:
            found += 1
        c_ns = c_ns.next
    return found
424
+
425
+
426
cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1:
    """Append one ``("start-ns", (prefix, href))`` event to ``event_list`` for
    every entry of the node's ``nsDef`` list that has an ``href``, and return
    how many events were appended.
    """
    cdef int appended = 0
    cdef xmlNs* c_ns = c_node.nsDef
    while c_ns is not NULL:
        if c_ns.href:
            event_list.append(
                ("start-ns", (funicodeOrEmpty(c_ns.prefix), funicode(c_ns.href))))
            appended += 1
        c_ns = c_ns.next
    return appended
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/lxml.etree_api.h ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Generated by Cython 3.1.4 */
2
+
3
+ #ifndef __PYX_HAVE_API__lxml__etree
4
+ #define __PYX_HAVE_API__lxml__etree
5
+ #ifdef __MINGW64__
6
+ #define MS_WIN64
7
+ #endif
8
+ #include "Python.h"
9
+ #include "lxml.etree.h"
10
+
11
+ static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument)(struct LxmlDocument *, xmlNode *) = 0;
12
+ #define deepcopyNodeToDocument __pyx_api_f_4lxml_5etree_deepcopyNodeToDocument
13
+ static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_elementTreeFactory)(struct LxmlElement *) = 0;
14
+ #define elementTreeFactory __pyx_api_f_4lxml_5etree_elementTreeFactory
15
+ static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_newElementTree)(struct LxmlElement *, PyObject *) = 0;
16
+ #define newElementTree __pyx_api_f_4lxml_5etree_newElementTree
17
+ static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_adoptExternalDocument)(xmlDoc *, PyObject *, int) = 0;
18
+ #define adoptExternalDocument __pyx_api_f_4lxml_5etree_adoptExternalDocument
19
+ static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_elementFactory)(struct LxmlDocument *, xmlNode *) = 0;
20
+ #define elementFactory __pyx_api_f_4lxml_5etree_elementFactory
21
+ static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeElement)(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0;
22
+ #define makeElement __pyx_api_f_4lxml_5etree_makeElement
23
+ static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeSubElement)(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0;
24
+ #define makeSubElement __pyx_api_f_4lxml_5etree_makeSubElement
25
+ static void (*__pyx_api_f_4lxml_5etree_setElementClassLookupFunction)(_element_class_lookup_function, PyObject *) = 0;
26
+ #define setElementClassLookupFunction __pyx_api_f_4lxml_5etree_setElementClassLookupFunction
27
+ static PyObject *(*__pyx_api_f_4lxml_5etree_lookupDefaultElementClass)(PyObject *, PyObject *, xmlNode *) = 0;
28
+ #define lookupDefaultElementClass __pyx_api_f_4lxml_5etree_lookupDefaultElementClass
29
+ static PyObject *(*__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass)(PyObject *, PyObject *, xmlNode *) = 0;
30
+ #define lookupNamespaceElementClass __pyx_api_f_4lxml_5etree_lookupNamespaceElementClass
31
+ static PyObject *(*__pyx_api_f_4lxml_5etree_callLookupFallback)(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *) = 0;
32
+ #define callLookupFallback __pyx_api_f_4lxml_5etree_callLookupFallback
33
+ static int (*__pyx_api_f_4lxml_5etree_tagMatches)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
34
+ #define tagMatches __pyx_api_f_4lxml_5etree_tagMatches
35
+ static struct LxmlDocument *(*__pyx_api_f_4lxml_5etree_documentOrRaise)(PyObject *) = 0;
36
+ #define documentOrRaise __pyx_api_f_4lxml_5etree_documentOrRaise
37
+ static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_rootNodeOrRaise)(PyObject *) = 0;
38
+ #define rootNodeOrRaise __pyx_api_f_4lxml_5etree_rootNodeOrRaise
39
+ static int (*__pyx_api_f_4lxml_5etree_hasText)(xmlNode *) = 0;
40
+ #define hasText __pyx_api_f_4lxml_5etree_hasText
41
+ static int (*__pyx_api_f_4lxml_5etree_hasTail)(xmlNode *) = 0;
42
+ #define hasTail __pyx_api_f_4lxml_5etree_hasTail
43
+ static PyObject *(*__pyx_api_f_4lxml_5etree_textOf)(xmlNode *) = 0;
44
+ #define textOf __pyx_api_f_4lxml_5etree_textOf
45
+ static PyObject *(*__pyx_api_f_4lxml_5etree_tailOf)(xmlNode *) = 0;
46
+ #define tailOf __pyx_api_f_4lxml_5etree_tailOf
47
+ static int (*__pyx_api_f_4lxml_5etree_setNodeText)(xmlNode *, PyObject *) = 0;
48
+ #define setNodeText __pyx_api_f_4lxml_5etree_setNodeText
49
+ static int (*__pyx_api_f_4lxml_5etree_setTailText)(xmlNode *, PyObject *) = 0;
50
+ #define setTailText __pyx_api_f_4lxml_5etree_setTailText
51
+ static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValue)(xmlNode *, xmlAttr *) = 0;
52
+ #define attributeValue __pyx_api_f_4lxml_5etree_attributeValue
53
+ static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValueFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
54
+ #define attributeValueFromNsName __pyx_api_f_4lxml_5etree_attributeValueFromNsName
55
+ static PyObject *(*__pyx_api_f_4lxml_5etree_getAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0;
56
+ #define getAttributeValue __pyx_api_f_4lxml_5etree_getAttributeValue
57
+ static PyObject *(*__pyx_api_f_4lxml_5etree_iterattributes)(struct LxmlElement *, int) = 0;
58
+ #define iterattributes __pyx_api_f_4lxml_5etree_iterattributes
59
+ static PyObject *(*__pyx_api_f_4lxml_5etree_collectAttributes)(xmlNode *, int) = 0;
60
+ #define collectAttributes __pyx_api_f_4lxml_5etree_collectAttributes
61
+ static int (*__pyx_api_f_4lxml_5etree_setAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0;
62
+ #define setAttributeValue __pyx_api_f_4lxml_5etree_setAttributeValue
63
+ static int (*__pyx_api_f_4lxml_5etree_delAttribute)(struct LxmlElement *, PyObject *) = 0;
64
+ #define delAttribute __pyx_api_f_4lxml_5etree_delAttribute
65
+ static int (*__pyx_api_f_4lxml_5etree_delAttributeFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
66
+ #define delAttributeFromNsName __pyx_api_f_4lxml_5etree_delAttributeFromNsName
67
+ static int (*__pyx_api_f_4lxml_5etree_hasChild)(xmlNode *) = 0;
68
+ #define hasChild __pyx_api_f_4lxml_5etree_hasChild
69
+ static xmlNode *(*__pyx_api_f_4lxml_5etree_findChild)(xmlNode *, Py_ssize_t) = 0;
70
+ #define findChild __pyx_api_f_4lxml_5etree_findChild
71
+ static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildForwards)(xmlNode *, Py_ssize_t) = 0;
72
+ #define findChildForwards __pyx_api_f_4lxml_5etree_findChildForwards
73
+ static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildBackwards)(xmlNode *, Py_ssize_t) = 0;
74
+ #define findChildBackwards __pyx_api_f_4lxml_5etree_findChildBackwards
75
+ static xmlNode *(*__pyx_api_f_4lxml_5etree_nextElement)(xmlNode *) = 0;
76
+ #define nextElement __pyx_api_f_4lxml_5etree_nextElement
77
+ static xmlNode *(*__pyx_api_f_4lxml_5etree_previousElement)(xmlNode *) = 0;
78
+ #define previousElement __pyx_api_f_4lxml_5etree_previousElement
79
+ static void (*__pyx_api_f_4lxml_5etree_appendChild)(struct LxmlElement *, struct LxmlElement *) = 0;
80
+ #define appendChild __pyx_api_f_4lxml_5etree_appendChild
81
+ static int (*__pyx_api_f_4lxml_5etree_appendChildToElement)(struct LxmlElement *, struct LxmlElement *) = 0;
82
+ #define appendChildToElement __pyx_api_f_4lxml_5etree_appendChildToElement
83
+ static PyObject *(*__pyx_api_f_4lxml_5etree_pyunicode)(const xmlChar *) = 0;
84
+ #define pyunicode __pyx_api_f_4lxml_5etree_pyunicode
85
+ static PyObject *(*__pyx_api_f_4lxml_5etree_utf8)(PyObject *) = 0;
86
+ #define utf8 __pyx_api_f_4lxml_5etree_utf8
87
+ static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTag)(PyObject *) = 0;
88
+ #define getNsTag __pyx_api_f_4lxml_5etree_getNsTag
89
+ static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs)(PyObject *) = 0;
90
+ #define getNsTagWithEmptyNs __pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs
91
+ static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedName)(xmlNode *) = 0;
92
+ #define namespacedName __pyx_api_f_4lxml_5etree_namespacedName
93
+ static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedNameFromNsName)(const xmlChar *, const xmlChar *) = 0;
94
+ #define namespacedNameFromNsName __pyx_api_f_4lxml_5etree_namespacedNameFromNsName
95
+ static void (*__pyx_api_f_4lxml_5etree_iteratorStoreNext)(struct LxmlElementIterator *, struct LxmlElement *) = 0;
96
+ #define iteratorStoreNext __pyx_api_f_4lxml_5etree_iteratorStoreNext
97
+ static void (*__pyx_api_f_4lxml_5etree_initTagMatch)(struct LxmlElementTagMatcher *, PyObject *) = 0;
98
+ #define initTagMatch __pyx_api_f_4lxml_5etree_initTagMatch
99
+ static xmlNs *(*__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix)(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *) = 0;
100
+ #define findOrBuildNodeNsPrefix __pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix
101
+ static int __Pyx_ImportFunction_3_1_4(PyObject *module, const char *funcname, void (**f)(void), const char *sig);
102
+
103
+ #ifndef __PYX_HAVE_RT_ImportFunction_3_1_4
104
+ #define __PYX_HAVE_RT_ImportFunction_3_1_4
105
+ static int __Pyx_ImportFunction_3_1_4(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
106
+ PyObject *d = 0;
107
+ PyObject *cobj = 0;
108
+ union {
109
+ void (*fp)(void);
110
+ void *p;
111
+ } tmp;
112
+ d = PyObject_GetAttrString(module, "__pyx_capi__");
113
+ if (!d)
114
+ goto bad;
115
+ #if (defined(Py_LIMITED_API) && Py_LIMITED_API >= 0x030d0000) || (!defined(Py_LIMITED_API) && PY_VERSION_HEX >= 0x030d0000)
116
+ PyDict_GetItemStringRef(d, funcname, &cobj);
117
+ #else
118
+ cobj = PyDict_GetItemString(d, funcname);
119
+ Py_XINCREF(cobj);
120
+ #endif
121
+ if (!cobj) {
122
+ PyErr_Format(PyExc_ImportError,
123
+ "%.200s does not export expected C function %.200s",
124
+ PyModule_GetName(module), funcname);
125
+ goto bad;
126
+ }
127
+ if (!PyCapsule_IsValid(cobj, sig)) {
128
+ PyErr_Format(PyExc_TypeError,
129
+ "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
130
+ PyModule_GetName(module), funcname, sig, PyCapsule_GetName(cobj));
131
+ goto bad;
132
+ }
133
+ tmp.p = PyCapsule_GetPointer(cobj, sig);
134
+ *f = tmp.fp;
135
+ if (!(*f))
136
+ goto bad;
137
+ Py_DECREF(d);
138
+ Py_DECREF(cobj);
139
+ return 0;
140
+ bad:
141
+ Py_XDECREF(d);
142
+ Py_XDECREF(cobj);
143
+ return -1;
144
+ }
145
+ #endif
146
+
147
+
148
+ static int import_lxml__etree(void) {
149
+ PyObject *module = 0;
150
+ module = PyImport_ImportModule("lxml.etree");
151
+ if (!module) goto bad;
152
+ if (__Pyx_ImportFunction_3_1_4(module, "deepcopyNodeToDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
153
+ if (__Pyx_ImportFunction_3_1_4(module, "elementTreeFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementTreeFactory, "struct LxmlElementTree *(struct LxmlElement *)") < 0) goto bad;
154
+ if (__Pyx_ImportFunction_3_1_4(module, "newElementTree", (void (**)(void))&__pyx_api_f_4lxml_5etree_newElementTree, "struct LxmlElementTree *(struct LxmlElement *, PyObject *)") < 0) goto bad;
155
+ if (__Pyx_ImportFunction_3_1_4(module, "adoptExternalDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_adoptExternalDocument, "struct LxmlElementTree *(xmlDoc *, PyObject *, int)") < 0) goto bad;
156
+ if (__Pyx_ImportFunction_3_1_4(module, "elementFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementFactory, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
157
+ if (__Pyx_ImportFunction_3_1_4(module, "makeElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeElement, "struct LxmlElement *(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
158
+ if (__Pyx_ImportFunction_3_1_4(module, "makeSubElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeSubElement, "struct LxmlElement *(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
159
+ if (__Pyx_ImportFunction_3_1_4(module, "setElementClassLookupFunction", (void (**)(void))&__pyx_api_f_4lxml_5etree_setElementClassLookupFunction, "void (_element_class_lookup_function, PyObject *)") < 0) goto bad;
160
+ if (__Pyx_ImportFunction_3_1_4(module, "lookupDefaultElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupDefaultElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
161
+ if (__Pyx_ImportFunction_3_1_4(module, "lookupNamespaceElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
162
+ if (__Pyx_ImportFunction_3_1_4(module, "callLookupFallback", (void (**)(void))&__pyx_api_f_4lxml_5etree_callLookupFallback, "PyObject *(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *)") < 0) goto bad;
163
+ if (__Pyx_ImportFunction_3_1_4(module, "tagMatches", (void (**)(void))&__pyx_api_f_4lxml_5etree_tagMatches, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
164
+ if (__Pyx_ImportFunction_3_1_4(module, "documentOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_documentOrRaise, "struct LxmlDocument *(PyObject *)") < 0) goto bad;
165
+ if (__Pyx_ImportFunction_3_1_4(module, "rootNodeOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_rootNodeOrRaise, "struct LxmlElement *(PyObject *)") < 0) goto bad;
166
+ if (__Pyx_ImportFunction_3_1_4(module, "hasText", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasText, "int (xmlNode *)") < 0) goto bad;
167
+ if (__Pyx_ImportFunction_3_1_4(module, "hasTail", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasTail, "int (xmlNode *)") < 0) goto bad;
168
+ if (__Pyx_ImportFunction_3_1_4(module, "textOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_textOf, "PyObject *(xmlNode *)") < 0) goto bad;
169
+ if (__Pyx_ImportFunction_3_1_4(module, "tailOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_tailOf, "PyObject *(xmlNode *)") < 0) goto bad;
170
+ if (__Pyx_ImportFunction_3_1_4(module, "setNodeText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setNodeText, "int (xmlNode *, PyObject *)") < 0) goto bad;
171
+ if (__Pyx_ImportFunction_3_1_4(module, "setTailText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setTailText, "int (xmlNode *, PyObject *)") < 0) goto bad;
172
+ if (__Pyx_ImportFunction_3_1_4(module, "attributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValue, "PyObject *(xmlNode *, xmlAttr *)") < 0) goto bad;
173
+ if (__Pyx_ImportFunction_3_1_4(module, "attributeValueFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValueFromNsName, "PyObject *(xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
174
+ if (__Pyx_ImportFunction_3_1_4(module, "getAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_getAttributeValue, "PyObject *(struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
175
+ if (__Pyx_ImportFunction_3_1_4(module, "iterattributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_iterattributes, "PyObject *(struct LxmlElement *, int)") < 0) goto bad;
176
+ if (__Pyx_ImportFunction_3_1_4(module, "collectAttributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_collectAttributes, "PyObject *(xmlNode *, int)") < 0) goto bad;
177
+ if (__Pyx_ImportFunction_3_1_4(module, "setAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_setAttributeValue, "int (struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
178
+ if (__Pyx_ImportFunction_3_1_4(module, "delAttribute", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttribute, "int (struct LxmlElement *, PyObject *)") < 0) goto bad;
179
+ if (__Pyx_ImportFunction_3_1_4(module, "delAttributeFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttributeFromNsName, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
180
+ if (__Pyx_ImportFunction_3_1_4(module, "hasChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasChild, "int (xmlNode *)") < 0) goto bad;
181
+ if (__Pyx_ImportFunction_3_1_4(module, "findChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChild, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
182
+ if (__Pyx_ImportFunction_3_1_4(module, "findChildForwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildForwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
183
+ if (__Pyx_ImportFunction_3_1_4(module, "findChildBackwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildBackwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
184
+ if (__Pyx_ImportFunction_3_1_4(module, "nextElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_nextElement, "xmlNode *(xmlNode *)") < 0) goto bad;
185
+ if (__Pyx_ImportFunction_3_1_4(module, "previousElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_previousElement, "xmlNode *(xmlNode *)") < 0) goto bad;
186
+ if (__Pyx_ImportFunction_3_1_4(module, "appendChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChild, "void (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
187
+ if (__Pyx_ImportFunction_3_1_4(module, "appendChildToElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChildToElement, "int (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
188
+ if (__Pyx_ImportFunction_3_1_4(module, "pyunicode", (void (**)(void))&__pyx_api_f_4lxml_5etree_pyunicode, "PyObject *(const xmlChar *)") < 0) goto bad;
189
+ if (__Pyx_ImportFunction_3_1_4(module, "utf8", (void (**)(void))&__pyx_api_f_4lxml_5etree_utf8, "PyObject *(PyObject *)") < 0) goto bad;
190
+ if (__Pyx_ImportFunction_3_1_4(module, "getNsTag", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTag, "PyObject *(PyObject *)") < 0) goto bad;
191
+ if (__Pyx_ImportFunction_3_1_4(module, "getNsTagWithEmptyNs", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs, "PyObject *(PyObject *)") < 0) goto bad;
192
+ if (__Pyx_ImportFunction_3_1_4(module, "namespacedName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedName, "PyObject *(xmlNode *)") < 0) goto bad;
193
+ if (__Pyx_ImportFunction_3_1_4(module, "namespacedNameFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedNameFromNsName, "PyObject *(const xmlChar *, const xmlChar *)") < 0) goto bad;
194
+ if (__Pyx_ImportFunction_3_1_4(module, "iteratorStoreNext", (void (**)(void))&__pyx_api_f_4lxml_5etree_iteratorStoreNext, "void (struct LxmlElementIterator *, struct LxmlElement *)") < 0) goto bad;
195
+ if (__Pyx_ImportFunction_3_1_4(module, "initTagMatch", (void (**)(void))&__pyx_api_f_4lxml_5etree_initTagMatch, "void (struct LxmlElementTagMatcher *, PyObject *)") < 0) goto bad;
196
+ if (__Pyx_ImportFunction_3_1_4(module, "findOrBuildNodeNsPrefix", (void (**)(void))&__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix, "xmlNs *(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
197
+ Py_DECREF(module); module = 0;
198
+ return 0;
199
+ bad:
200
+ Py_XDECREF(module);
201
+ return -1;
202
+ }
203
+
204
+ #endif /* !__PYX_HAVE_API__lxml__etree */
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/nsclasses.pxi ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # module-level API for namespace implementations
2
+
3
cdef class LxmlRegistryError(LxmlError):
    """Base class of lxml registry errors."""
6
+
7
cdef class NamespaceRegistryError(LxmlRegistryError):
    """Error registering a namespace extension."""
10
+
11
+
12
@cython.internal
cdef class _NamespaceRegistry:
    """Dictionary-like namespace registry.

    Entries are kept in a dict keyed by the UTF-8 encoded name, or by None.
    This base class only reads and deletes entries; item assignment is
    provided by subclasses.
    """
    cdef object _ns_uri
    cdef bytes _ns_uri_utf
    cdef dict _entries
    cdef char* _c_ns_uri_utf

    def __cinit__(self, ns_uri):
        self._ns_uri = ns_uri
        if ns_uri is not None:
            self._ns_uri_utf = _utf8(ns_uri)
            # C view of the bytes object stored just above.
            self._c_ns_uri_utf = _cstr(self._ns_uri_utf)
        else:
            self._ns_uri_utf = None
            self._c_ns_uri_utf = NULL
        self._entries = {}

    def update(self, class_dict_iterable):
        """update(self, class_dict_iterable)

        Forgivingly update the registry.

        ``class_dict_iterable`` may be a dict (anything with an ``items``
        attribute) or some other iterable that yields (name, value) pairs.

        Pairs whose name starts with '_' and pairs whose value is not
        callable are skipped.  Every other pair is stored through
        ``self[name] = value``.  This allows registrations at the module
        or class level using vars(), globals() etc.
        """
        pairs = class_dict_iterable
        if hasattr(pairs, 'items'):
            pairs = pairs.items()
        for name, item in pairs:
            is_public = name is None or name[:1] != '_'
            if is_public and callable(item):
                self[name] = item

    def __getitem__(self, name):
        key = name if name is None else _utf8(name)
        return self._get(key)

    def __delitem__(self, name):
        key = name if name is None else _utf8(name)
        del self._entries[key]

    cdef object _get(self, object name):
        # Look up an already encoded key; raises KeyError when it is missing.
        cdef python.PyObject* found
        found = python.PyDict_GetItem(self._entries, name)
        if found is NULL:
            raise KeyError("Name not registered.")
        return <object>found

    cdef object _getForString(self, char* name):
        # Same lookup as _get(), starting from a C string.
        cdef python.PyObject* found
        found = python.PyDict_GetItem(self._entries, name)
        if found is NULL:
            raise KeyError("Name not registered.")
        return <object>found

    def __iter__(self):
        return iter(self._entries)

    def items(self):
        entry_items = self._entries.items()
        return list(entry_items)

    def iteritems(self):
        entry_items = self._entries.items()
        return iter(entry_items)

    def clear(self):
        self._entries.clear()

    def __call__(self, obj):
        # Usage as decorator:
        # ns = lookup.get_namespace("...")
        # @ns('abc')
        # class element(ElementBase): pass
        #
        # @ns
        # class elementname(ElementBase): pass

        if obj is not None and not python._isString(obj):
            # plain @ns decorator: register under the object's own name
            self[obj.__name__] = obj
            return obj
        # @ns(None) or @ns('tag'): return the decorator bound to that name
        return partial(self.__deco, obj)

    def __deco(self, name, obj):
        self[name] = obj
        return obj
102
+
103
+
104
@cython.final
@cython.internal
cdef class _ClassNamespaceRegistry(_NamespaceRegistry):
    """Dictionary-like registry for namespace implementation classes.

    Only classes derived from ElementBase can be stored.
    """
    def __setitem__(self, name, item):
        is_element_class = isinstance(item, type) and issubclass(item, ElementBase)
        if not is_element_class:
            raise NamespaceRegistryError(
                "Registered element classes must be subtypes of ElementBase")
        # None stays None; every other name is stored under its UTF-8 form.
        key = _utf8(name) if name is not None else None
        self._entries[key] = item

    def __repr__(self):
        return "Namespace(%r)" % self._ns_uri
118
+
119
+
120
cdef class ElementNamespaceClassLookup(FallbackElementClassLookup):
    """ElementNamespaceClassLookup(self, fallback=None)

    Element class lookup scheme that searches the Element class in the
    Namespace registry.

    Usage:

    >>> lookup = ElementNamespaceClassLookup()
    >>> ns_elements = lookup.get_namespace("http://schema.org/Movie")

    >>> @ns_elements
    ... class movie(ElementBase):
    ...     "Element implementation for 'movie' tag (using class name) in schema namespace."

    >>> @ns_elements("movie")
    ... class MovieElement(ElementBase):
    ...     "Element implementation for 'movie' tag (explicit tag name) in schema namespace."
    """
    cdef dict _namespace_registries

    def __cinit__(self):
        self._namespace_registries = {}

    def __init__(self, ElementClassLookup fallback=None):
        FallbackElementClassLookup.__init__(self, fallback)
        self._lookup_function = _find_nselement_class

    def get_namespace(self, ns_uri):
        """get_namespace(self, ns_uri)

        Retrieve the namespace object associated with the given URI.
        Pass None for the empty namespace.

        Creates a new namespace object if it does not yet exist.
        """
        # Any false value is stored under the None key.
        ns_utf = _utf8(ns_uri) if ns_uri else None
        registries = self._namespace_registries
        if ns_utf in registries:
            return registries[ns_utf]
        registry = _ClassNamespaceRegistry(ns_uri)
        registries[ns_utf] = registry
        return registry
164
+
165
cdef object _find_nselement_class(state, _Document doc, xmlNode* c_node):
    # Lookup function installed by ElementNamespaceClassLookup.
    #
    # Finds the registry for the node's namespace (None key when the node
    # has no namespace), then the class registered for the node name, then
    # the class registered under None.  Whenever a step finds nothing, the
    # lookup is handed to the fallback.
    cdef python.PyObject* dict_result
    cdef ElementNamespaceClassLookup lookup
    cdef _NamespaceRegistry registry
    if state is None:
        return _lookupDefaultElementClass(None, doc, c_node)

    lookup = <ElementNamespaceClassLookup>state
    if c_node.type != tree.XML_ELEMENT_NODE:
        return _callLookupFallback(lookup, doc, c_node)

    c_namespace_utf = _getNs(c_node)
    if c_namespace_utf is NULL:
        dict_result = python.PyDict_GetItem(
            lookup._namespace_registries, None)
    else:
        dict_result = python.PyDict_GetItem(
            lookup._namespace_registries, <unsigned char*>c_namespace_utf)
    if dict_result is NULL:
        # no registry for this namespace
        return _callLookupFallback(lookup, doc, c_node)

    registry = <_NamespaceRegistry>dict_result
    classes = registry._entries

    dict_result = NULL
    if c_node.name is not NULL:
        dict_result = python.PyDict_GetItem(
            classes, <unsigned char*>c_node.name)
    if dict_result is NULL:
        # no class for this tag name: try the entry registered under None
        dict_result = python.PyDict_GetItem(classes, None)
    if dict_result is NULL:
        return _callLookupFallback(lookup, doc, c_node)
    return <object>dict_result
199
+
200
+
201
+ ################################################################################
202
+ # XPath extension functions
203
+
204
+ cdef dict __FUNCTION_NAMESPACE_REGISTRIES
205
+ __FUNCTION_NAMESPACE_REGISTRIES = {}
206
+
207
def FunctionNamespace(ns_uri):
    """FunctionNamespace(ns_uri)

    Retrieve the function namespace object associated with the given
    URI.

    Creates a new one if it does not yet exist. A function namespace
    can only be used to register extension functions.

    Usage:

    >>> ns_functions = FunctionNamespace("http://schema.org/Movie")

    >>> @ns_functions  # uses function name
    ... def add2(x):
    ...     return x + 2

    >>> @ns_functions("add3")  # uses explicit name
    ... def add_three(x):
    ...     return x + 3
    """
    # Any false value is stored under the None key.
    if ns_uri:
        ns_utf = _utf8(ns_uri)
    else:
        ns_utf = None
    if ns_utf in __FUNCTION_NAMESPACE_REGISTRIES:
        return __FUNCTION_NAMESPACE_REGISTRIES[ns_utf]
    registry = _XPathFunctionNamespaceRegistry(ns_uri)
    __FUNCTION_NAMESPACE_REGISTRIES[ns_utf] = registry
    return registry
235
+
236
@cython.internal
cdef class _FunctionNamespaceRegistry(_NamespaceRegistry):
    """Namespace registry whose entries are callables with non-empty names."""
    def __setitem__(self, name, item):
        if not callable(item):
            raise NamespaceRegistryError(
                "Registered functions must be callable.")
        if not name:
            raise ValueError(
                "extensions must have non empty names")
        key = _utf8(name)
        self._entries[key] = item

    def __repr__(self):
        return "FunctionNamespace(%r)" % self._ns_uri
249
+
250
@cython.final
@cython.internal
cdef class _XPathFunctionNamespaceRegistry(_FunctionNamespaceRegistry):
    """Function registry that can also carry a namespace prefix."""
    cdef object _prefix
    cdef bytes _prefix_utf

    @property
    def prefix(self):
        "Namespace prefix for extension functions."
        # An unset prefix reads back as the empty string.
        return '' if self._prefix is None else self._prefix

    @prefix.setter
    def prefix(self, prefix):
        if prefix == '':
            prefix = None  # empty prefix
        if prefix is None:
            self._prefix_utf = None
        else:
            self._prefix_utf = _utf8(prefix)
        self._prefix = prefix

    @prefix.deleter
    def prefix(self):
        self._prefix = None  # no prefix configured
        self._prefix_utf = None
271
+
272
cdef list _find_all_extension_prefixes():
    "Internal lookup function to find all function prefixes for XSLT/XPath."
    cdef _XPathFunctionNamespaceRegistry registry
    cdef list ns_prefixes = []
    for registry in __FUNCTION_NAMESPACE_REGISTRIES.itervalues():
        # Only registries with both a prefix and a namespace URI contribute.
        if registry._prefix_utf is None or registry._ns_uri_utf is None:
            continue
        ns_prefixes.append(
            (registry._prefix_utf, registry._ns_uri_utf))
    return ns_prefixes
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/objectify.pyx ADDED
@@ -0,0 +1,2149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # cython: binding=True
2
+ # cython: auto_pickle=False
3
+ # cython: language_level=3
4
+
5
+ """
6
+ The ``lxml.objectify`` module implements a Python object API for XML.
7
+ It is based on `lxml.etree`.
8
+ """
9
+
10
+ cimport cython
11
+
12
+ from lxml.includes.etreepublic cimport _Document, _Element, ElementBase, ElementClassLookup
13
+ from lxml.includes.etreepublic cimport elementFactory, import_lxml__etree, textOf, pyunicode
14
+ from lxml.includes.tree cimport const_xmlChar, _xcstr
15
+ from lxml cimport python
16
+ from lxml.includes cimport tree
17
+
18
+ cimport lxml.includes.etreepublic as cetree
19
+ cimport libc.string as cstring_h # not to be confused with stdlib 'string'
20
+ from libc.string cimport const_char
21
+ from libc cimport limits
22
+
23
+ __all__ = ['BoolElement', 'DataElement', 'E', 'Element', 'ElementMaker',
24
+ 'FloatElement', 'IntElement', 'NoneElement',
25
+ 'NumberElement', 'ObjectPath', 'ObjectifiedDataElement',
26
+ 'ObjectifiedElement', 'ObjectifyElementClassLookup',
27
+ 'PYTYPE_ATTRIBUTE', 'PyType', 'StringElement', 'SubElement',
28
+ 'XML', 'annotate', 'deannotate', 'dump', 'enable_recursive_str',
29
+ 'fromstring', 'getRegisteredTypes', 'makeparser', 'parse',
30
+ 'pyannotate', 'pytypename', 'set_default_parser',
31
+ 'set_pytype_attribute_tag', 'xsiannotate']
32
+
33
+ cdef object etree
34
+ from lxml import etree
35
+ # initialize C-API of lxml.etree
36
+ import_lxml__etree()
37
+
38
+ __version__ = etree.__version__
39
+
40
+ cdef object _float_is_inf, _float_is_nan
41
+ from math import isinf as _float_is_inf, isnan as _float_is_nan
42
+
43
+ cdef object re
44
+ import re
45
+
46
+ cdef tuple IGNORABLE_ERRORS = (ValueError, TypeError)
47
+ cdef object is_special_method = re.compile('__.*__$').match
48
+
49
+
50
cdef object _typename(object t):
    # Return the part of t's type name after its last '.', or the whole
    # name when it contains no dot.
    cdef const_char* c_name
    c_name = python._fqtypename(t)
    c_last_dot = cstring_h.strrchr(c_name, c'.')
    if c_last_dot is not NULL:
        # step past the dot
        c_name = c_last_dot + 1
    return pyunicode(<const_xmlChar*>c_name)
57
+
58
+
59
+ # namespace/name for "pytype" hint attribute
60
+ cdef object PYTYPE_NAMESPACE
61
+ cdef bytes PYTYPE_NAMESPACE_UTF8
62
+ cdef const_xmlChar* _PYTYPE_NAMESPACE
63
+
64
+ cdef object PYTYPE_ATTRIBUTE_NAME
65
+ cdef bytes PYTYPE_ATTRIBUTE_NAME_UTF8
66
+ cdef const_xmlChar* _PYTYPE_ATTRIBUTE_NAME
67
+
68
+ PYTYPE_ATTRIBUTE = None
69
+
70
+ cdef unicode TREE_PYTYPE_NAME = "TREE"
71
+
72
cdef tuple _unicodeAndUtf8(s):
    # Pair the string with its UTF-8 encoded bytes form.
    utf8_bytes = python.PyUnicode_AsUTF8String(s)
    return (s, utf8_bytes)
74
+
75
def set_pytype_attribute_tag(attribute_tag=None):
    """set_pytype_attribute_tag(attribute_tag=None)
    Change name and namespace of the XML attribute that holds Python type
    information.

    Do not use this unless you know what you are doing.

    Reset by calling without argument.

    Default: "{http://codespeak.net/lxml/objectify/pytype}pytype"
    """
    global PYTYPE_ATTRIBUTE, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME
    global PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8
    global PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8
    if attribute_tag is not None:
        # Split the given tag, then derive the text forms from the bytes.
        PYTYPE_NAMESPACE_UTF8, PYTYPE_ATTRIBUTE_NAME_UTF8 = \
            cetree.getNsTag(attribute_tag)
        PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8.decode('utf8')
        PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8.decode('utf8')
    else:
        # No argument: restore the default namespace and attribute name.
        PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8 = \
            _unicodeAndUtf8("http://codespeak.net/lxml/objectify/pytype")
        PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8 = \
            _unicodeAndUtf8("pytype")

    # The C pointers refer to the module-level bytes objects assigned above.
    _PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8
    _PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8
    PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName(
        _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
104
+
105
+ set_pytype_attribute_tag()
106
+
107
+
108
+ # namespaces for XML Schema
109
+ cdef object XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8
110
+ XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8 = \
111
+ _unicodeAndUtf8("http://www.w3.org/2001/XMLSchema")
112
+ cdef const_xmlChar* _XML_SCHEMA_NS = _xcstr(XML_SCHEMA_NS_UTF8)
113
+
114
+ cdef object XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8
115
+ XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8 = \
116
+ _unicodeAndUtf8("http://www.w3.org/2001/XMLSchema-instance")
117
+ cdef const_xmlChar* _XML_SCHEMA_INSTANCE_NS = _xcstr(XML_SCHEMA_INSTANCE_NS_UTF8)
118
+
119
+ cdef object XML_SCHEMA_INSTANCE_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS
120
+ cdef object XML_SCHEMA_INSTANCE_TYPE_ATTR = "{%s}type" % XML_SCHEMA_INSTANCE_NS
121
+
122
+
123
+ ################################################################################
124
+ # Element class for the main API
125
+
126
+ cdef class ObjectifiedElement(ElementBase):
127
+ """Main XML Element class.
128
+
129
+ Element children are accessed as object attributes. Multiple children
130
+ with the same name are available through a list index. Example::
131
+
132
+ >>> root = XML("<root><c1><c2>0</c2><c2>1</c2></c1></root>")
133
+ >>> second_c2 = root.c1.c2[1]
134
+ >>> print(second_c2.text)
135
+ 1
136
+
137
+ Note that you cannot (and must not) instantiate this class or its
138
+ subclasses.
139
+ """
140
def __iter__(self):
    """Iterate over self and all siblings with the same tag.

    An element without a parent yields only itself.
    """
    parent = self.getparent()
    if parent is not None:
        return etree.ElementChildIterator(parent, tag=self.tag)
    return iter([self])
147
+
148
def __str__(self):
    # Recursive mode delegates to _dump(); otherwise return the element's
    # text, or '' when that is empty.
    if __RECURSIVE_STR:
        return _dump(self, 0)
    return textOf(self._c_node) or ''
153
+
154
+ # pickle support for objectified Element
155
def __reduce__(self):
    # Pickle as "call fromstring() on the serialised element".
    serialised = etree.tostring(self)
    return fromstring, (serialised,)
157
+
158
@property
def text(self):
    # Read-only: the text of the underlying C node.
    node_text = textOf(self._c_node)
    return node_text
161
+
162
+ @property
163
+ def __dict__(self):
164
+ """A fake implementation for __dict__ to support dir() etc.
165
+
166
+ Note that this only considers the first child with a given name.
167
+ """
168
+ cdef _Element child
169
+ cdef dict children
170
+ c_ns = tree._getNs(self._c_node)
171
+ tag = "{%s}*" % pyunicode(c_ns) if c_ns is not NULL else None
172
+ children = {}
173
+ for child in etree.ElementChildIterator(self, tag=tag):
174
+ if c_ns is NULL and tree._getNs(child._c_node) is not NULL:
175
+ continue
176
+ name = pyunicode(child._c_node.name)
177
+ if name not in children:
178
+ children[name] = child
179
+ return children
180
+
181
+ def __len__(self):
182
+ """Count self and siblings with the same tag.
183
+ """
184
+ return _countSiblings(self._c_node)
185
+
186
+ def countchildren(self):
187
+ """countchildren(self)
188
+
189
+ Return the number of children of this element, regardless of their
190
+ name.
191
+ """
192
+ # copied from etree
193
+ cdef Py_ssize_t c
194
+ cdef tree.xmlNode* c_node
195
+ c = 0
196
+ c_node = self._c_node.children
197
+ while c_node is not NULL:
198
+ if tree._isElement(c_node):
199
+ c += 1
200
+ c_node = c_node.next
201
+ return c
202
+
203
+ def getchildren(self):
204
+ """getchildren(self)
205
+
206
+ Returns a sequence of all direct children. The elements are
207
+ returned in document order.
208
+ """
209
+ cdef tree.xmlNode* c_node
210
+ result = []
211
+ c_node = self._c_node.children
212
+ while c_node is not NULL:
213
+ if tree._isElement(c_node):
214
+ result.append(cetree.elementFactory(self._doc, c_node))
215
+ c_node = c_node.next
216
+ return result
217
+
218
+ def __getattr__(self, tag):
219
+ """Return the (first) child with the given tag name. If no namespace
220
+ is provided, the child will be looked up in the same one as self.
221
+ """
222
+ return _lookupChildOrRaise(self, tag)
223
+
224
+ def __setattr__(self, tag, value):
225
+ """Set the value of the (first) child with the given tag name. If no
226
+ namespace is provided, the child will be looked up in the same one as
227
+ self.
228
+ """
229
+ cdef _Element element
230
+ # properties are looked up /after/ __setattr__, so we must emulate them
231
+ if tag == 'text' or tag == 'pyval':
232
+ # read-only !
233
+ raise TypeError, f"attribute '{tag}' of '{_typename(self)}' objects is not writable"
234
+ elif tag == 'tail':
235
+ cetree.setTailText(self._c_node, value)
236
+ return
237
+ elif tag == 'tag':
238
+ ElementBase.tag.__set__(self, value)
239
+ return
240
+ elif tag == 'base':
241
+ ElementBase.base.__set__(self, value)
242
+ return
243
+ tag = _buildChildTag(self, tag)
244
+ element = _lookupChild(self, tag)
245
+ if element is None:
246
+ _appendValue(self, tag, value)
247
+ else:
248
+ _replaceElement(element, value)
249
+
250
+ def __delattr__(self, tag):
251
+ child = _lookupChildOrRaise(self, tag)
252
+ self.remove(child)
253
+
254
+ def addattr(self, tag, value):
255
+ """addattr(self, tag, value)
256
+
257
+ Add a child value to the element.
258
+
259
+ As opposed to append(), it sets a data value, not an element.
260
+ """
261
+ _appendValue(self, _buildChildTag(self, tag), value)
262
+
263
+ def __getitem__(self, key):
264
+ """Return a sibling, counting from the first child of the parent. The
265
+ method behaves like both a dict and a sequence.
266
+
267
+ * If argument is an integer, returns the sibling at that position.
268
+
269
+ * If argument is a string, does the same as getattr(). This can be
270
+ used to provide namespaces for element lookup, or to look up
271
+ children with special names (``text`` etc.).
272
+
273
+ * If argument is a slice object, returns the matching slice.
274
+ """
275
+ cdef tree.xmlNode* c_self_node
276
+ cdef tree.xmlNode* c_parent
277
+ cdef tree.xmlNode* c_node
278
+ cdef Py_ssize_t c_index
279
+ if python._isString(key):
280
+ return _lookupChildOrRaise(self, key)
281
+ elif isinstance(key, slice):
282
+ return list(self)[key]
283
+ # normal item access
284
+ c_index = key # raises TypeError if necessary
285
+ c_self_node = self._c_node
286
+ c_parent = c_self_node.parent
287
+ if c_parent is NULL:
288
+ if c_index == 0 or c_index == -1:
289
+ return self
290
+ raise IndexError, unicode(key)
291
+ if c_index < 0:
292
+ c_node = c_parent.last
293
+ else:
294
+ c_node = c_parent.children
295
+ c_node = _findFollowingSibling(
296
+ c_node, tree._getNs(c_self_node), c_self_node.name, c_index)
297
+ if c_node is NULL:
298
+ raise IndexError, unicode(key)
299
+ return elementFactory(self._doc, c_node)
300
+
301
+ def __setitem__(self, key, value):
302
+ """Set the value of a sibling, counting from the first child of the
303
+ parent. Implements key assignment, item assignment and slice
304
+ assignment.
305
+
306
+ * If argument is an integer, sets the sibling at that position.
307
+
308
+ * If argument is a string, does the same as setattr(). This is used
309
+ to provide namespaces for element lookup.
310
+
311
+ * If argument is a sequence (list, tuple, etc.), assign the contained
312
+ items to the siblings.
313
+ """
314
+ cdef _Element element
315
+ cdef tree.xmlNode* c_node
316
+ if python._isString(key):
317
+ key = _buildChildTag(self, key)
318
+ element = _lookupChild(self, key)
319
+ if element is None:
320
+ _appendValue(self, key, value)
321
+ else:
322
+ _replaceElement(element, value)
323
+ return
324
+
325
+ if self._c_node.parent is NULL:
326
+ # the 'root[i] = ...' case
327
+ raise TypeError, "assignment to root element is invalid"
328
+
329
+ if isinstance(key, slice):
330
+ # slice assignment
331
+ _setSlice(key, self, value)
332
+ else:
333
+ # normal index assignment
334
+ if key < 0:
335
+ c_node = self._c_node.parent.last
336
+ else:
337
+ c_node = self._c_node.parent.children
338
+ c_node = _findFollowingSibling(
339
+ c_node, tree._getNs(self._c_node), self._c_node.name, key)
340
+ if c_node is NULL:
341
+ raise IndexError, unicode(key)
342
+ element = elementFactory(self._doc, c_node)
343
+ _replaceElement(element, value)
344
+
345
+ def __delitem__(self, key):
346
+ parent = self.getparent()
347
+ if parent is None:
348
+ raise TypeError, "deleting items not supported by root element"
349
+ if isinstance(key, slice):
350
+ # slice deletion
351
+ del_items = list(self)[key]
352
+ remove = parent.remove
353
+ for el in del_items:
354
+ remove(el)
355
+ else:
356
+ # normal index deletion
357
+ sibling = self.__getitem__(key)
358
+ parent.remove(sibling)
359
+
360
+ def descendantpaths(self, prefix=None):
361
+ """descendantpaths(self, prefix=None)
362
+
363
+ Returns a list of object path expressions for all descendants.
364
+ """
365
+ if prefix is not None and not python._isString(prefix):
366
+ prefix = '.'.join(prefix)
367
+ return _build_descendant_paths(self._c_node, prefix)
368
+
369
+
370
cdef inline bint _tagMatches(tree.xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name):
    # Tell whether c_node carries the tag (c_href, c_name).
    # The name is compared by pointer, not by content.  A NULL c_href
    # accepts any namespace; an empty c_href only matches nodes that have
    # no namespace at all.
    if c_node.name == c_name:
        if c_href == NULL:
            return 1
        c_node_href = tree._getNs(c_node)
        if c_node_href != NULL:
            return tree.xmlStrcmp(c_node_href, c_href) == 0
        return c_href[0] == c'\0'
    return 0
379
+
380
+
381
cdef Py_ssize_t _countSiblings(tree.xmlNode* c_start_node):
    # Count c_start_node itself plus all element siblings, on both sides,
    # whose tag matches its name and namespace.
    cdef tree.xmlNode* c_sibling
    cdef Py_ssize_t total
    c_name = c_start_node.name
    c_ns = tree._getNs(c_start_node)
    total = 1  # the start node itself
    # siblings after the start node
    c_sibling = c_start_node.next
    while c_sibling is not NULL:
        if c_sibling.type == tree.XML_ELEMENT_NODE:
            if _tagMatches(c_sibling, c_ns, c_name):
                total += 1
        c_sibling = c_sibling.next
    # siblings before the start node
    c_sibling = c_start_node.prev
    while c_sibling is not NULL:
        if c_sibling.type == tree.XML_ELEMENT_NODE:
            if _tagMatches(c_sibling, c_ns, c_name):
                total += 1
        c_sibling = c_sibling.prev
    return total
400
+
401
cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node,
                                         const_xmlChar* href, const_xmlChar* name,
                                         Py_ssize_t index):
    # Starting at c_node (inclusive), return the index-th node matching the
    # tag (href, name), or NULL if there are not enough matches.
    # A non-negative index walks forwards (0 = first match); a negative index
    # walks backwards (-1 = first match seen when going backwards), so the
    # caller is expected to pass the last node of the range in that case.
    cdef tree.xmlNode* (*next)(tree.xmlNode*)
    if index >= 0:
        next = cetree.nextElement
    else:
        # map -1, -2, ... to 0, 1, ... matches to skip
        index = -1 - index
        next = cetree.previousElement
    while c_node is not NULL:
        if c_node.type == tree.XML_ELEMENT_NODE and \
               _tagMatches(c_node, href, name):
            index = index - 1
            if index < 0:
                return c_node
        c_node = next(c_node)
    return NULL
418
+
419
cdef object _lookupChild(_Element parent, tag):
    # Return the first child of 'parent' that matches 'tag', or None.
    # If 'tag' carries no namespace, the namespace of 'parent' is used
    # (or the empty namespace if the parent has none).
    cdef tree.xmlNode* c_result
    cdef tree.xmlNode* c_node
    c_node = parent._c_node
    ns, tag = cetree.getNsTagWithEmptyNs(tag)
    c_tag_len = len(<bytes> tag)
    if c_tag_len > limits.INT_MAX:
        # the length is passed to libxml2 as a C int below
        return None
    # look the name up in the document's string dictionary; the returned
    # pointer is what _tagMatches() compares node names against
    c_tag = tree.xmlDictExists(
        c_node.doc.dict, _xcstr(tag), <int> c_tag_len)
    if c_tag is NULL:
        return None # not in the hash map => not in the tree
    if ns is None:
        # either inherit ns from parent or use empty (i.e. no) namespace
        c_href = tree._getNs(c_node) or <const_xmlChar*>''
    else:
        c_href = _xcstr(ns)
    c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0)
    if c_result is NULL:
        return None
    return elementFactory(parent._doc, c_result)
440
+
441
cdef object _lookupChildOrRaise(_Element parent, tag):
    # Like _lookupChild(), but a missing child is reported as an
    # AttributeError that names the fully qualified tag.
    child = _lookupChild(parent, tag)
    if child is not None:
        return child
    raise AttributeError, "no such child: " + _buildChildTag(parent, tag)
446
+
447
cdef object _buildChildTag(_Element parent, tag):
    # Build the namespaced name for a child tag: a tag without its own
    # namespace takes the namespace of 'parent' (as returned by tree._getNs).
    ns, tag = cetree.getNsTag(tag)
    c_tag = _xcstr(tag)
    c_href = tree._getNs(parent._c_node) if ns is None else _xcstr(ns)
    return cetree.namespacedNameFromNsName(c_href, c_tag)
452
+
453
cdef _replaceElement(_Element element, value):
    # Replace 'element' in its parent by a new element built from 'value'.
    # An element value is deep-copied and renamed, a list/tuple is assigned
    # to the whole sibling slice, anything else becomes the data value of a
    # freshly made element with the same tag.
    cdef _Element replacement
    if isinstance(value, _Element):
        # deep copy the new element
        replacement = cetree.deepcopyNodeToDocument(
            element._doc, (<_Element>value)._c_node)
        replacement.tag = element.tag
    else:
        if isinstance(value, (list, tuple)):
            element[:] = value
            return
        replacement = element.makeelement(element.tag)
        _setElementValue(replacement, value)
    parent = element.getparent()
    parent.replace(element, replacement)
467
+
468
cdef _appendValue(_Element parent, tag, value):
    # Append 'value' as new child(ren) named 'tag' to 'parent'.
    # Lists and tuples are appended item by item (recursively).
    cdef _Element child
    if isinstance(value, _Element):
        # deep copy the new element
        child = cetree.deepcopyNodeToDocument(
            parent._doc, (<_Element>value)._c_node)
        child.tag = tag
    elif isinstance(value, (list, tuple)):
        for entry in value:
            _appendValue(parent, tag, entry)
        return
    else:
        child = cetree.makeElement(
            tag, parent._doc, None, None, None, None, None)
        _setElementValue(child, value)
    cetree.appendChildToElement(parent, child)
484
+
485
cdef _setElementValue(_Element element, value):
    # Store a Python value in 'element':
    #  * None sets the xsi:nil attribute to "true"
    #  * an element value replaces 'element' altogether
    #  * anything else is stored as text, together with a pytype attribute
    #    if a PyType is registered under the value's type name
    if value is None:
        cetree.setAttributeValue(
            element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true")
    elif isinstance(value, _Element):
        _replaceElement(element, value)
        return
    else:
        # a real value: drop any xsi:nil marker first
        cetree.delAttributeFromNsName(
            element._c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil")
        if python._isString(value):
            # strings are stored as they are
            pytype_name = "str"
            py_type = <PyType>_PYTYPE_DICT.get(pytype_name)
        else:
            pytype_name = _typename(value)
            py_type = <PyType>_PYTYPE_DICT.get(pytype_name)
            if py_type is not None:
                value = py_type.stringify(value)
            else:
                value = unicode(value)
        if py_type is not None:
            cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name)
        else:
            # unknown type: do not leave a stale pytype attribute behind
            cetree.delAttributeFromNsName(
                element._c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
    cetree.setNodeText(element._c_node, value)
511
+
512
cdef _setSlice(sliceobject, _Element target, items):
    # Implement "target[sliceobject] = items" on the siblings of 'target'
    # that share its tag.
    #
    # Existing siblings in the slice are replaced pairwise by new elements
    # built from 'items'; surplus old siblings are removed and surplus new
    # elements are inserted.  As for lists, an extended slice (step != 1)
    # requires both sides to have the same length (ValueError otherwise).
    cdef _Element parent
    cdef tree.xmlNode* c_node
    cdef Py_ssize_t c_step, c_start, pos
    # collect existing slice
    if (<slice>sliceobject).step is None:
        c_step = 1
    else:
        c_step = (<slice>sliceobject).step
    if c_step == 0:
        raise ValueError, "Invalid slice"
    cdef list del_items = target[sliceobject]

    # collect new values
    new_items = []
    tag = target.tag
    for item in items:
        if isinstance(item, _Element):
            # deep copy the new element
            new_element = cetree.deepcopyNodeToDocument(
                target._doc, (<_Element>item)._c_node)
            new_element.tag = tag
        else:
            new_element = cetree.makeElement(
                tag, target._doc, None, None, None, None, None)
            _setElementValue(new_element, item)
        new_items.append(new_element)

    # sanity check - raise what a list would raise
    if c_step != 1 and len(del_items) != len(new_items):
        raise ValueError, \
            f"attempt to assign sequence of size {len(new_items)} to extended slice of size {len(del_items)}"

    # replace existing items
    pos = 0
    parent = target.getparent()
    replace = parent.replace
    while pos < len(new_items) and pos < len(del_items):
        replace(del_items[pos], new_items[pos])
        pos += 1
    # remove leftover items
    if pos < len(del_items):
        remove = parent.remove
        while pos < len(del_items):
            remove(del_items[pos])
            pos += 1
    # append remaining new items
    if pos < len(new_items):
        # the sanity check above guarantees (step == 1)
        if pos > 0:
            item = new_items[pos-1]
        else:
            # Nothing was replaced, so the insertion point must be derived
            # from the slice start.  An omitted start (e.g. "el[:0] = ...")
            # is None, which cannot be compared with or subtracted from an
            # int; for step == 1 it means the same as 0.
            slice_start = (<slice>sliceobject).start
            c_start = 0 if slice_start is None else slice_start
            if c_start > 0:
                c_node = parent._c_node.children
            else:
                c_node = parent._c_node.last
            c_node = _findFollowingSibling(
                c_node, tree._getNs(target._c_node), target._c_node.name,
                c_start - 1)
            if c_node is NULL:
                # no sibling to insert after: append to the parent
                while pos < len(new_items):
                    cetree.appendChildToElement(parent, new_items[pos])
                    pos += 1
                return
            item = cetree.elementFactory(parent._doc, c_node)
        # chain the remaining new elements behind each other
        while pos < len(new_items):
            add = item.addnext
            item = new_items[pos]
            add(item)
            pos += 1
582
+
583
+ ################################################################################
584
+ # Data type support in subclasses
585
+
586
cdef class ObjectifiedDataElement(ObjectifiedElement):
    """Common base class of all data type Elements.

    Subclasses are expected to override the 'pyval' property and, where
    needed, the __str__ method.
    """
    @property
    def pyval(self):
        return textOf(self._c_node)

    def __str__(self):
        content = textOf(self._c_node)
        return content if content else ''

    def __repr__(self):
        content = textOf(self._c_node)
        return content if content else ''

    def _setText(self, s):
        """Set the text content of the element.

        For use in subclasses only. Don't use unless you know what you are
        doing.
        """
        cetree.setNodeText(self._c_node, s)
605
+
606
+
607
cdef class NumberElement(ObjectifiedDataElement):
    """Base class of the numeric data elements.

    The Python value is obtained by calling the parser function stored in
    '_parse_value' on the element text (see _parseNumber()).  All numeric
    operators work on that value and return plain Python results.
    """
    # callable that converts the element text into the Python value
    cdef object _parse_value

    def _setValueParser(self, function):
        """Set the function that parses the Python value from a string.

        Do not use this unless you know what you are doing.
        """
        self._parse_value = function

    @property
    def pyval(self):
        return _parseNumber(self)

    def __int__(self):
        return int(_parseNumber(self))

    def __float__(self):
        return float(_parseNumber(self))

    def __complex__(self):
        return complex(_parseNumber(self))

    def __str__(self):
        return unicode(_parseNumber(self))

    def __repr__(self):
        return repr(_parseNumber(self))

    # NOTE(review): __oct__/__hex__ (and __div__/__rdiv__ below) are Python 2
    # protocol names; presumably kept for backwards compatibility - confirm.
    def __oct__(self):
        return oct(_parseNumber(self))

    def __hex__(self):
        return hex(_parseNumber(self))

    def __richcmp__(self, other, int op):
        # compares the Python values of both sides
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        return hash(_parseNumber(self))

    # Binary operators: both operands go through _numericValueOf(), which
    # unwraps NumberElements and other objects that provide 'pyval'.

    def __add__(self, other):
        return _numericValueOf(self) + _numericValueOf(other)

    def __radd__(self, other):
        return _numericValueOf(other) + _numericValueOf(self)

    def __sub__(self, other):
        return _numericValueOf(self) - _numericValueOf(other)

    def __rsub__(self, other):
        return _numericValueOf(other) - _numericValueOf(self)

    def __mul__(self, other):
        return _numericValueOf(self) * _numericValueOf(other)

    def __rmul__(self, other):
        return _numericValueOf(other) * _numericValueOf(self)

    def __div__(self, other):
        return _numericValueOf(self) / _numericValueOf(other)

    def __rdiv__(self, other):
        return _numericValueOf(other) / _numericValueOf(self)

    def __truediv__(self, other):
        return _numericValueOf(self) / _numericValueOf(other)

    def __rtruediv__(self, other):
        return _numericValueOf(other) / _numericValueOf(self)

    def __floordiv__(self, other):
        return _numericValueOf(self) // _numericValueOf(other)

    def __rfloordiv__(self, other):
        return _numericValueOf(other) // _numericValueOf(self)

    def __mod__(self, other):
        return _numericValueOf(self) % _numericValueOf(other)

    def __rmod__(self, other):
        return _numericValueOf(other) % _numericValueOf(self)

    def __divmod__(self, other):
        return divmod(_numericValueOf(self), _numericValueOf(other))

    def __rdivmod__(self, other):
        return divmod(_numericValueOf(other), _numericValueOf(self))

    def __pow__(self, other, modulo):
        # 'modulo' is passed on unchanged (not unwrapped) to pow()
        if modulo is None:
            return _numericValueOf(self) ** _numericValueOf(other)
        else:
            return pow(_numericValueOf(self), _numericValueOf(other), modulo)

    def __rpow__(self, other, modulo):
        if modulo is None:
            return _numericValueOf(other) ** _numericValueOf(self)
        else:
            return pow(_numericValueOf(other), _numericValueOf(self), modulo)

    def __neg__(self):
        return - _numericValueOf(self)

    def __pos__(self):
        return + _numericValueOf(self)

    def __abs__(self):
        return abs( _numericValueOf(self) )

    def __bool__(self):
        return bool(_numericValueOf(self))

    def __invert__(self):
        return ~ _numericValueOf(self)

    def __lshift__(self, other):
        return _numericValueOf(self) << _numericValueOf(other)

    def __rlshift__(self, other):
        return _numericValueOf(other) << _numericValueOf(self)

    def __rshift__(self, other):
        return _numericValueOf(self) >> _numericValueOf(other)

    def __rrshift__(self, other):
        return _numericValueOf(other) >> _numericValueOf(self)

    def __and__(self, other):
        return _numericValueOf(self) & _numericValueOf(other)

    def __rand__(self, other):
        return _numericValueOf(other) & _numericValueOf(self)

    def __or__(self, other):
        return _numericValueOf(self) | _numericValueOf(other)

    def __ror__(self, other):
        return _numericValueOf(other) | _numericValueOf(self)

    def __xor__(self, other):
        return _numericValueOf(self) ^ _numericValueOf(other)

    def __rxor__(self, other):
        return _numericValueOf(other) ^ _numericValueOf(self)
752
+
753
+
754
cdef class IntElement(NumberElement):
    """Numeric element whose text is parsed with int()."""
    def _init(self):
        self._parse_value = int

    def __index__(self):
        # lets the element be used wherever Python expects an integer index
        value = _parseNumber(self)
        return int(value)
760
+
761
+
762
cdef class FloatElement(NumberElement):
    """Numeric element whose text is parsed with float()."""
    def _init(self):
        self._parse_value = float
765
+
766
+
767
cdef class StringElement(ObjectifiedDataElement):
    """String data class.

    Note that this class does *not* support the sequence protocol of strings:
    len(), iter(), str_attr[0], str_attr[0:1], etc. are *not* supported.
    Instead, use the .text attribute to get a 'real' string.
    """
    @property
    def pyval(self):
        content = textOf(self._c_node)
        return content if content else ''

    def __repr__(self):
        content = textOf(self._c_node)
        return repr(content if content else '')

    def strlen(self):
        # length of the text content, 0 if there is none
        content = textOf(self._c_node)
        return 0 if content is None else len(content)

    def __bool__(self):
        return bool(textOf(self._c_node))

    def __richcmp__(self, other, int op):
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        content = textOf(self._c_node)
        return hash(content if content else '')

    def __add__(self, other):
        left = _strValueOf(self)
        right = _strValueOf(other)
        return left + right

    def __radd__(self, other):
        right = _strValueOf(self)
        left = _strValueOf(other)
        return left + right

    def __mul__(self, other):
        # either operand may be the StringElement
        if isinstance(self, StringElement):
            content = textOf((<StringElement>self)._c_node) or ''
            return content * _numericValueOf(other)
        if isinstance(other, StringElement):
            factor = _numericValueOf(self)
            content = textOf((<StringElement>other)._c_node) or ''
            return factor * content
        return NotImplemented

    def __rmul__(self, other):
        factor = _numericValueOf(other)
        content = textOf((<StringElement>self)._c_node) or ''
        return factor * content

    def __mod__(self, other):
        template = _strValueOf(self) or ''
        return template % other

    def __int__(self):
        content = textOf(self._c_node)
        return int(content)

    def __float__(self):
        content = textOf(self._c_node)
        return float(content)

    def __complex__(self):
        content = textOf(self._c_node)
        return complex(content)
829
+
830
+
831
cdef class NoneElement(ObjectifiedDataElement):
    """Data element that stands in for Python's None."""
    def __str__(self):
        return "None"

    def __repr__(self):
        return "None"

    def __bool__(self):
        return False

    def __richcmp__(self, other, int op):
        # Compare as if the NoneElement operand were None itself.
        if other is None or self is None:
            lhs = rhs = None
        elif isinstance(self, NoneElement):
            lhs, rhs = None, other
        else:
            lhs, rhs = self, None
        return python.PyObject_RichCompare(lhs, rhs, op)

    def __hash__(self):
        return hash(None)

    @property
    def pyval(self):
        return None
855
+
856
+
857
cdef class BoolElement(IntElement):
    """Boolean type based on string values: 'true' or 'false'.

    Note that this inherits from IntElement to mimic the behaviour of
    Python's bool type.
    """
    def _init(self):
        self._parse_value = _parseBool # wraps as Python callable

    # All conversions below parse the element text with _parseBool(), which
    # raises ValueError for text that is not a recognised boolean literal.

    def __bool__(self):
        return _parseBool(textOf(self._c_node))

    def __int__(self):
        return 0 + _parseBool(textOf(self._c_node))

    def __float__(self):
        return 0.0 + _parseBool(textOf(self._c_node))

    def __richcmp__(self, other, int op):
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        return hash(_parseBool(textOf(self._c_node)))

    def __str__(self):
        return unicode(_parseBool(textOf(self._c_node)))

    def __repr__(self):
        return repr(_parseBool(textOf(self._c_node)))

    @property
    def pyval(self):
        return _parseBool(textOf(self._c_node))
890
+
891
+
892
cdef _checkBool(s):
    # Type check for booleans: raises ValueError unless 's' is one of the
    # literals accepted by __parseBoolAsInt().  None is rejected as well.
    cdef int parsed
    if s is None:
        raise ValueError
    parsed = __parseBoolAsInt(s)
    if parsed == -1:
        raise ValueError
898
+
899
+
900
cdef bint _parseBool(s) except -1:
    # Convert a boolean literal to a C truth value.  None counts as False;
    # any unrecognised literal raises ValueError.
    cdef int parsed
    if s is not None:
        parsed = __parseBoolAsInt(s)
        if parsed == -1:
            raise ValueError, f"Invalid boolean value: '{s}'"
        return parsed
    return False
908
+
909
+
910
cdef inline int __parseBoolAsInt(text) except -2:
    # Map 'false'/'0' to 0 and 'true'/'1' to 1; anything else yields -1.
    if text == 'false':
        return 0
    if text == 'true':
        return 1
    if text == '0':
        return 0
    if text == '1':
        return 1
    return -1
920
+
921
+
922
cdef object _parseNumber(NumberElement element):
    # Apply the element's parser function to its text content.
    return element._parse_value(textOf(element._c_node))
924
+
925
+
926
# States of the number literal scanner in _checkNumber().
cdef enum NumberParserState:
    NPS_SPACE_PRE = 0    # initial state / leading whitespace
    NPS_SIGN = 1         # after a leading '+' or '-'
    NPS_DIGITS = 2       # inside the integer digits
    NPS_POINT_LEAD = 3   # '.' seen before any digit
    NPS_POINT = 4        # '.' seen after integer digits
    NPS_FRACTION = 5     # inside the digits after the point
    NPS_EXP = 6          # after the exponent marker
    NPS_EXP_SIGN = 7     # after the sign of the exponent
    NPS_DIGITS_EXP = 8   # inside the exponent digits
    NPS_SPACE_TAIL = 9   # trailing whitespace after a complete literal
    NPS_INF1 = 20        # letters of "inf" read so far: i
    NPS_INF2 = 21        # ... in
    NPS_INF3 = 22        # ... inf (complete)
    NPS_NAN1 = 23        # letters of "nan" read so far: n
    NPS_NAN2 = 24        # ... na
    NPS_NAN3 = 25        # ... nan (complete)
    NPS_ERROR = 99       # invalid input
944
+
945
+
946
# Fused type: lets _checkNumber() be specialised for both bytes and unicode
# input.
ctypedef fused bytes_unicode:
    bytes
    unicode
949
+
950
+
951
cdef _checkNumber(bytes_unicode s, bint allow_float):
    # Validate that 's' is a number literal; raises ValueError if it is not.
    #
    # Accepted: optional surrounding whitespace, an optional sign, decimal
    # digits and - only if 'allow_float' is true - a decimal point, an
    # exponent and the special values INF / NaN (in any letter case).
    # The input is scanned once with a small state machine
    # (see NumberParserState).
    cdef Py_UCS4 c
    cdef NumberParserState state = NPS_SPACE_PRE

    for c in s:
        if c in '0123456789':
            if state in (NPS_DIGITS, NPS_FRACTION, NPS_DIGITS_EXP):
                pass
            elif state in (NPS_SPACE_PRE, NPS_SIGN):
                state = NPS_DIGITS
            elif state in (NPS_POINT_LEAD, NPS_POINT):
                state = NPS_FRACTION
            elif state in (NPS_EXP, NPS_EXP_SIGN):
                state = NPS_DIGITS_EXP
            else:
                state = NPS_ERROR
        else:
            if c == '.':
                if state in (NPS_SPACE_PRE, NPS_SIGN):
                    state = NPS_POINT_LEAD
                elif state == NPS_DIGITS:
                    state = NPS_POINT
                else:
                    state = NPS_ERROR
                if not allow_float:
                    state = NPS_ERROR
            elif c in '-+':
                if state == NPS_SPACE_PRE:
                    state = NPS_SIGN
                elif state == NPS_EXP:
                    state = NPS_EXP_SIGN
                else:
                    state = NPS_ERROR
            # Accept both exponent markers.  XML Schema and Python allow 'e'
            # as well as 'E', and repr() of a Python float (which is what
            # _xml_float() writes into the tree) uses the lower case form.
            elif c in 'eE':
                if state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION):
                    state = NPS_EXP
                else:
                    state = NPS_ERROR
                if not allow_float:
                    state = NPS_ERROR
            # Allow INF and NaN. XMLSchema requires case, we don't, like Python.
            elif c in 'iI':
                state = NPS_INF1 if allow_float and state in (NPS_SPACE_PRE, NPS_SIGN) else NPS_ERROR
            elif c in 'fF':
                state = NPS_INF3 if state == NPS_INF2 else NPS_ERROR
            elif c in 'aA':
                state = NPS_NAN2 if state == NPS_NAN1 else NPS_ERROR
            elif c in 'nN':
                # Python also allows [+-]NaN, so let's accept that.
                if state in (NPS_SPACE_PRE, NPS_SIGN):
                    state = NPS_NAN1 if allow_float else NPS_ERROR
                elif state == NPS_NAN2:
                    state = NPS_NAN3
                elif state == NPS_INF1:
                    state = NPS_INF2
                else:
                    state = NPS_ERROR
            # Allow spaces around text values.
            else:
                if c.isspace() if (bytes_unicode is unicode) else c in b'\x09\x0a\x0b\x0c\x0d\x20':
                    if state in (NPS_SPACE_PRE, NPS_SPACE_TAIL):
                        pass
                    elif state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3):
                        state = NPS_SPACE_TAIL
                    else:
                        state = NPS_ERROR
                else:
                    state = NPS_ERROR

        # the error state is final, no need to look at the rest
        if state == NPS_ERROR:
            break

    # only these states mark a complete literal
    if state not in (NPS_DIGITS, NPS_FRACTION, NPS_POINT, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3, NPS_SPACE_TAIL):
        raise ValueError
1025
+
1026
+
1027
cdef _checkInt(s):
    # Type check for integers: ValueError unless 's' is an integer literal.
    return _checkNumber(<unicode>s, False)
1029
+
1030
+
1031
cdef _checkFloat(s):
    # Type check for floats: ValueError unless 's' is a number literal.
    return _checkNumber(<unicode>s, True)
1033
+
1034
+
1035
cdef object _strValueOf(obj):
    # String value of an operand: strings pass through, elements yield
    # their text (or ''), None yields '', everything else is converted.
    if python._isString(obj):
        return obj
    if obj is None:
        return ''
    if isinstance(obj, _Element):
        content = textOf((<_Element>obj)._c_node)
        return content or ''
    return unicode(obj)
1043
+
1044
+
1045
cdef object _numericValueOf(obj):
    # Numeric value of an operand: NumberElements are parsed, objects with
    # a 'pyval' attribute yield that, anything else is returned unchanged.
    if isinstance(obj, NumberElement):
        return _parseNumber(<NumberElement>obj)
    try:
        # not always numeric, but Python will raise the right exception
        value = obj.pyval
    except AttributeError:
        return obj
    return value
1054
+
1055
+
1056
cdef _richcmpPyvals(left, right, int op):
    # Rich comparison on the 'pyval' of each operand, falling back to the
    # operand itself where it has no such attribute.
    lhs = getattr(left, 'pyval', left)
    rhs = getattr(right, 'pyval', right)
    return python.PyObject_RichCompare(lhs, rhs, op)
1060
+
1061
+
1062
+ ################################################################################
1063
+ # Python type registry
1064
+
1065
cdef class PyType:
    """PyType(self, name, type_check, type_class, stringify=None)
    User defined type.

    Named type that contains a type check function, a type class that
    inherits from ObjectifiedDataElement and an optional "stringification"
    function. The type check must take a string as argument and raise
    ValueError or TypeError if it cannot handle the string value. It may be
    None in which case it is not considered for type guessing. For registered
    named types, the 'stringify' function (or unicode() if None) is used to
    convert a Python object with type name 'name' to the string representation
    stored in the XML tree.

    Example::

        PyType('int', int, MyIntClass).register()

    Note that the order in which types are registered matters. The first
    matching type will be used.
    """
    cdef readonly object name
    cdef readonly object type_check
    cdef readonly object stringify
    cdef object _type
    cdef list _schema_types
    def __init__(self, name, type_check, type_class, stringify=None):
        # byte string names are accepted if they are plain ASCII
        if isinstance(name, bytes):
            name = (<bytes>name).decode('ascii')
        elif not isinstance(name, unicode):
            raise TypeError, "Type name must be a string"
        if type_check is not None and not callable(type_check):
            raise TypeError, "Type check function must be callable (or None)"
        # the tree type is the only one exempt from the base class check
        if name != TREE_PYTYPE_NAME and \
               not issubclass(type_class, ObjectifiedDataElement):
            raise TypeError, \
                "Data classes must inherit from ObjectifiedDataElement"
        self.name = name
        self._type = type_class
        self.type_check = type_check
        if stringify is None:
            stringify = unicode
        self.stringify = stringify
        self._schema_types = []

    def __repr__(self):
        return "PyType(%s, %s)" % (self.name, self._type.__name__)

    def register(self, before=None, after=None):
        """register(self, before=None, after=None)

        Register the type.

        The additional keyword arguments 'before' and 'after' accept a
        sequence of type names that must appear before/after the new type in
        the type list. If any of them is not currently known, it is simply
        ignored. Raises ValueError if the dependencies cannot be fulfilled.
        """
        if self.name == TREE_PYTYPE_NAME:
            raise ValueError, "Cannot register tree type"
        if self.type_check is not None:
            # drop a previous registration of the same check function
            for item in _TYPE_CHECKS:
                if item[0] is self.type_check:
                    _TYPE_CHECKS.remove(item)
                    break
            entry = (self.type_check, self)
            # first_pos: earliest allowed position (behind all 'after' types)
            # last_pos: position of the first 'before' type, -1 if none found
            first_pos = 0
            last_pos = -1
            if before or after:
                # at least one of them is non-empty here, so at most one
                # of them can still be None
                if before is None:
                    before = ()
                elif after is None:
                    after = ()
                for i, (check, pytype) in enumerate(_TYPE_CHECKS):
                    if last_pos == -1 and pytype.name in before:
                        last_pos = i
                    if pytype.name in after:
                        first_pos = i+1
            if last_pos == -1:
                _TYPE_CHECKS.append(entry)
            elif first_pos > last_pos:
                raise ValueError, "inconsistent before/after dependencies"
            else:
                _TYPE_CHECKS.insert(last_pos, entry)

        _PYTYPE_DICT[self.name] = self
        for xs_type in self._schema_types:
            _SCHEMA_TYPE_DICT[xs_type] = self

    def unregister(self):
        "unregister(self)"
        # only remove registry entries that actually refer to this instance
        if _PYTYPE_DICT.get(self.name) is self:
            del _PYTYPE_DICT[self.name]
        # iterate over a copy, the dict is modified inside the loop
        for xs_type, pytype in list(_SCHEMA_TYPE_DICT.items()):
            if pytype is self:
                del _SCHEMA_TYPE_DICT[xs_type]
        if self.type_check is None:
            return
        try:
            _TYPE_CHECKS.remove( (self.type_check, self) )
        except ValueError:
            # was not in the list of type checks
            pass

    property xmlSchemaTypes:
        """The list of XML Schema datatypes this Python type maps to.

        Note that this must be set before registering the type!
        """
        def __get__(self):
            return self._schema_types
        def __set__(self, types):
            self._schema_types = list(map(unicode, types))
1176
+
1177
+
1178
# Module-level type registries, filled by PyType.register().
# PyType name -> PyType
cdef dict _PYTYPE_DICT = {}
# XML Schema type name -> PyType
cdef dict _SCHEMA_TYPE_DICT = {}
# (type_check, PyType) tuples, tried in list order when guessing a type
cdef list _TYPE_CHECKS = []
1181
+
1182
cdef unicode _xml_bool(value):
    # Map the truth value of 'value' to the text "true" or "false".
    if value:
        return "true"
    return "false"
1184
+
1185
cdef unicode _xml_float(value):
    # Infinite values become "INF" / "-INF", NaN becomes "NaN"; everything
    # else is rendered through repr().
    if _float_is_inf(value):
        return "INF" if value > 0 else "-INF"
    if _float_is_nan(value):
        return "NaN"
    return unicode(repr(value))
1193
+
1194
cdef _pytypename(obj):
    # Everything python._isString() accepts is reported as "str"; other
    # objects get the name computed by _typename().
    if python._isString(obj):
        return "str"
    return _typename(obj)
1196
+
1197
def pytypename(obj):
    """pytypename(obj)

    Find the name of the corresponding PyType for a Python object.

    Public wrapper around the internal _pytypename() helper.
    """
    return _pytypename(obj)
1203
+
1204
cdef _registerPyTypes():
    # Registers the built-in PyTypes.  Types that carry a check function are
    # appended to the type check list by register(), so the int / float /
    # bool order below is also the order in which values are tested.
    int_type = PyType('int', _checkInt, IntElement)  # wraps functions for Python
    int_type.xmlSchemaTypes = ("integer", "int", "short", "byte", "unsignedShort",
                               "unsignedByte", "nonPositiveInteger",
                               "negativeInteger", "long", "nonNegativeInteger",
                               "unsignedLong", "unsignedInt", "positiveInteger",)
    int_type.register()

    # 'long' type just for backwards compatibility
    long_type = PyType('long', None, IntElement)
    long_type.register()

    float_type = PyType('float', _checkFloat, FloatElement, _xml_float)  # wraps functions for Python
    float_type.xmlSchemaTypes = ("double", "float")
    float_type.register()

    bool_type = PyType('bool', _checkBool, BoolElement, _xml_bool)  # wraps functions for Python
    bool_type.xmlSchemaTypes = ("boolean",)
    bool_type.register()

    str_type = PyType('str', None, StringElement)
    str_type.xmlSchemaTypes = ("string", "normalizedString", "token", "language",
                               "Name", "NCName", "ID", "IDREF", "ENTITY",
                               "NMTOKEN", )
    str_type.register()

    # since lxml 2.0
    none_type = PyType('NoneType', None, NoneElement)
    none_type.register()

    # backwards compatibility
    legacy_none_type = PyType('none', None, NoneElement)
    legacy_none_type.register()
1237
+
1238
# non-registered PyType for inner tree elements
# (register() refuses the name TREE_PYTYPE_NAME, so it is only kept here)
cdef PyType TREE_PYTYPE = PyType(TREE_PYTYPE_NAME, None, ObjectifiedElement)

# populate the registries with the built-in types at module import time
_registerPyTypes()
1242
+
1243
def getRegisteredTypes():
    """getRegisteredTypes()

    Returns a list of the currently registered PyType objects.

    To add a new type, retrieve this list and call unregister() for all
    entries.  Then add the new type at a suitable position (possibly replacing
    an existing one) and call register() for all entries.

    This is necessary if the new type interferes with the type check functions
    of existing ones (normally only int/float/bool) and must be tried before
    other types.  To add a type that is not yet parsable by the current type
    check functions, you can simply register() it, which will append it to the
    end of the type list.
    """
    cdef list types = []
    cdef set known = set()
    # types with a check function come first (in check order), followed by
    # the remaining name-registered types
    candidates = [entry[1] for entry in _TYPE_CHECKS]
    candidates.extend(_PYTYPE_DICT.values())
    for pytype in candidates:
        name = pytype.name
        if name in known:
            continue
        known.add(name)
        types.append(pytype)
    return types
1271
+
1272
cdef PyType _guessPyType(value, PyType defaulttype):
    # Returns the first registered PyType whose check function accepts
    # 'value', 'defaulttype' if none does, and None for a None value.
    if value is None:
        return None
    for accepts, candidate in _TYPE_CHECKS:
        try:
            accepts(value)
        except IGNORABLE_ERRORS:
            # could not be parsed as the specified type => try the next one
            continue
        else:
            return <PyType>candidate
    return defaulttype
1283
+
1284
cdef object _guessElementClass(tree.xmlNode* c_node):
    # Picks an element class from the node's text: None if there is no text,
    # StringElement for empty text, otherwise the class of the first
    # registered PyType whose check accepts the text (None if none does).
    text = textOf(c_node)
    if text is None:
        return None
    if text == '':
        return StringElement

    for accepts, candidate in _TYPE_CHECKS:
        try:
            accepts(text)
        except IGNORABLE_ERRORS:
            continue
        else:
            return (<PyType>candidate)._type
    return None
1298
+
1299
+ ################################################################################
1300
+ # adapted ElementMaker supports registered PyTypes
1301
+
1302
@cython.final
@cython.internal
cdef class _ObjectifyElementMakerCaller:
    # Callable bound to a single tag; ElementMaker builds and hands out
    # instances of it.
    cdef object _tag
    cdef object _nsmap
    cdef object _element_factory
    cdef bint _annotate

    def __call__(self, *children, **attrib):
        "__call__(self, *children, **attrib)"
        cdef _ObjectifyElementMakerCaller sub_maker
        cdef _Element element
        cdef _Element sub_element
        cdef bint has_children = False
        cdef bint has_string_value = False

        factory = self._element_factory
        if factory is None:
            element = _makeElement(self._tag, None, attrib, self._nsmap)
        else:
            element = factory(self._tag, attrib, self._nsmap)

        pytype_name = None
        is_only_child = len(children) == 1
        for child in children:
            if child is None:
                # a lone None marks the element as xsi:nil
                if is_only_child:
                    cetree.setAttributeValue(
                        element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true")
            elif python._isString(child):
                _add_text(element, child)
                has_string_value = True
            elif isinstance(child, _Element):
                cetree.appendChildToElement(element, <_Element>child)
                has_children = True
            elif isinstance(child, _ObjectifyElementMakerCaller):
                # an uncalled maker (e.g. ``M.br``) stands for an empty child
                sub_maker = <_ObjectifyElementMakerCaller>child
                if sub_maker._element_factory is None:
                    cetree.makeSubElement(element, sub_maker._tag,
                                          None, None, None, None)
                else:
                    sub_element = sub_maker._element_factory(sub_maker._tag)
                    cetree.appendChildToElement(element, sub_element)
                has_children = True
            elif isinstance(child, dict):
                for name, value in child.items():
                    # keyword arguments in attrib take precedence
                    if name not in attrib:
                        pytype = _PYTYPE_DICT.get(_typename(value))
                        if pytype is not None:
                            value = (<PyType>pytype).stringify(value)
                        elif not python._isString(value):
                            value = unicode(value)
                        cetree.setAttributeValue(element, name, value)
            else:
                if pytype_name is not None:
                    # concatenation always makes the result a string
                    has_string_value = True
                pytype_name = _typename(child)
                pytype = _PYTYPE_DICT.get(pytype_name)
                if pytype is None:
                    # unregistered type: fall back to its unicode() form
                    has_string_value = True
                    _add_text(element, unicode(child))
                else:
                    _add_text(element, (<PyType>pytype).stringify(child))

        if self._annotate and not has_children:
            if has_string_value:
                cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, "str")
            elif pytype_name is not None:
                cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name)

        return element
1377
+
1378
cdef _add_text(_Element elem, text):
    # add text to the tree in construction, either as element text or
    # tail text, depending on the current tree state
    cdef tree.xmlNode* c_last_child
    c_last_child = cetree.findChildBackwards(elem._c_node, 0)
    if c_last_child is NULL:
        # no child element found: extend the element's own text
        existing = cetree.textOf(elem._c_node)
        if existing is not None:
            text = existing + text
        cetree.setNodeText(elem._c_node, text)
    else:
        # extend the tail of the child that was found
        existing = cetree.tailOf(c_last_child)
        if existing is not None:
            text = existing + text
        cetree.setTailText(c_last_child, text)
1393
+
1394
cdef class ElementMaker:
    """ElementMaker(self, namespace=None, nsmap=None, annotate=True, makeelement=None)

    An ElementMaker that can be used for constructing trees.

    Example::

      >>> M = ElementMaker(annotate=False)
      >>> attributes = {'class': 'par'}
      >>> html = M.html( M.body( M.p('hello', attributes, M.br, 'objectify', style="font-weight: bold") ) )

      >>> from lxml.etree import tostring
      >>> print(tostring(html, method='html').decode('ascii'))
      <html><body><p style="font-weight: bold" class="par">hello<br>objectify</p></body></html>

    To create tags that are not valid Python identifiers, call the factory
    directly and pass the tag name as first argument::

      >>> root = M('tricky-tag', 'some text')
      >>> print(root.tag)
      tricky-tag
      >>> print(root.text)
      some text

    Note that this module has a predefined ElementMaker instance called ``E``.
    """
    cdef object _makeelement
    cdef object _namespace
    cdef object _nsmap
    cdef bint _annotate
    cdef dict _cache

    def __init__(self, *, namespace=None, nsmap=None, annotate=True,
                 makeelement=None):
        if nsmap is None:
            if annotate:
                nsmap = _DEFAULT_NSMAP
            else:
                nsmap = {}
        self._nsmap = nsmap
        if namespace is None:
            self._namespace = None
        else:
            self._namespace = "{%s}" % namespace
        self._annotate = annotate
        if makeelement is not None and not callable(makeelement):
            raise TypeError(
                f"argument of 'makeelement' parameter must be callable, got {type(makeelement)}")
        self._makeelement = makeelement
        self._cache = {}

    @cython.final
    cdef _build_element_maker(self, tag, bint caching):
        # Creates the per-tag callable and optionally stores it in the cache.
        cdef _ObjectifyElementMakerCaller maker
        maker = _ObjectifyElementMakerCaller.__new__(_ObjectifyElementMakerCaller)
        # tags that already start with "{" are left unqualified
        if self._namespace is None or tag[0] == "{":
            maker._tag = tag
        else:
            maker._tag = self._namespace + tag
        maker._nsmap = self._nsmap
        maker._annotate = self._annotate
        maker._element_factory = self._makeelement
        if caching:
            # the cache is wiped once it holds more than 200 entries
            if len(self._cache) > 200:
                self._cache.clear()
            self._cache[tag] = maker
        return maker

    def __getattr__(self, tag):
        cached = self._cache.get(tag)
        if cached is not None:
            return cached
        return self._build_element_maker(tag, caching=True)

    def __call__(self, tag, *args, **kwargs):
        maker = self._cache.get(tag)
        if maker is None:
            maker = self._build_element_maker(
                tag, caching=not is_special_method(tag))
        return maker(*args, **kwargs)
1470
+
1471
+ ################################################################################
1472
+ # Recursive element dumping
1473
+
1474
# module-wide flag, written by enable_recursive_str()
cdef bint __RECURSIVE_STR = 0 # default: off
1475
+
1476
def enable_recursive_str(on=True):
    """enable_recursive_str(on=True)

    Enable a recursively generated tree representation for str(element),
    based on objectify.dump(element).

    Pass a false value for ``on`` to switch the feature off again.
    """
    global __RECURSIVE_STR
    # the target is a C boolean, so 'on' is reduced to its truth value
    __RECURSIVE_STR = on
1484
+
1485
def dump(_Element element not None):
    """dump(_Element element not None)

    Return a recursively generated string representation of an element.

    The element itself is rendered at indentation level 0.
    """
    return _dump(element, 0)
1491
+
1492
cdef object _dump(_Element element, int indent):
    # Renders 'element', its attributes and (recursively) its children as
    # text, one line each.  The top-level call (indent == 0) returns the
    # text without the final newline.
    cdef list lines
    # NOTE(review): the whitespace inside this literal and the " * " literal
    # below may have been collapsed when this source was extracted - confirm
    # against upstream before relying on the exact indentation width.
    indentstr = " " * indent
    if isinstance(element, ObjectifiedDataElement):
        value = repr(element)
    else:
        value = textOf(element._c_node)
        if value is not None:
            # whitespace-only text is shown as None
            value = repr(value) if value.strip() else None
    lines = [f"{indentstr}{element.tag} = {value} [{_typename(element)}]\n"]

    xsi_ns = "{%s}" % XML_SCHEMA_INSTANCE_NS
    pytype_ns = "{%s}" % PYTYPE_NAMESPACE
    for name, value in sorted(cetree.iterattributes(element, 3)):
        if '{' in name:
            if name == PYTYPE_ATTRIBUTE:
                if value == TREE_PYTYPE_NAME:
                    # the tree pytype annotation is not shown
                    continue
                name = name.replace(pytype_ns, 'py:')
            name = name.replace(xsi_ns, 'xsi:')
        lines.append(f"{indentstr} * {name} = {value!r}\n")

    for child in element.iterchildren():
        lines.append(_dump(child, indent + 1))

    result = "".join(lines)
    if indent == 0:
        return result[:-1] # strip last '\n'
    return result
1523
+
1524
+
1525
+ ################################################################################
1526
+ # Pickle support for objectified ElementTree
1527
+
1528
def __unpickleElementTree(data):
    # Rebuild an ElementTree by parsing the serialised 'data' with this
    # module's fromstring().
    return etree.ElementTree(fromstring(data))
1530
+
1531
cdef _setupPickle(elementTreeReduceFunction):
    # Register the given reduce function (and the matching constructor) for
    # etree._ElementTree with the copyreg module.
    import copyreg
    copyreg.pickle(etree._ElementTree,
                   elementTreeReduceFunction, __unpickleElementTree)
1535
+
1536
def pickleReduceElementTree(obj):
    # Reduce function: returns the reconstructor and, as its only argument,
    # the tree serialised by etree.tostring().
    return __unpickleElementTree, (etree.tostring(obj),)
1538
+
1539
# install the pickle hooks once, then remove the helper's name from the
# module namespace
_setupPickle(pickleReduceElementTree)
del pickleReduceElementTree
1541
+
1542
+ ################################################################################
1543
+ # Element class lookup
1544
+
1545
cdef class ObjectifyElementClassLookup(ElementClassLookup):
    """ObjectifyElementClassLookup(self, tree_class=None, empty_data_class=None)
    Element class lookup method that uses the objectify classes.
    """
    cdef object empty_data_class
    cdef object tree_class
    def __init__(self, tree_class=None, empty_data_class=None):
        """Lookup mechanism for objectify.

        The default Element classes can be replaced by passing subclasses of
        ObjectifiedElement and ObjectifiedDataElement as keyword arguments.
        'tree_class' defines inner tree classes (defaults to
        ObjectifiedElement), 'empty_data_class' defines the default class for
        empty data elements (defaults to StringElement).
        """
        self._lookup_function = _lookupElementClass
        self.tree_class = (
            ObjectifiedElement if tree_class is None else tree_class)
        self.empty_data_class = (
            StringElement if empty_data_class is None else empty_data_class)
1567
+
1568
cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node):
    # Decides which Python class represents 'c_node'.  The checks run in this
    # order: children, xsi:nil, pytype hint, xsi:type hint, text content,
    # root position, and finally the configured empty-data class.
    cdef ObjectifyElementClassLookup lookup = <ObjectifyElementClassLookup>state
    cdef PyType hinted_type

    # if element has children => no data class
    if cetree.hasChild(c_node):
        return lookup.tree_class

    # if element is defined as xsi:nil, return NoneElement class
    nil_value = cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil")
    if nil_value == "true":
        return NoneElement

    # check for Python type hint
    hint = cetree.attributeValueFromNsName(
        c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
    if hint is not None:
        if hint == TREE_PYTYPE_NAME:
            return lookup.tree_class
        hinted_type = <PyType>_PYTYPE_DICT.get(hint)
        if hinted_type is not None:
            return hinted_type._type
        # unknown pytype name => try to figure it out ourself, just go on

    # check for XML Schema type hint
    hint = cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
    if hint is not None:
        hinted_type = <PyType>_SCHEMA_TYPE_DICT.get(hint)
        if hinted_type is None and ':' in hint:
            # retry with the part after the first colon
            hint = hint.split(':', 1)[1]
            hinted_type = <PyType>_SCHEMA_TYPE_DICT.get(hint)
        if hinted_type is not None:
            return hinted_type._type

    # otherwise determine class based on text content type
    guessed_class = _guessElementClass(c_node)
    if guessed_class is not None:
        return guessed_class

    # if element is a root node => default to tree node
    if c_node.parent is NULL or not tree._isElement(c_node.parent):
        return lookup.tree_class

    return lookup.empty_data_class
1613
+
1614
+
1615
+ ################################################################################
1616
+ # Type annotations
1617
+
1618
cdef PyType _check_type(tree.xmlNode* c_node, PyType pytype):
    # Returns 'pytype' if its check function accepts the node's text,
    # otherwise None.  A None 'pytype' is passed through as None.
    if pytype is None:
        return None
    text = textOf(c_node)
    try:
        pytype.type_check(text)
    except IGNORABLE_ERRORS:
        # could not be parsed as the specified type => ignore
        return None
    return pytype
1629
+
1630
def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False,
               empty_pytype=None):
    """pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, empty_pytype=None)

    Recursively annotates the elements of an XML tree with 'pytype'
    attributes.

    If the 'ignore_old' keyword argument is True, current 'pytype'
    attributes will be ignored and replaced.  Otherwise (the default), they
    will be checked and only replaced if they no longer fit the current text
    value.

    Setting the keyword argument ``ignore_xsi`` to True makes the function
    additionally ignore existing ``xsi:type`` annotations.  The default is to
    use them as a type hint.

    The default annotation of empty elements can be set with the
    ``empty_pytype`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'str', for example, to make string values the default.
    """
    cdef _Element element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # positional flags: annotate_xsi=0, annotate_pytype=1; 'ignore_old' is
    # passed in the ignore_pytype position
    _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype)
1652
+
1653
def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False,
                empty_type=None):
    """xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None)

    Recursively annotates the elements of an XML tree with 'xsi:type'
    attributes.

    If the 'ignore_old' keyword argument is True, current 'xsi:type'
    attributes will be ignored and replaced.  Otherwise (the default), they
    will be checked and only replaced if they no longer fit the current text
    value.

    Note that the mapping from Python types to XSI types is usually ambiguous.
    Currently, only the first XSI type name in the corresponding PyType
    definition will be used for annotation.  Thus, you should consider naming
    the widest type first if you define additional types.

    Setting the keyword argument ``ignore_pytype`` to True makes the function
    additionally ignore existing ``pytype`` annotations.  The default is to
    use them as a type hint.

    The default annotation of empty elements can be set with the
    ``empty_type`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'string', for example, to make string values the default.
    """
    cdef _Element element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # positional flags: annotate_xsi=1, annotate_pytype=0; 'ignore_old' is
    # passed in the ignore_xsi position
    _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None)
1680
+
1681
def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False,
             empty_pytype=None, empty_type=None, annotate_xsi=0,
             annotate_pytype=1):
    """annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1)

    Recursively annotates the elements of an XML tree with 'xsi:type'
    and/or 'py:pytype' attributes.

    If the 'ignore_old' keyword argument is True (the default), current
    'py:pytype' attributes will be ignored for the type annotation.  Set to
    False if you want to reuse existing 'py:pytype' information (iff
    appropriate for the element text value).

    If the 'ignore_xsi' keyword argument is False (the default), existing
    'xsi:type' attributes will be used for the type annotation, if they fit the
    element text values.

    Note that the mapping from Python types to XSI types is usually ambiguous.
    Currently, only the first XSI type name in the corresponding PyType
    definition will be used for annotation.  Thus, you should consider naming
    the widest type first if you define additional types.

    The default 'py:pytype' annotation of empty elements can be set with the
    ``empty_pytype`` keyword argument.  Pass 'str', for example, to make
    string values the default.

    The default 'xsi:type' annotation of empty elements can be set with the
    ``empty_type`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'string', for example, to make string values the default.

    The keyword arguments 'annotate_xsi' (default: 0) and 'annotate_pytype'
    (default: 1) control which kind(s) of annotation to use.
    """
    cdef _Element element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # 'ignore_old' is passed in the ignore_pytype position
    _annotate(element, annotate_xsi, annotate_pytype, ignore_xsi,
              ignore_old, empty_type, empty_pytype)
1718
+
1719
+
1720
cdef _annotate(_Element element, bint annotate_xsi, bint annotate_pytype,
               bint ignore_xsi, bint ignore_pytype,
               empty_type_name, empty_pytype_name):
    # Resolves the default type for empty elements once, then applies
    # _annotate_element() to the element nodes visited by the tree macros.
    cdef _Document doc
    cdef tree.xmlNode* c_node
    cdef PyType empty_pytype, str_type, none_type

    if not (annotate_xsi or annotate_pytype):
        # nothing to do
        return

    # an XSI default takes precedence over a pytype default
    if empty_type_name is not None:
        if isinstance(empty_type_name, bytes):
            empty_type_name = (<bytes>empty_type_name).decode("ascii")
        empty_pytype = <PyType>_SCHEMA_TYPE_DICT.get(empty_type_name)
    elif empty_pytype_name is not None:
        if isinstance(empty_pytype_name, bytes):
            empty_pytype_name = (<bytes>empty_pytype_name).decode("ascii")
        empty_pytype = <PyType>_PYTYPE_DICT.get(empty_pytype_name)
    else:
        empty_pytype = None

    str_type = <PyType>_PYTYPE_DICT.get('str')
    none_type = <PyType>_PYTYPE_DICT.get('NoneType')

    doc = element._doc
    c_node = element._c_node
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        _annotate_element(c_node, doc, annotate_xsi, annotate_pytype,
                          ignore_xsi, ignore_pytype,
                          empty_type_name, empty_pytype, str_type, none_type)
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
1752
+
1753
cdef int _annotate_element(tree.xmlNode* c_node, _Document doc,
                           bint annotate_xsi, bint annotate_pytype,
                           bint ignore_xsi, bint ignore_pytype,
                           empty_type_name, PyType empty_pytype,
                           PyType StrType, PyType NoneType) except -1:
    # Determines the PyType / xsi:type name for a single node and writes or
    # deletes the 'xsi:type' and 'py:pytype' attributes accordingly.
    # Sources are consulted in this order: xsi:nil, existing xsi:type,
    # existing pytype, guessing from the text, defaults for empty elements.
    # Returns 0; -1 is reserved for exception propagation.
    cdef tree.xmlNs* c_ns
    cdef PyType pytype = None
    typename = None
    istree = 0

    # if element is defined as xsi:nil, represent it as None
    if cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil") == "true":
        pytype = NoneType

    if pytype is None and not ignore_xsi:
        # check that old xsi type value is valid
        typename = cetree.attributeValueFromNsName(
            c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
        if typename is not None:
            pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename)
            if pytype is None and ':' in typename:
                # retry the lookup with the part after the first colon
                prefix, typename = typename.split(':', 1)
                pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename)
            if pytype is not None and pytype is not StrType:
                # StrType does not have a typecheck but is the default
                # anyway, so just accept it if given as type
                # information
                pytype = _check_type(c_node, pytype)
                if pytype is None:
                    typename = None

    if pytype is None and not ignore_pytype:
        # check that old pytype value is valid
        old_pytypename = cetree.attributeValueFromNsName(
            c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
        if old_pytypename is not None:
            if old_pytypename == TREE_PYTYPE_NAME:
                if not cetree.hasChild(c_node):
                    # only case where we should keep it,
                    # everything else is clear enough
                    pytype = TREE_PYTYPE
            else:
                if old_pytypename == 'none':
                    # transition from lxml 1.x
                    old_pytypename = "NoneType"
                pytype = <PyType>_PYTYPE_DICT.get(old_pytypename)
                if pytype is not None and pytype is not StrType:
                    # StrType does not have a typecheck but is the
                    # default anyway, so just accept it if given as
                    # type information
                    pytype = _check_type(c_node, pytype)

    if pytype is None:
        # try to guess type
        if not cetree.hasChild(c_node):
            # element has no children => data class
            pytype = _guessPyType(textOf(c_node), StrType)
        else:
            istree = 1

    if pytype is None:
        # use default type for empty elements
        if cetree.hasText(c_node):
            pytype = StrType
        else:
            pytype = empty_pytype
            if typename is None:
                typename = empty_type_name

    if pytype is not None:
        if typename is None:
            if not istree:
                if pytype._schema_types:
                    # pytype->xsi:type is a 1:n mapping
                    # simply take the first
                    typename = pytype._schema_types[0]
        elif typename not in pytype._schema_types:
            # NOTE(review): unlike the branch above, this indexes
            # _schema_types without checking that it is non-empty - confirm
            # that a leftover type name can never meet a PyType without
            # schema types here.
            typename = pytype._schema_types[0]

    if annotate_xsi:
        if typename is None or istree:
            cetree.delAttributeFromNsName(
                c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
        else:
            # update or create attribute
            typename_utf8 = cetree.utf8(typename)
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _XML_SCHEMA_NS, <unsigned char*>'xsd')
            if c_ns is not NULL:
                # align the prefix of the type name with the prefix that the
                # schema namespace has on this node
                if b':' in typename_utf8:
                    prefix, name = typename_utf8.split(b':', 1)
                    if c_ns.prefix is NULL or c_ns.prefix[0] == c'\0':
                        typename_utf8 = name
                    elif tree.xmlStrcmp(_xcstr(prefix), c_ns.prefix) != 0:
                        typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + name
                elif c_ns.prefix is not NULL and c_ns.prefix[0] != c'\0':
                    typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + typename_utf8
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi')
            tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"type", _xcstr(typename_utf8))

    if annotate_pytype:
        if pytype is None:
            # delete attribute if it exists
            cetree.delAttributeFromNsName(
                c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
        else:
            # update or create attribute
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _PYTYPE_NAMESPACE, <unsigned char*>'py')
            pytype_name = cetree.utf8(pytype.name)
            tree.xmlSetNsProp(c_node, c_ns, _PYTYPE_ATTRIBUTE_NAME,
                              _xcstr(pytype_name))
            if pytype is NoneType:
                # None values additionally get an explicit xsi:nil="true"
                c_ns = cetree.findOrBuildNodeNsPrefix(
                    doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi')
                tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"nil", <unsigned char*>"true")

    return 0
1873
+
1874
# module-level references to the etree helpers used by deannotate()
cdef object _strip_attributes = etree.strip_attributes
cdef object _cleanup_namespaces = etree.cleanup_namespaces
1876
+
1877
def deannotate(element_or_tree, *, bint pytype=True, bint xsi=True,
               bint xsi_nil=False, bint cleanup_namespaces=False):
    """deannotate(element_or_tree, pytype=True, xsi=True, xsi_nil=False, cleanup_namespaces=False)

    Recursively de-annotate the elements of an XML tree by removing 'py:pytype'
    and/or 'xsi:type' attributes and/or 'xsi:nil' attributes.

    'pytype' (default: True) selects the 'py:pytype' attributes for removal,
    'xsi' (default: True) the 'xsi:type' attributes and 'xsi_nil'
    (default: False) the 'xsi:nil' attributes.

    Note that this does not touch the namespace declarations by
    default.  If you want to remove unused namespace declarations from
    the tree, pass the option ``cleanup_namespaces=True``.
    """
    cdef list attribute_names
    selection = (
        (pytype, PYTYPE_ATTRIBUTE),
        (xsi, XML_SCHEMA_INSTANCE_TYPE_ATTR),
        (xsi_nil, XML_SCHEMA_INSTANCE_NIL_ATTR),
    )
    attribute_names = [attr_name for enabled, attr_name in selection if enabled]

    _strip_attributes(element_or_tree, *attribute_names)
    if cleanup_namespaces:
        _cleanup_namespaces(element_or_tree)
1906
+
1907
+ ################################################################################
1908
+ # Module level parser setup
1909
+
1910
# The built-in parser: removes blank text and uses the objectify class lookup.
cdef object __DEFAULT_PARSER
__DEFAULT_PARSER = etree.XMLParser(remove_blank_text=True)
__DEFAULT_PARSER.set_element_class_lookup( ObjectifyElementClassLookup() )

# The parser currently in effect; replaced by set_default_parser().
cdef object objectify_parser
objectify_parser = __DEFAULT_PARSER
1916
+
1917
def set_default_parser(new_parser = None):
    """set_default_parser(new_parser = None)

    Replace the default parser used by objectify's Element() and
    fromstring() functions.

    The new parser must be an etree.XMLParser; anything else raises
    TypeError.

    Call without arguments to reset to the original parser.
    """
    global objectify_parser
    if new_parser is None:
        objectify_parser = __DEFAULT_PARSER
        return
    if not isinstance(new_parser, etree.XMLParser):
        raise TypeError("parser must inherit from lxml.etree.XMLParser")
    objectify_parser = new_parser
1934
+
1935
def makeparser(**kw):
    """makeparser(remove_blank_text=True, **kw)

    Create a new XML parser for objectify trees.

    All keyword arguments are handed on to ``etree.XMLParser()``.  Unless
    you pass the ``remove_blank_text`` boolean keyword option yourself, it
    is set to True, i.e. this parser defaults to removing blank text.
    """
    kw.setdefault('remove_blank_text', True)
    objectify_xml_parser = etree.XMLParser(**kw)
    objectify_xml_parser.set_element_class_lookup(ObjectifyElementClassLookup())
    return objectify_xml_parser
1950
+
1951
cdef _Element _makeElement(tag, text, attrib, nsmap):
    # Create a new element through the currently active objectify parser.
    return cetree.makeElement(tag, None, objectify_parser, text, None, attrib, nsmap)
1953
+
1954
+ ################################################################################
1955
+ # Module level factory functions
1956
+
1957
# module-level reference to etree.fromstring, used by fromstring() and XML()
cdef object _fromstring
_fromstring = etree.fromstring

# re-exported unchanged from lxml.etree
SubElement = etree.SubElement
1961
+
1962
def fromstring(xml, parser=None, *, base_url=None):
    """fromstring(xml, parser=None, base_url=None)

    Objectify specific version of the lxml.etree fromstring() function
    that uses the objectify parser.

    A different parser can be passed as second argument.

    The ``base_url`` keyword argument allows to set the original base URL of
    the document to support relative Paths when looking up external entities
    (DTD, XInclude, ...).
    """
    chosen_parser = objectify_parser if parser is None else parser
    return _fromstring(xml, chosen_parser, base_url=base_url)
1977
+
1978
def XML(xml, parser=None, *, base_url=None):
    """XML(xml, parser=None, base_url=None)

    Objectify specific version of the lxml.etree XML() literal factory
    that uses the objectify parser.

    A different parser can be passed as second argument.

    The ``base_url`` keyword argument allows to set the original base URL of
    the document to support relative Paths when looking up external entities
    (DTD, XInclude, ...).
    """
    chosen_parser = objectify_parser if parser is None else parser
    return _fromstring(xml, chosen_parser, base_url=base_url)
1993
+
1994
# Private reference to the plain lxml.etree parse(); the objectify parse()
# below delegates to it.
cdef object _parse
_parse = etree.parse
1996
+
1997
def parse(f, parser=None, *, base_url=None):
    """parse(f, parser=None, base_url=None)

    Parse a file or file-like object, using the objectify parser unless a
    different parser is passed as second argument.

    ``base_url`` (keyword only) assigns a URL to the document when parsing
    from a file-like object.  It is needed for looking up external entities
    (DTD, XInclude, ...) with relative paths.
    """
    return _parse(
        f,
        objectify_parser if parser is None else parser,
        base_url=base_url,
    )
2011
+
2012
# Prefix -> namespace map that Element() and DataElement() fall back to when
# the caller does not pass an ``nsmap``.  DataElement() also compares against
# this object by identity ("nsmap is _DEFAULT_NSMAP").
cdef dict _DEFAULT_NSMAP = {
    "py"  : PYTYPE_NAMESPACE,
    "xsi" : XML_SCHEMA_INSTANCE_NS,
    "xsd" : XML_SCHEMA_NS
}

# Module-level ElementMaker instance created with default arguments.
E = ElementMaker()
2019
+
2020
def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes):
    """Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes)

    Objectify specific version of the lxml.etree Element() factory that
    always creates a structural (tree) element.

    XML attributes are taken from the ``attrib`` mapping and from the
    keyword arguments; keyword arguments override entries of ``attrib``.
    ``_pytype`` defaults to the tree type name and ``nsmap`` to the default
    namespace map of this module.  The mapping passed as ``attrib`` is
    copied and never modified.

    NOTE: requires parser based element class lookup activated in lxml.etree!
    """
    if attrib is not None:
        # Always work on a private copy.  The py:pytype attribute is added
        # below, and without the copy it would be written into the caller's
        # mapping (which may even be the live ``.attrib`` of another element).
        attrib = dict(attrib)
        attrib.update(_attributes)
        _attributes = attrib
    if _pytype is None:
        _pytype = TREE_PYTYPE_NAME
    if nsmap is None:
        nsmap = _DEFAULT_NSMAP
    _attributes[PYTYPE_ATTRIBUTE] = _pytype
    return _makeElement(_tag, None, _attributes, nsmap)
2039
+
2040
def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None,
                **_attributes):
    """DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes)

    Create a new element from a Python value and XML attributes taken from
    keyword arguments or a dictionary passed as second argument.

    Automatically adds a 'pytype' attribute for the Python type of the value,
    if the type can be identified.  If '_pytype' or '_xsi' are among the
    keyword arguments, they will be used instead.

    If the _value argument is an ObjectifiedDataElement instance, its py:pytype,
    xsi:type and other attributes and nsmap are reused unless they are redefined
    in attrib and/or keyword arguments.

    The mapping passed as ``attrib`` is copied and never modified.

    Raises ValueError if an xsi type is requested but the namespace map does
    not provide the XSD namespace for it.
    """
    if nsmap is None:
        nsmap = _DEFAULT_NSMAP
    if attrib is not None and attrib:
        # Always work on a private copy: xsi:type, xsi:nil and py:pytype may
        # be stored in the attribute mapping further down, and that must not
        # modify the mapping owned by the caller.
        attrib = dict(attrib)
        attrib.update(_attributes)
        _attributes = attrib
    if isinstance(_value, ObjectifiedElement):
        if _pytype is None:
            if _xsi is None and not _attributes and nsmap is _DEFAULT_NSMAP:
                # special case: no change!
                return _value.__copy__()
    if isinstance(_value, ObjectifiedDataElement):
        # reuse existing nsmap unless redefined in nsmap parameter
        temp = _value.nsmap
        if temp is not None and temp:
            temp = dict(temp)
            temp.update(nsmap)
            nsmap = temp
        # reuse existing attributes unless redefined in attrib/_attributes
        temp = _value.attrib
        if temp is not None and temp:
            temp = dict(temp)
            temp.update(_attributes)
            _attributes = temp
        # reuse existing xsi:type or py:pytype attributes, unless provided as
        # arguments
        if _xsi is None and _pytype is None:
            _xsi = _attributes.get(XML_SCHEMA_INSTANCE_TYPE_ATTR)
            _pytype = _attributes.get(PYTYPE_ATTRIBUTE)

    if _xsi is not None:
        if ':' in _xsi:
            # prefixed type name: the prefix must map to the XSD namespace
            prefix, name = _xsi.split(':', 1)
            ns = nsmap.get(prefix)
            if ns != XML_SCHEMA_NS:
                raise ValueError, "XSD types require the XSD namespace"
        elif nsmap is _DEFAULT_NSMAP:
            # unprefixed name with the default map: use its "xsd" prefix
            name = _xsi
            _xsi = 'xsd:' + _xsi
        else:
            # unprefixed name with a custom map: look up the prefix that is
            # bound to the XSD namespace (an empty prefix leaves the name bare)
            name = _xsi
            for prefix, ns in nsmap.items():
                if ns == XML_SCHEMA_NS:
                    if prefix is not None and prefix:
                        _xsi = prefix + ':' + _xsi
                    break
            else:
                raise ValueError, "XSD types require the XSD namespace"
        _attributes[XML_SCHEMA_INSTANCE_TYPE_ATTR] = _xsi
        if _pytype is None:
            # allow using unregistered or even wrong xsi:type names
            py_type = <PyType>_SCHEMA_TYPE_DICT.get(_xsi)
            if py_type is None:
                py_type = <PyType>_SCHEMA_TYPE_DICT.get(name)
            if py_type is not None:
                _pytype = py_type.name

    if _pytype is None:
        _pytype = _pytypename(_value)

    # convert the Python value into its text representation
    if _value is None and _pytype != "str":
        _pytype = _pytype or "NoneType"
        strval = None
    elif python._isString(_value):
        strval = _value
    elif isinstance(_value, bool):
        if _value:
            strval = "true"
        else:
            strval = "false"
    else:
        py_type = <PyType>_PYTYPE_DICT.get(_pytype)
        stringify = unicode if py_type is None else py_type.stringify
        strval = stringify(_value)

    if _pytype is not None:
        if _pytype == "NoneType" or _pytype == "none":
            strval = None
            _attributes[XML_SCHEMA_INSTANCE_NIL_ATTR] = "true"
        else:
            # check if type information from arguments is valid
            py_type = <PyType>_PYTYPE_DICT.get(_pytype)
            if py_type is not None:
                if py_type.type_check is not None:
                    py_type.type_check(strval)
                _attributes[PYTYPE_ATTRIBUTE] = _pytype

    return _makeElement("value", strval, _attributes, nsmap)
2144
+
2145
+
2146
+ ################################################################################
2147
+ # ObjectPath
2148
+
2149
+ include "objectpath.pxi"
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/parser.pxi ADDED
@@ -0,0 +1,2071 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Parsers for XML and HTML
2
+
3
+ from lxml.includes cimport xmlparser
4
+ from lxml.includes cimport htmlparser
5
+
6
# Factory for the value returned from "__class_getitem__": the real
# types.GenericAlias where it can be imported, a small stand-in otherwise.
cdef object _GenericAlias
try:
    from types import GenericAlias as _GenericAlias
except ImportError:
    # Python 3.8 - we only need this as return value from "__class_getitem__"
    def _GenericAlias(cls, item):
        # Stand-in result: the plain string "<cls name>[<item name>]".
        return f"{cls.__name__}[{item.__name__}]"
13
+
14
+
15
class ParseError(LxmlSyntaxError):
    """Syntax error while parsing an XML document.

    For compatibility with ElementTree 1.3 and later.

    The error location is stored in ``lineno`` and ``offset``, where
    ``offset`` is the given column minus one.  The ``position`` property
    exposes the same location as a ``(line, column)`` pair.
    """
    def __init__(self, message, code, line, column, filename=None):
        # _ParseError is the module-level alias of this class that is bound
        # right after the class body.
        super(_ParseError, self).__init__(message)
        zero_based_column = column - 1
        self.lineno = line
        self.offset = zero_based_column
        self.code = code
        self.filename = filename

    @property
    def position(self):
        column = self.offset + 1
        return self.lineno, column

    @position.setter
    def position(self, new_pos):
        line, column = new_pos
        self.lineno = line
        self.offset = column - 1

cdef object _ParseError = ParseError
36
+
37
+
38
class XMLSyntaxError(ParseError):
    """Syntax error while parsing an XML document.

    Subclass of ParseError that adds no attributes or behaviour of its own.
    """
41
+
42
cdef class ParserError(LxmlError):
    """Internal lxml parser error.

    Subclass of LxmlError that adds no attributes or behaviour of its own.
    """
45
+
46
+
47
@cython.final
@cython.internal
cdef class _ParserDictionaryContext:
    # Global parser context to share the string dictionary.
    #
    # This class is a delegate singleton!
    #
    # It creates _ParserDictionaryContext objects for each thread to keep thread state,
    # but those must never be used directly. Always stick to using the static
    # __GLOBAL_PARSER_CONTEXT as defined below the class.
    #

    # libxml2 string dictionary held by this context (NULL until first needed)
    cdef tree.xmlDict* _c_dict
    # default parser of the thread this context belongs to (created lazily)
    cdef _BaseParser _default_parser
    # stack of implied parser contexts; the last entry is the current one
    cdef list _implied_parser_contexts

    def __cinit__(self):
        self._implied_parser_contexts = []

    def __dealloc__(self):
        # drop the reference this context holds on its dictionary
        if self._c_dict is not NULL:
            xmlparser.xmlDictFree(self._c_dict)

    cdef int initMainParserContext(self) except -1:
        """Put the global context into the thread dictionary of the main
        thread. To be called once and only in the main thread."""
        thread_dict = python.PyThreadState_GetDict()
        if thread_dict is not NULL:
            (<dict>thread_dict)["_ParserDictionaryContext"] = self

    cdef _ParserDictionaryContext _findThreadParserContext(self):
        "Find (or create) the _ParserDictionaryContext object for the current thread"
        cdef _ParserDictionaryContext context
        thread_dict = python.PyThreadState_GetDict()
        if thread_dict is NULL:
            # no thread state dict available => fall back to this context
            return self
        d = <dict>thread_dict
        result = python.PyDict_GetItem(d, "_ParserDictionaryContext")
        if result is not NULL:
            return <object>result
        # first use in this thread: create a context and remember it in the
        # thread state dict
        context = <_ParserDictionaryContext>_ParserDictionaryContext.__new__(_ParserDictionaryContext)
        d["_ParserDictionaryContext"] = context
        return context

    cdef int setDefaultParser(self, _BaseParser parser) except -1:
        "Set the default parser for the current thread"
        cdef _ParserDictionaryContext context
        context = self._findThreadParserContext()
        context._default_parser = parser

    cdef _BaseParser getDefaultParser(self):
        "Return (or create) the default parser of the current thread"
        cdef _ParserDictionaryContext context
        context = self._findThreadParserContext()
        if context._default_parser is None:
            # make sure this (delegating) context has a default parser, then
            # give any other thread context its own copy of it
            if self._default_parser is None:
                self._default_parser = __DEFAULT_XML_PARSER._copy()
            if context is not self:
                context._default_parser = self._default_parser._copy()
        return context._default_parser

    cdef tree.xmlDict* _getThreadDict(self, tree.xmlDict* default):
        "Return the thread-local dict or create a new one if necessary."
        cdef _ParserDictionaryContext context
        context = self._findThreadParserContext()
        if context._c_dict is NULL:
            # thread dict not yet set up => use default or create a new one
            if default is not NULL:
                # adopt the caller's dict and take a reference for this context
                context._c_dict = default
                xmlparser.xmlDictReference(default)
                return default
            if self._c_dict is NULL:
                self._c_dict = xmlparser.xmlDictCreate()
            if context is not self:
                # other thread contexts get a sub-dictionary of this one's dict
                context._c_dict = xmlparser.xmlDictCreateSub(self._c_dict)
        return context._c_dict

    cdef int initThreadDictRef(self, tree.xmlDict** c_dict_ref) except -1:
        # Make the dict pointer behind c_dict_ref refer to the thread dict:
        # a different dict that was stored there is freed, and a reference on
        # the thread dict is taken for the new owner.
        c_dict = c_dict_ref[0]
        c_thread_dict = self._getThreadDict(c_dict)
        if c_dict is c_thread_dict:
            return 0
        if c_dict is not NULL:
            xmlparser.xmlDictFree(c_dict)
        c_dict_ref[0] = c_thread_dict
        xmlparser.xmlDictReference(c_thread_dict)

    cdef int initParserDict(self, xmlparser.xmlParserCtxt* pctxt) except -1:
        "Assure we always use the same string dictionary."
        self.initThreadDictRef(&pctxt.dict)
        pctxt.dictNames = 1

    cdef int initXPathParserDict(self, xpath.xmlXPathContext* pctxt) except -1:
        "Assure we always use the same string dictionary."
        self.initThreadDictRef(&pctxt.dict)

    cdef int initDocDict(self, xmlDoc* result) except -1:
        "Store dict of last object parsed if no shared dict yet"
        # XXX We also free the result dict here if there already was one.
        # This case should only occur for new documents with empty dicts,
        # otherwise we'd free data that's in use => segfault
        self.initThreadDictRef(&result.dict)

    cdef _ParserContext findImpliedContext(self):
        """Return any current implied xml parser context for the current
        thread. This is used when the resolver functions are called
        with an xmlParserCtxt that was generated from within libxml2
        (i.e. without a _ParserContext) - which happens when parsing
        schema and xinclude external references."""
        cdef _ParserDictionaryContext context
        cdef _ParserContext implied_context

        # see if we have a current implied parser
        context = self._findThreadParserContext()
        if context._implied_parser_contexts:
            implied_context = context._implied_parser_contexts[-1]
            return implied_context
        return None

    cdef int pushImpliedContextFromParser(self, _BaseParser parser) except -1:
        "Push a new implied context object taken from the parser."
        if parser is not None:
            self.pushImpliedContext(parser._getParserContext())
        else:
            # a None entry is pushed as well, so that every push has a
            # matching stack entry
            self.pushImpliedContext(None)

    cdef int pushImpliedContext(self, _ParserContext parser_context) except -1:
        "Push a new implied context object."
        cdef _ParserDictionaryContext context
        context = self._findThreadParserContext()
        context._implied_parser_contexts.append(parser_context)

    cdef int popImpliedContext(self) except -1:
        "Pop the current implied context object."
        cdef _ParserDictionaryContext context
        context = self._findThreadParserContext()
        context._implied_parser_contexts.pop()
184
+
185
# The single global context object.  It is created when this code is first
# executed and stored in the thread state dict of the executing thread.
cdef _ParserDictionaryContext __GLOBAL_PARSER_CONTEXT = _ParserDictionaryContext()
__GLOBAL_PARSER_CONTEXT.initMainParserContext()
187
+
188
+ ############################################################
189
+ ## support for Python unicode I/O
190
+ ############################################################
191
+
192
# name of Python Py_UNICODE encoding as known to libxml2
# (stays NULL unless _setupPythonUnicode() finds a matching encoding handler)
cdef const_char* _PY_UNICODE_ENCODING = NULL
194
+
195
cdef int _setupPythonUnicode() except -1:
    """Sets _PY_UNICODE_ENCODING to the internal encoding name of Python unicode
    strings if libxml2 supports reading native Python unicode.  This depends
    on iconv and the local Python installation, so we simply check if we find
    a matching encoding handler.

    Always returns 0.  _PY_UNICODE_ENCODING is left untouched when no
    encoding name or no encoding handler can be found.
    """
    global _PY_UNICODE_ENCODING
    cdef tree.xmlCharEncodingHandler* enchandler
    cdef const_char* enc
    # probe string "<test/>" in the in-memory layout of Py_UNICODE
    cdef Py_UNICODE *uchars = [c'<', c't', c'e', c's', c't', c'/', c'>']
    cdef const_xmlChar* buffer = <const_xmlChar*>uchars
    # apparently, libxml2 can't detect UTF-16 on some systems
    if (buffer[0] == c'<' and buffer[1] == c'\0' and
            buffer[2] == c't' and buffer[3] == c'\0'):
        enc = "UTF-16LE"
    elif (buffer[0] == c'\0' and buffer[1] == c'<' and
            buffer[2] == c'\0' and buffer[3] == c't'):
        enc = "UTF-16BE"
    else:
        # let libxml2 give it a try
        enc = _findEncodingName(buffer, sizeof(Py_UNICODE) * 7)
        if enc is NULL:
            # not my fault, it's YOUR broken system :)
            return 0
    enchandler = tree.xmlFindCharEncodingHandler(enc)
    if enchandler is not NULL:
        # the handler was only looked up to prove that it exists
        tree.xmlCharEncCloseFunc(enchandler)
        _PY_UNICODE_ENCODING = enc
    return 0
225
+
226
cdef const_char* _findEncodingName(const_xmlChar* buffer, int size):
    "Work around bug in libxml2: find iconv name of encoding on our own."
    # Maps the encoding detected by libxml2 for the given buffer to an
    # encoding name; returns NULL if no encoding was detected.
    cdef tree.xmlCharEncoding detected = tree.xmlDetectCharEncoding(buffer, size)

    if detected == tree.XML_CHAR_ENCODING_NONE:
        return NULL
    if detected == tree.XML_CHAR_ENCODING_UTF16BE:
        return "UTF-16BE"
    if detected == tree.XML_CHAR_ENCODING_UCS4LE:
        return "UCS-4LE"
    if detected == tree.XML_CHAR_ENCODING_UCS4BE:
        return "UCS-4BE"
    if detected == tree.XML_CHAR_ENCODING_UTF16LE:
        if size >= 4 and (buffer[0] == <const_xmlChar> b'\xFF' and
                          buffer[1] == <const_xmlChar> b'\xFE' and
                          buffer[2] == 0 and buffer[3] == 0):
            return "UTF-32LE"  # according to BOM
        return "UTF-16LE"

    # returns a constant char*, no need to free it
    return tree.xmlGetCharEncodingName(detected)
248
+
249
# Python 3.12 removed support for "Py_UNICODE".
# The Py_UNICODE based detection is therefore only run on older versions.
if python.PY_VERSION_HEX < 0x030C0000:
    _setupPythonUnicode()
252
+
253
+
254
cdef unicode _find_PyUCS4EncodingName():
    """
    Find a suitable encoding for Py_UCS4 PyUnicode strings in libxml2.

    Returns the encoding name with an explicit "LE"/"BE" suffix, or None if
    libxml2 does not provide a name for the detected encoding.
    """
    # the non-BMP character makes this a string with 4-byte characters
    probe = "<xml>\U0001F92A</xml>"
    cdef const xmlChar* buffer = <const xmlChar*> python.PyUnicode_DATA(probe)
    cdef Py_ssize_t py_buffer_len = python.PyUnicode_GET_LENGTH(probe)

    encoding_name = ''
    cdef tree.xmlCharEncoding enc = tree.xmlDetectCharEncoding(buffer, py_buffer_len)
    handler = tree.xmlGetCharEncodingHandler(enc)
    if handler is NULL:
        # no handler available: ask libxml2 for the plain encoding name
        plain_name = tree.xmlGetCharEncodingName(enc)
        if plain_name:
            encoding_name = plain_name.decode('UTF-8')
    else:
        try:
            if handler.name:
                encoding_name = handler.name.decode('UTF-8')
        finally:
            tree.xmlCharEncCloseFunc(handler)

    # add the byte order of the running interpreter if the name lacks one
    if encoding_name and not (encoding_name.endswith('LE') or encoding_name.endswith('BE')):
        if python.PY_BIG_ENDIAN:
            encoding_name += 'BE'
        else:
            encoding_name += 'LE'

    if not encoding_name:
        return None
    return encoding_name
280
+
281
# Encoding name for Py_UCS4 strings, determined once when this code is first
# executed (None if no name could be found).
_pyucs4_encoding_name = _find_PyUCS4EncodingName()
282
+
283
+
284
+ ############################################################
285
+ ## support for file-like objects
286
+ ############################################################
287
+
288
@cython.final
@cython.internal
cdef class _FileReaderContext:
    # Feeds the data of a Python file-like object into libxml2 through the
    # _readFilelikeParser() read callback.

    cdef object _filelike                 # the file-like object; reset to None by _close_file()
    cdef object _encoding                 # encoding for unicode data, or None
    cdef object _url                      # encoded URL object (keeps _c_url valid), or None
    cdef object _bytes                    # data chunk most recently read from the file
    cdef _ExceptionContext _exc_context   # receives exceptions raised while reading/closing
    cdef Py_ssize_t _bytes_read           # bytes of _bytes already handed out; -1 after end of data
    cdef char* _c_url                     # C string of _url; only assigned when a URL was given
    cdef bint _close_file_after_read      # whether _close_file() really closes the file

    def __cinit__(self, filelike, exc_context not None, url, encoding=None, bint close_file=False):
        self._exc_context = exc_context
        self._filelike = filelike
        self._close_file_after_read = close_file
        self._encoding = encoding
        if url is not None:
            url = _encodeFilename(url)
            self._c_url = _cstr(url)
        self._url = url
        self._bytes = b''
        self._bytes_read = 0

    cdef _close_file(self):
        # Close the file-like object if closing was requested.  The reference
        # to it is dropped first, so a second call does nothing.
        if self._filelike is None or not self._close_file_after_read:
            return
        try:
            close = self._filelike.close
        except AttributeError:
            close = None
        finally:
            self._filelike = None
        if close is not None:
            close()

    cdef xmlparser.xmlParserInputBuffer* _createParserInputBuffer(self) noexcept:
        # Allocate a libxml2 input buffer that reads through this object.
        # Returns NULL if the allocation failed.
        cdef xmlparser.xmlParserInputBuffer* c_buffer = xmlparser.xmlAllocParserInputBuffer(0)
        if c_buffer:
            c_buffer.readcallback = _readFilelikeParser
            c_buffer.context = <python.PyObject*> self
        return c_buffer

    cdef xmlparser.xmlParserInput* _createParserInput(
            self, xmlparser.xmlParserCtxt* ctxt) noexcept:
        # Wrap a new input buffer in a parser input stream (NULL on failure).
        cdef xmlparser.xmlParserInputBuffer* c_buffer = self._createParserInputBuffer()
        if not c_buffer:
            return NULL
        return xmlparser.xmlNewIOInputStream(ctxt, c_buffer, 0)

    cdef tree.xmlDtd* _readDtd(self) noexcept:
        # Parse a DTD from the file-like object (NULL if no buffer was created).
        cdef xmlparser.xmlParserInputBuffer* c_buffer = self._createParserInputBuffer()
        if not c_buffer:
            return NULL
        with nogil:
            return xmlparser.xmlIOParseDTD(NULL, c_buffer, 0)

    cdef xmlDoc* _readDoc(self, xmlparser.xmlParserCtxt* ctxt, int options) noexcept:
        # Parse a complete HTML or XML document (depending on ctxt.html) from
        # the file-like object, then close the file if requested.
        cdef xmlDoc* result
        cdef void* c_callback_context = <python.PyObject*> self
        cdef char* c_encoding = _cstr(self._encoding) if self._encoding is not None else NULL

        orig_options = ctxt.options
        with nogil:
            if ctxt.html:
                result = htmlparser.htmlCtxtReadIO(
                    ctxt, _readFilelikeParser, NULL, c_callback_context,
                    self._c_url, c_encoding, options)
                if result is not NULL:
                    if _fixHtmlDictNames(ctxt.dict, result) < 0:
                        tree.xmlFreeDoc(result)
                        result = NULL
            else:
                result = xmlparser.xmlCtxtReadIO(
                    ctxt, _readFilelikeParser, NULL, c_callback_context,
                    self._c_url, c_encoding, options)
        ctxt.options = orig_options # work around libxml2 problem

        try:
            self._close_file()
        except:
            # remember the exception in the exception context instead of
            # raising it from this "noexcept" function
            self._exc_context._store_raised()
        finally:
            return result  # swallow any exceptions

    cdef int copyToBuffer(self, char* c_buffer, int c_requested) noexcept:
        # Copy up to c_requested bytes of file data into c_buffer.
        # Returns the number of bytes copied, 0 once the end of the data was
        # reached before, or -1 if an exception occurred (the exception is
        # stored in the exception context).
        cdef int c_byte_count = 0
        cdef char* c_start
        cdef Py_ssize_t byte_count, remaining
        if self._bytes_read < 0:
            # end of data was already reached in an earlier call
            return 0
        try:
            byte_count = python.PyBytes_GET_SIZE(self._bytes)
            remaining = byte_count - self._bytes_read
            while c_requested > remaining:
                # hand out the rest of the current chunk ...
                c_start = _cstr(self._bytes) + self._bytes_read
                cstring_h.memcpy(c_buffer, c_start, remaining)
                c_byte_count += remaining
                c_buffer += remaining
                c_requested -= remaining

                # ... then read the next chunk from the file
                self._bytes = self._filelike.read(c_requested)
                if not isinstance(self._bytes, bytes):
                    if isinstance(self._bytes, unicode):
                        if self._encoding is None:
                            self._bytes = (<unicode>self._bytes).encode('utf8')
                        else:
                            self._bytes = python.PyUnicode_AsEncodedString(
                                self._bytes, _cstr(self._encoding), NULL)
                    else:
                        self._close_file()
                        raise TypeError, \
                            "reading from file-like objects must return byte strings or unicode strings"

                remaining = python.PyBytes_GET_SIZE(self._bytes)
                if remaining == 0:
                    # empty read => end of data: mark it and close the file
                    self._bytes_read = -1
                    self._close_file()
                    return c_byte_count
                self._bytes_read = 0

            if c_requested > 0:
                # the current chunk holds enough data for the rest of the request
                c_start = _cstr(self._bytes) + self._bytes_read
                cstring_h.memcpy(c_buffer, c_start, c_requested)
                c_byte_count += c_requested
                self._bytes_read += c_requested
        except:
            c_byte_count = -1
            self._exc_context._store_raised()
            try:
                self._close_file()
            except:
                self._exc_context._store_raised()
        finally:
            return c_byte_count  # swallow any exceptions
423
+
424
cdef int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size) noexcept with gil:
    # Read callback handed to libxml2: 'ctxt' is the _FileReaderContext that
    # was registered as callback context; the work is done by copyToBuffer().
    cdef _FileReaderContext reader = <_FileReaderContext>ctxt
    return reader.copyToBuffer(c_buffer, c_size)
426
+
427
+
428
+ ############################################################
429
+ ## support for custom document loaders
430
+ ############################################################
431
+
432
cdef xmlparser.xmlParserInput* _local_resolver(const_char* c_url, const_char* c_pubid,
                                               xmlparser.xmlParserCtxt* c_context) noexcept with gil:
    # External entity loader that asks the resolvers of the responsible
    # resolver context for the document first.  If there is no such context,
    # or the resolvers yield no usable input, it falls back to the loader
    # saved in __DEFAULT_ENTITY_LOADER.  Returns NULL if resolving raised an
    # exception (which is stored in the context) or no loader is available.
    cdef _ResolverContext context
    cdef xmlparser.xmlParserInput* c_input
    cdef _InputDocument doc_ref
    cdef _FileReaderContext file_context
    # if there is no _ParserContext associated with the xmlParserCtxt
    # passed, check to see if the thread state object has an implied
    # context.
    if c_context._private is not NULL:
        context = <_ResolverContext>c_context._private
    else:
        context = __GLOBAL_PARSER_CONTEXT.findImpliedContext()

    if context is None:
        # no context to ask => default loader only
        if __DEFAULT_ENTITY_LOADER is NULL:
            return NULL
        with nogil:
            # free the GIL as we might do serious I/O here (e.g. HTTP)
            c_input = __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context)
        return c_input

    try:
        if c_url is NULL:
            url = None
        else:
            # parsing a related document (DTD etc.) => UTF-8 encoded URL?
            url = _decodeFilename(<const_xmlChar*>c_url)
        if c_pubid is NULL:
            pubid = None
        else:
            pubid = funicode(<const_xmlChar*>c_pubid) # always UTF-8

        doc_ref = context._resolvers.resolve(url, pubid, context)
    except:
        context._store_raised()
        return NULL

    if doc_ref is not None:
        if doc_ref._type == PARSER_DATA_STRING:
            # document given as in-memory bytes
            data = doc_ref._data_bytes
            filename = doc_ref._filename
            if not filename:
                filename = None
            elif not isinstance(filename, bytes):
                # most likely a text URL
                filename = filename.encode('utf8')
                if not isinstance(filename, bytes):
                    filename = None

            if tree.LIBXML_VERSION >= 21400:
                # NOTE(review): the file name is duplicated before it is passed
                # on - confirm whether xmlNewInputFromMemory() takes ownership
                # of that copy or makes its own.
                c_filename = <char *>tree.xmlStrdup(_xcstr(filename)) if filename is not None else NULL
                c_input = xmlparser.xmlNewInputFromMemory(
                    c_filename, _xcstr(data), <size_t> python.PyBytes_GET_SIZE(data), 0)
            else:
                # set up the input stream by hand, pointing directly into the
                # buffer of 'data'
                c_input = xmlparser.xmlNewInputStream(c_context)
                if c_input is not NULL:
                    if filename is not None:
                        c_input.filename = <char *>tree.xmlStrdup(_xcstr(filename))
                    c_input.base = _xcstr(data)
                    c_input.length = python.PyBytes_GET_SIZE(data)
                    c_input.cur = c_input.base
                    c_input.end = c_input.base + c_input.length
        elif doc_ref._type == PARSER_DATA_FILENAME:
            # document given as a file name that libxml2 opens itself
            data = None
            c_filename = _cstr(doc_ref._filename)
            with nogil:
                # free the GIL as we might do serious I/O here
                c_input = xmlparser.xmlNewInputFromFile(
                    c_context, c_filename)
        elif doc_ref._type == PARSER_DATA_FILE:
            # document given as a Python file-like object
            file_context = _FileReaderContext(doc_ref._file, context, url,
                                              None, doc_ref._close_file)
            c_input = file_context._createParserInput(c_context)
            data = file_context
        else:
            data = None
            c_input = NULL

        if data is not None:
            # presumably keeps the bytes / file reader referenced by the
            # C-level input alive - verify against the _storage owner
            context._storage.add(data)
        if c_input is not NULL:
            return c_input

    # nothing usable was resolved => fall back to the default loader
    if __DEFAULT_ENTITY_LOADER is NULL:
        return NULL

    with nogil:
        # free the GIL as we might do serious I/O here (e.g. HTTP)
        c_input = __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context)
    return c_input
523
+
524
# The external entity loader that libxml2 reports when this code is first
# executed; _local_resolver() falls back to it.
cdef xmlparser.xmlExternalEntityLoader __DEFAULT_ENTITY_LOADER
__DEFAULT_ENTITY_LOADER = xmlparser.xmlGetExternalEntityLoader()
526
+
527
+
528
cdef xmlparser.xmlExternalEntityLoader _register_document_loader() noexcept nogil:
    # Install _local_resolver() as libxml2's external entity loader and
    # return the loader that was active before, so that it can be restored.
    cdef xmlparser.xmlExternalEntityLoader previous_loader
    previous_loader = xmlparser.xmlGetExternalEntityLoader()
    xmlparser.xmlSetExternalEntityLoader(<xmlparser.xmlExternalEntityLoader>_local_resolver)
    return previous_loader
532
+
533
cdef void _reset_document_loader(xmlparser.xmlExternalEntityLoader old) noexcept nogil:
    # Re-install the given external entity loader, e.g. the one returned by
    # _register_document_loader().
    xmlparser.xmlSetExternalEntityLoader(old)
535
+
536
+
537
+ ############################################################
538
+ ## Parsers
539
+ ############################################################
540
+
541
+ @cython.no_gc_clear # May have to call "self._validator.disconnect()" on dealloc.
542
+ @cython.internal
543
+ cdef class _ParserContext(_ResolverContext):
544
+ cdef _ErrorLog _error_log
545
+ cdef _ParserSchemaValidationContext _validator
546
+ cdef xmlparser.xmlParserCtxt* _c_ctxt
547
+ cdef xmlparser.xmlExternalEntityLoader _orig_loader
548
+ cdef python.PyThread_type_lock _lock
549
+ cdef _Document _doc
550
+ cdef bint _collect_ids
551
+
552
def __cinit__(self):
    # Fresh error log, ID collection enabled; a lock is only allocated when
    # threading support is enabled.
    self._error_log = _ErrorLog()
    self._collect_ids = True
    if config.ENABLE_THREADING:
        self._lock = python.PyThread_allocate_lock()
557
+
558
def __dealloc__(self):
    # release the lock first, then the libxml2 parser context
    if config.ENABLE_THREADING and self._lock is not NULL:
        python.PyThread_free_lock(self._lock)
        self._lock = NULL

    if self._c_ctxt is NULL:
        return

    if <void*>self._validator is not NULL and self._validator is not None:
        # If the parser was not closed correctly (e.g. interrupted iterparse()),
        # and the schema validator wasn't freed and cleaned up yet, the libxml2 SAX
        # validator plug might still be in place, which will make xmlFreeParserCtxt()
        # crash when trying to xmlFree() a static SAX handler.
        # Thus, make sure we disconnect the handler interceptor here at the latest.
        self._validator.disconnect()
    xmlparser.xmlFreeParserCtxt(self._c_ctxt)
571
+
572
cdef _ParserContext _copy(self):
    # Create a new context of the same class that takes over the ID
    # collection setting, a copy of the schema validator (if there is one)
    # and a copy of the resolvers.
    cdef _ParserContext context
    context = self.__class__()
    context._collect_ids = self._collect_ids
    # The validator is optional: prepare(), cleanup() and __dealloc__() all
    # check it for None, so do the same here instead of calling a method on
    # None.  Without a validator, the copy keeps its initial value.
    if self._validator is not None:
        context._validator = self._validator.copy()
    _initParserContext(context, self._resolvers._copy(), NULL)
    return context
579
+
580
cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
    """
    Link the libxml2 parser context and this lxml parser context with
    each other.
    """
    # libxml2 side: lets callbacks find this object through '_private'
    c_ctxt._private = <void*>self
    # lxml side: remember the libxml2 context
    self._c_ctxt = c_ctxt
586
+
587
cdef void _resetParserContext(self) noexcept:
    # Reset the libxml2 parser context (if there is one) for reuse.
    cdef xmlparser.xmlParserCtxt* c_ctxt = self._c_ctxt
    if c_ctxt is NULL:
        return
    if c_ctxt.html:
        htmlparser.htmlCtxtReset(c_ctxt)
        c_ctxt.disableSAX = 0  # work around bug in libxml2
        return
    xmlparser.xmlClearParserCtxt(c_ctxt)
    # work around bug in libxml2 [2.9.10 .. 2.9.14]:
    # https://gitlab.gnome.org/GNOME/libxml2/-/issues/378
    c_ctxt.nsNr = 0
597
+
598
+ cdef int prepare(self, bint set_document_loader=True) except -1:
599
+ cdef int result
600
+ if config.ENABLE_THREADING and self._lock is not NULL:
601
+ with nogil:
602
+ result = python.PyThread_acquire_lock(
603
+ self._lock, python.WAIT_LOCK)
604
+ if result == 0:
605
+ raise ParserError, "parser locking failed"
606
+ self._error_log.clear()
607
+ self._doc = None
608
+ # Connect the lxml error log with libxml2's error handling. In the case of parsing
609
+ # HTML, ctxt->sax is not set to null, so this always works. The libxml2 function
610
+ # that does this is htmlInitParserCtxt in HTMLparser.c. For HTML (and possibly XML
611
+ # too), libxml2's SAX's serror is set to be the place where errors are sent when
612
+ # schannel is set to ctxt->sax->serror in xmlCtxtErrMemory in libxml2's
613
+ # parserInternals.c.
614
+ # Need a cast here because older libxml2 releases do not use 'const' in the functype.
615
+ self._c_ctxt.sax.serror = <xmlerror.xmlStructuredErrorFunc> _receiveParserError
616
+ self._orig_loader = _register_document_loader() if set_document_loader else NULL
617
+ if self._validator is not None:
618
+ self._validator.connect(self._c_ctxt, self._error_log)
619
+ return 0
620
+
621
+ cdef int cleanup(self) except -1:
622
+ if self._orig_loader is not NULL:
623
+ _reset_document_loader(self._orig_loader)
624
+ try:
625
+ if self._validator is not None:
626
+ self._validator.disconnect()
627
+ self._resetParserContext()
628
+ self.clear()
629
+ self._doc = None
630
+ self._c_ctxt.sax.serror = NULL
631
+ finally:
632
+ if config.ENABLE_THREADING and self._lock is not NULL:
633
+ python.PyThread_release_lock(self._lock)
634
+ return 0
635
+
636
+ cdef object _handleParseResult(self, _BaseParser parser,
637
+ xmlDoc* result, filename):
638
+ c_doc = self._handleParseResultDoc(parser, result, filename)
639
+ if self._doc is not None and self._doc._c_doc is c_doc:
640
+ return self._doc
641
+ else:
642
+ return _documentFactory(c_doc, parser)
643
+
644
+ cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser,
645
+ xmlDoc* result, filename) except NULL:
646
+ recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER
647
+ return _handleParseResult(self, self._c_ctxt, result,
648
+ filename, recover,
649
+ free_doc=self._doc is None)
650
+
651
cdef _initParserContext(_ParserContext context,
                        _ResolverRegistry resolvers,
                        xmlparser.xmlParserCtxt* c_ctxt):
    """Set up the resolver part of *context* and, if a libxml2 context
    is given, connect it to *context* as well.
    """
    _initResolverContext(context, resolvers)
    if c_ctxt is NULL:
        # No libxml2 context to attach.
        return None
    context._initParserContext(c_ctxt)
657
+
658
cdef void _forwardParserError(xmlparser.xmlParserCtxt* _parser_context, const xmlerror.xmlError* error) noexcept with gil:
    """
    Hand a libxml2 error over to the error log of the lxml-level parser
    context that is stored in the "_private" field of the C context.
    """
    cdef _ParserContext owner = <_ParserContext>_parser_context._private
    owner._error_log._receive(error)
663
+
664
cdef void _receiveParserError(void* c_context, const xmlerror.xmlError* error) noexcept nogil:
    # Structured error callback: does nothing unless __DEBUG is set.
    # Errors with a usable parser context go to that context's error log,
    # all others are passed to _forwardError() without a context.
    cdef xmlparser.xmlParserCtxt* c_parser_ctxt
    if not __DEBUG:
        return
    c_parser_ctxt = <xmlparser.xmlParserCtxt*>c_context
    if c_parser_ctxt is not NULL and c_parser_ctxt._private is not NULL:
        _forwardParserError(c_parser_ctxt, error)
    else:
        _forwardError(NULL, error)
670
+
671
cdef int _raiseParseError(xmlparser.xmlParserCtxt* ctxt, filename,
                          _ErrorLog error_log) except -1:
    """Raise the exception that describes a failed parser run.

    Always raises:
    - IOError if a filename is known and the last libxml2 error came from
      the I/O domain,
    - otherwise the exception built from a non-empty *error_log*,
    - otherwise an XMLSyntaxError built from the last libxml2 error, or a
      generic internal-error XMLSyntaxError if there is no message.
    """
    if filename is not None and \
            ctxt.lastError.domain == xmlerror.XML_FROM_IO:
        if isinstance(filename, bytes):
            filename = _decodeFilenameWithLength(
                <bytes>filename, len(<bytes>filename))
        if ctxt.lastError.message is not NULL:
            try:
                message = ctxt.lastError.message.decode('utf-8')
            except UnicodeDecodeError:
                # the filename may be in there => play it safe
                message = ctxt.lastError.message.decode('iso8859-1')
            message = f"Error reading file '{filename}': {message.strip()}"
        else:
            message = f"Error reading '{filename}'"
        raise IOError, message
    elif error_log:
        raise error_log._buildParseException(
            XMLSyntaxError, "Document is not well formed")
    elif ctxt.lastError.message is not NULL:
        # Decode the C string before formatting it.  Interpolating the raw
        # byte string into the message would render it as "b'...'".
        try:
            message = ctxt.lastError.message.decode('utf-8')
        except UnicodeDecodeError:
            message = ctxt.lastError.message.decode('iso8859-1')
        message = message.strip()
        code = ctxt.lastError.code
        line = ctxt.lastError.line
        column = ctxt.lastError.int2
        if ctxt.lastError.line > 0:
            message = f"line {line}: {message}"
        raise XMLSyntaxError(message, code, line, column, filename)
    else:
        raise XMLSyntaxError(None, xmlerror.XML_ERR_INTERNAL_ERROR, 0, 0,
                             filename)
702
+
703
cdef xmlDoc* _handleParseResult(_ParserContext context,
                                xmlparser.xmlParserCtxt* c_ctxt,
                                xmlDoc* result, filename,
                                bint recover, bint free_doc) except NULL:
    """Decide whether a parser result can be returned to the user.

    Returns *result* if it is considered acceptable, after filling in a
    missing URL and encoding.  Otherwise raises, either the exception stored
    in *context* or the one produced by ``_raiseParseError()``.  A rejected
    document is only freed here when *free_doc* is true.

    NOTE(review): *context* is checked against None in some places but
    dereferenced unconditionally in others; the callers visible here always
    pass a real context.
    """
    # The C-level argument xmlDoc* result is passed in as NULL if the parser was not able
    # to parse the document.
    cdef bint well_formed
    if result is not NULL:
        __GLOBAL_PARSER_CONTEXT.initDocDict(result)

    # Drop a leftover document in the parser context that is not the result.
    if c_ctxt.myDoc is not NULL:
        if c_ctxt.myDoc is not result:
            __GLOBAL_PARSER_CONTEXT.initDocDict(c_ctxt.myDoc)
            tree.xmlFreeDoc(c_ctxt.myDoc)
        c_ctxt.myDoc = NULL

    if result is not NULL:
        # "wellFormed" in libxml2 is 0 if the parser found fatal errors. It still returns a
        # parse result document if 'recover=True'. Here, we determine if we can present
        # the document to the user or consider it incorrect or broken enough to raise an error.
        if (context._validator is not None and
                not context._validator.isvalid()):
            well_formed = 0  # actually not 'valid', but anyway ...
        elif (not c_ctxt.wellFormed and not c_ctxt.html and
                c_ctxt.charset == tree.XML_CHAR_ENCODING_8859_1 and
                [1 for error in context._error_log
                 if error.type == ErrorTypes.ERR_INVALID_CHAR]):
            # An encoding error occurred and libxml2 switched from UTF-8
            # input to (undecoded) Latin-1, at some arbitrary point in the
            # document.  Better raise an error than allowing for a broken
            # tree with mixed encodings. This is fixed in libxml2 2.12.
            well_formed = 0
        elif recover or (c_ctxt.wellFormed and
                         c_ctxt.lastError.level < xmlerror.XML_ERR_ERROR):
            well_formed = 1
        elif not c_ctxt.replaceEntities and not c_ctxt.validate \
                 and context is not None:
            # in this mode, we ignore errors about undefined entities
            for error in context._error_log.filter_from_errors():
                if error.type != ErrorTypes.WAR_UNDECLARED_ENTITY and \
                       error.type != ErrorTypes.ERR_UNDECLARED_ENTITY:
                    well_formed = 0
                    break
            else:
                # only undeclared-entity entries were found (or none at all)
                well_formed = 1
        else:
            well_formed = 0

        if not well_formed:
            if free_doc:
                tree.xmlFreeDoc(result)
            result = NULL

    # An exception stored in the context takes precedence over the result.
    if context is not None and context._has_raised():
        if result is not NULL:
            if free_doc:
                tree.xmlFreeDoc(result)
            result = NULL
        context._raise_if_stored()

    if result is NULL:
        if context is not None:
            _raiseParseError(c_ctxt, filename, context._error_log)
        else:
            _raiseParseError(c_ctxt, filename, None)
    else:
        if result.URL is NULL and filename is not None:
            result.URL = tree.xmlStrdup(_xcstr(filename))
        if result.encoding is NULL:
            result.encoding = tree.xmlStrdup(<unsigned char*>"UTF-8")

    if context._validator is not None and \
            context._validator._add_default_attributes:
        # we currently need to do this here as libxml2 does not
        # support inserting default attributes during parse-time
        # validation
        context._validator.inject_default_attributes(result)

    return result
782
+
783
cdef int _fixHtmlDictNames(tree.xmlDict* c_dict, xmlDoc* c_doc) noexcept nogil:
    """
    Move the names of all element nodes found by the traversal of c_doc
    (and of their attributes) to the dict.  Returns 0 on success or for a
    NULL document, -1 as soon as a name could not be processed.
    """
    cdef xmlNode* c_node
    if c_doc is NULL:
        return 0
    c_node = c_doc.children
    # The statements between the BEGIN/END macros form the body of the
    # traversal and run once per visited node.
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(<xmlNode*>c_doc, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        if _fixHtmlDictNodeNames(c_dict, c_node) < 0:
            return -1
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
    return 0
794
+
795
cdef int _fixHtmlDictSubtreeNames(tree.xmlDict* c_dict, xmlDoc* c_doc,
                                  xmlNode* c_start_node) noexcept nogil:
    """
    Move names to the dict, iterating in document order, starting at
    c_start_node. This is used in incremental parsing after each chunk.

    Returns 0 on success or for a NULL document, -1 on failure.  Without
    a start node, the whole document is processed.
    """
    cdef xmlNode* c_node
    if not c_doc:
        return 0
    if not c_start_node:
        return _fixHtmlDictNames(c_dict, c_doc)
    c_node = c_start_node
    # The statements between the BEGIN/END macros form the body of the
    # traversal and run once per visited node.
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(<xmlNode*>c_doc, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        if _fixHtmlDictNodeNames(c_dict, c_node) < 0:
            return -1
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
    return 0
813
+
814
cdef inline int _fixHtmlDictNodeNames(tree.xmlDict* c_dict,
                                      xmlNode* c_node) noexcept nogil:
    """
    Replace the name of c_node and the names of its attributes by the
    entries that the dict returns for them, freeing the previous name
    strings.  Returns -1 if a dict lookup fails, 0 otherwise.
    """
    cdef xmlNode* c_prop

    # the element name itself
    c_interned = tree.xmlDictLookup(c_dict, c_node.name, -1)
    if not c_interned:
        return -1
    if c_node.name is not c_interned:
        tree.xmlFree(<char*>c_node.name)
        c_node.name = c_interned

    # the names of its attributes
    c_prop = <xmlNode*>c_node.properties
    while c_prop:
        c_interned = tree.xmlDictLookup(c_dict, c_prop.name, -1)
        if not c_interned:
            return -1
        if c_prop.name is not c_interned:
            tree.xmlFree(<char*>c_prop.name)
            c_prop.name = c_interned
        c_prop = c_prop.next
    return 0
833
+
834
+
835
@cython.internal
cdef class _BaseParser:
    """Common implementation of the parser classes.

    Only instances of XMLParser or HTMLParser may be created; ``__init__``
    rejects everything else.  The class lazily creates one parser context
    for normal parsing and a separate one for push parsing.
    """
    cdef ElementClassLookup _class_lookup
    cdef _ResolverRegistry _resolvers
    cdef _ParserContext _parser_context        # created on first use
    cdef _ParserContext _push_parser_context   # created on first use
    cdef int _parse_options
    cdef bint _for_html
    cdef bint _remove_comments
    cdef bint _remove_pis
    cdef bint _strip_cdata
    cdef bint _collect_ids
    cdef bint _resolve_external_entities
    cdef XMLSchema _schema
    cdef bytes _filename
    cdef readonly object target
    cdef object _default_encoding
    cdef tuple _events_to_collect  # (event_types, tag)

    def __init__(self, int parse_options, bint for_html, XMLSchema schema,
                 remove_comments, remove_pis, strip_cdata, collect_ids,
                 target, encoding, bint resolve_external_entities=True):
        cdef tree.xmlCharEncodingHandler* enchandler
        if not isinstance(self, (XMLParser, HTMLParser)):
            raise TypeError, "This class cannot be instantiated"

        if not collect_ids and tree.LIBXML_VERSION >= 21500:
            parse_options |= xmlparser.XML_PARSE_SKIP_IDS

        self._parse_options = parse_options
        self.target = target
        self._for_html = for_html
        self._remove_comments = remove_comments
        self._remove_pis = remove_pis
        self._strip_cdata = strip_cdata
        self._collect_ids = collect_ids
        self._resolve_external_entities = resolve_external_entities
        self._schema = schema

        self._resolvers = _ResolverRegistry()

        if encoding is None:
            self._default_encoding = None
        else:
            encoding = _utf8(encoding)
            # Only used to check that libxml2 knows the encoding.
            enchandler = tree.xmlFindCharEncodingHandler(_cstr(encoding))
            if enchandler is NULL:
                raise LookupError, f"unknown encoding: '{encoding}'"
            tree.xmlCharEncCloseFunc(enchandler)
            self._default_encoding = encoding

    cdef _setBaseURL(self, base_url):
        self._filename = _encodeFilename(base_url)

    cdef _collectEvents(self, event_types, tag):
        """Remember the (de-duplicated) event types and tag for push parsing."""
        if event_types is None:
            event_types = ()
        else:
            event_types = tuple(set(event_types))
            _buildParseEventFilter(event_types)  # purely for validation
        self._events_to_collect = (event_types, tag)

    cdef _ParserContext _getParserContext(self):
        """Return the parser context, creating and configuring it on first use."""
        cdef xmlparser.xmlParserCtxt* pctxt
        if self._parser_context is None:
            self._parser_context = self._createContext(self.target, None)
            self._parser_context._collect_ids = self._collect_ids
            if self._schema is not None:
                self._parser_context._validator = \
                    self._schema._newSaxValidator(
                        self._parse_options & xmlparser.XML_PARSE_DTDATTR)
            pctxt = self._newParserCtxt()
            _initParserContext(self._parser_context, self._resolvers, pctxt)
            self._configureSaxContext(pctxt)
        return self._parser_context

    cdef _ParserContext _getPushParserContext(self):
        """Return the push parser context, creating and configuring it on first use."""
        cdef xmlparser.xmlParserCtxt* pctxt
        if self._push_parser_context is None:
            self._push_parser_context = self._createContext(
                self.target, self._events_to_collect)
            self._push_parser_context._collect_ids = self._collect_ids
            if self._schema is not None:
                self._push_parser_context._validator = \
                    self._schema._newSaxValidator(
                        self._parse_options & xmlparser.XML_PARSE_DTDATTR)
            pctxt = self._newPushParserCtxt()
            _initParserContext(
                self._push_parser_context, self._resolvers, pctxt)
            self._configureSaxContext(pctxt)
        return self._push_parser_context

    cdef _ParserContext _createContext(self, target, events_to_collect):
        """
        This method creates and configures the lxml-level parser.
        """
        cdef _SaxParserContext sax_context
        if target is not None:
            sax_context = _TargetParserContext(self)
            (<_TargetParserContext>sax_context)._setTarget(target)
        elif events_to_collect:
            sax_context = _SaxParserContext(self)
        else:
            # nothing special to configure
            return _ParserContext()
        if events_to_collect:
            events, tag = events_to_collect
            sax_context._setEventFilter(events, tag)
        return sax_context

    @cython.final
    cdef int _configureSaxContext(self, xmlparser.xmlParserCtxt* pctxt) except -1:
        """Switch off or replace SAX callbacks according to the parser options."""
        if self._remove_comments:
            pctxt.sax.comment = NULL
        if self._remove_pis:
            pctxt.sax.processingInstruction = NULL
        if self._strip_cdata:
            # hard switch-off for CDATA nodes => makes them plain text
            pctxt.sax.cdataBlock = NULL
        if not self._resolve_external_entities:
            pctxt.sax.getEntity = _getInternalEntityOnly

    cdef int _registerHtmlErrorHandler(self, xmlparser.xmlParserCtxt* c_ctxt) except -1:
        """Make an initialised non-SAX2 handler of c_ctxt report structured errors."""
        cdef xmlparser.xmlSAXHandler* sax = c_ctxt.sax
        if sax is not NULL and sax.initialized and sax.initialized != xmlparser.XML_SAX2_MAGIC:
            # need to extend SAX1 context to SAX2 to get proper error reports
            if <xmlparser.xmlSAXHandlerV1*>sax is &htmlparser.htmlDefaultSAXHandler:
                # do not modify the shared default handler, work on a copy
                sax = <xmlparser.xmlSAXHandler*> tree.xmlMalloc(sizeof(xmlparser.xmlSAXHandler))
                if sax is NULL:
                    raise MemoryError()
                cstring_h.memcpy(sax, &htmlparser.htmlDefaultSAXHandler,
                                 sizeof(htmlparser.htmlDefaultSAXHandler))
                c_ctxt.sax = sax
            sax.initialized = xmlparser.XML_SAX2_MAGIC
            # Need a cast here because older libxml2 releases do not use 'const' in the functype.
            sax.serror = <xmlerror.xmlStructuredErrorFunc> _receiveParserError
            sax.startElementNs = NULL
            sax.endElementNs = NULL
            sax._private = NULL
        return 0

    cdef xmlparser.xmlParserCtxt* _newParserCtxt(self) except NULL:
        """
        Create and initialise a libxml2-level parser context.
        """
        cdef xmlparser.xmlParserCtxt* c_ctxt
        if self._for_html:
            c_ctxt = htmlparser.htmlCreateMemoryParserCtxt('dummy', 5)
            if c_ctxt is not NULL:
                self._registerHtmlErrorHandler(c_ctxt)
        else:
            c_ctxt = xmlparser.xmlNewParserCtxt()
        if c_ctxt is NULL:
            raise MemoryError
        c_ctxt.sax.startDocument = _initSaxDocument
        return c_ctxt

    cdef xmlparser.xmlParserCtxt* _newPushParserCtxt(self) except NULL:
        """Create and initialise a libxml2-level push parser context."""
        cdef xmlparser.xmlParserCtxt* c_ctxt
        cdef char* c_filename = _cstr(self._filename) if self._filename is not None else NULL
        if self._for_html:
            c_ctxt = htmlparser.htmlCreatePushParserCtxt(
                NULL, NULL, NULL, 0, c_filename, tree.XML_CHAR_ENCODING_NONE)
            if c_ctxt is not NULL:
                self._registerHtmlErrorHandler(c_ctxt)
                htmlparser.htmlCtxtUseOptions(c_ctxt, self._parse_options)
        else:
            c_ctxt = xmlparser.xmlCreatePushParserCtxt(
                NULL, NULL, NULL, 0, c_filename)
            if c_ctxt is not NULL:
                xmlparser.xmlCtxtUseOptions(c_ctxt, self._parse_options)
        if c_ctxt is NULL:
            raise MemoryError()
        c_ctxt.sax.startDocument = _initSaxDocument
        return c_ctxt

    @property
    def error_log(self):
        """The error log of the last parser run.
        """
        cdef _ParserContext context
        context = self._getParserContext()
        return context._error_log.copy()

    @property
    def resolvers(self):
        """The custom resolver registry of this parser."""
        return self._resolvers

    @property
    def version(self):
        """The version of the underlying XML parser."""
        return "libxml2 %d.%d.%d" % LIBXML_VERSION

    def set_element_class_lookup(self, ElementClassLookup lookup = None):
        """set_element_class_lookup(self, lookup = None)

        Set a lookup scheme for element classes generated from this parser.

        Reset it by passing None or nothing.
        """
        self._class_lookup = lookup

    cdef _BaseParser _copy(self):
        "Create a new parser with the same configuration."
        cdef _BaseParser parser
        # The new instance is built without arguments and thus starts out
        # with whatever the constructor sets up by default.  All settings
        # of this parser must therefore be transferred explicitly.
        parser = self.__class__()
        parser._parse_options = self._parse_options
        parser._for_html = self._for_html
        parser._remove_comments = self._remove_comments
        parser._remove_pis = self._remove_pis
        parser._strip_cdata = self._strip_cdata
        # These two are read when the parser contexts are created
        # (_getParserContext() / _configureSaxContext()), so a copy that
        # lacks them does not behave like the original parser.
        parser._collect_ids = self._collect_ids
        parser._resolve_external_entities = self._resolve_external_entities
        parser._filename = self._filename
        parser._resolvers = self._resolvers
        parser.target = self.target
        parser._class_lookup = self._class_lookup
        parser._default_encoding = self._default_encoding
        parser._schema = self._schema
        parser._events_to_collect = self._events_to_collect
        return parser

    def copy(self):
        """copy(self)

        Create a new parser with the same configuration.
        """
        return self._copy()

    def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
        """makeelement(self, _tag, attrib=None, nsmap=None, **_extra)

        Creates a new element associated with this parser.
        """
        return _makeElement(_tag, NULL, None, self, None, None,
                            attrib, nsmap, _extra)

    # internal parser methods

    cdef xmlDoc* _parseUnicodeDoc(self, utext, char* c_filename) except NULL:
        """Parse unicode document, share dictionary if possible.
        """
        cdef _ParserContext context
        cdef xmlDoc* result
        cdef xmlparser.xmlParserCtxt* pctxt
        cdef Py_ssize_t py_buffer_len
        cdef int buffer_len, c_kind
        cdef const_char* c_text
        cdef const_char* c_encoding = _PY_UNICODE_ENCODING
        if python.PyUnicode_IS_READY(utext):
            # PEP-393 string
            c_text = <const_char*>python.PyUnicode_DATA(utext)
            py_buffer_len = python.PyUnicode_GET_LENGTH(utext)
            c_kind = python.PyUnicode_KIND(utext)
            # Pick the encoding name that matches the internal string
            # layout and convert the character count into a byte count.
            if c_kind == 1:
                if python.PyUnicode_MAX_CHAR_VALUE(utext) <= 127:
                    c_encoding = 'UTF-8'
                else:
                    c_encoding = 'ISO-8859-1'
            elif c_kind == 2:
                py_buffer_len *= 2
                if python.PY_BIG_ENDIAN:
                    c_encoding = 'UTF-16BE'  # actually UCS-2
                else:
                    c_encoding = 'UTF-16LE'  # actually UCS-2
            elif c_kind == 4:
                py_buffer_len *= 4
                if python.PY_BIG_ENDIAN:
                    c_encoding = 'UTF-32BE'  # actually UCS-4
                else:
                    c_encoding = 'UTF-32LE'  # actually UCS-4
            else:
                assert False, f"Illegal Unicode kind {c_kind}"
        else:
            # old Py_UNICODE string
            py_buffer_len = python.PyUnicode_GET_DATA_SIZE(utext)
            c_text = python.PyUnicode_AS_DATA(utext)
        assert 0 <= py_buffer_len <= limits.INT_MAX
        buffer_len = py_buffer_len

        context = self._getParserContext()
        context.prepare()
        try:
            pctxt = context._c_ctxt
            __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)
            orig_options = pctxt.options
            with nogil:
                if self._for_html:
                    result = htmlparser.htmlCtxtReadMemory(
                        pctxt, c_text, buffer_len, c_filename, c_encoding,
                        self._parse_options)
                    if result is not NULL:
                        if _fixHtmlDictNames(pctxt.dict, result) < 0:
                            tree.xmlFreeDoc(result)
                            result = NULL
                else:
                    result = xmlparser.xmlCtxtReadMemory(
                        pctxt, c_text, buffer_len, c_filename, c_encoding,
                        self._parse_options)
            pctxt.options = orig_options # work around libxml2 problem

            return context._handleParseResultDoc(self, result, None)
        finally:
            context.cleanup()

    cdef xmlDoc* _parseDoc(self, const char* c_text, int c_len, char* c_filename) except NULL:
        """Parse document, share dictionary if possible.
        """
        cdef _ParserContext context
        cdef xmlDoc* result
        cdef xmlparser.xmlParserCtxt* pctxt
        cdef char* c_encoding
        cdef tree.xmlCharEncoding enc
        context = self._getParserContext()
        context.prepare()
        try:
            pctxt = context._c_ctxt
            __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)

            if self._default_encoding is None:
                c_encoding = NULL
                # libxml2 (at least 2.9.3) does not recognise UTF-32 BOMs
                # NOTE: limit to problematic cases because it changes character offsets
                if c_len >= 4 and (c_text[0] == b'\xFF' and c_text[1] == b'\xFE' and
                                   c_text[2] == 0 and c_text[3] == 0):
                    c_encoding = "UTF-32LE"
                    c_text += 4
                    c_len -= 4
                elif c_len >= 4 and (c_text[0] == 0 and c_text[1] == 0 and
                                     c_text[2] == b'\xFE' and c_text[3] == b'\xFF'):
                    c_encoding = "UTF-32BE"
                    c_text += 4
                    c_len -= 4
                else:
                    # no BOM => try to determine encoding
                    enc = tree.xmlDetectCharEncoding(<const_xmlChar*>c_text, c_len)
                    if enc == tree.XML_CHAR_ENCODING_UCS4LE:
                        c_encoding = 'UTF-32LE'
                    elif enc == tree.XML_CHAR_ENCODING_UCS4BE:
                        c_encoding = 'UTF-32BE'
            else:
                c_encoding = _cstr(self._default_encoding)

            orig_options = pctxt.options
            with nogil:
                if self._for_html:
                    result = htmlparser.htmlCtxtReadMemory(
                        pctxt, c_text, c_len, c_filename,
                        c_encoding, self._parse_options)
                    if result is not NULL:
                        if _fixHtmlDictNames(pctxt.dict, result) < 0:
                            tree.xmlFreeDoc(result)
                            result = NULL
                else:
                    result = xmlparser.xmlCtxtReadMemory(
                        pctxt, c_text, c_len, c_filename,
                        c_encoding, self._parse_options)
            pctxt.options = orig_options # work around libxml2 problem

            return context._handleParseResultDoc(self, result, None)
        finally:
            context.cleanup()

    cdef xmlDoc* _parseDocFromFile(self, char* c_filename) except NULL:
        """Parse the file named by c_filename using the libxml2 file reader."""
        cdef _ParserContext context
        cdef xmlDoc* result
        cdef xmlparser.xmlParserCtxt* pctxt
        cdef char* c_encoding
        result = NULL

        context = self._getParserContext()
        context.prepare()
        try:
            pctxt = context._c_ctxt
            __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)

            if self._default_encoding is None:
                c_encoding = NULL
            else:
                c_encoding = _cstr(self._default_encoding)

            orig_options = pctxt.options
            with nogil:
                if self._for_html:
                    result = htmlparser.htmlCtxtReadFile(
                        pctxt, c_filename, c_encoding, self._parse_options)
                    if result is not NULL:
                        if _fixHtmlDictNames(pctxt.dict, result) < 0:
                            tree.xmlFreeDoc(result)
                            result = NULL
                else:
                    result = xmlparser.xmlCtxtReadFile(
                        pctxt, c_filename, c_encoding, self._parse_options)
            pctxt.options = orig_options # work around libxml2 problem

            return context._handleParseResultDoc(self, result, c_filename)
        finally:
            context.cleanup()

    cdef xmlDoc* _parseDocFromFilelike(self, filelike, filename,
                                       encoding) except NULL:
        """Parse a document by reading it from a file-like object."""
        cdef _ParserContext context
        cdef _FileReaderContext file_context
        cdef xmlDoc* result
        cdef xmlparser.xmlParserCtxt* pctxt
        cdef char* c_filename
        if not filename:
            filename = None

        context = self._getParserContext()
        context.prepare()
        try:
            pctxt = context._c_ctxt
            __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)
            file_context = _FileReaderContext(
                filelike, context, filename,
                encoding or self._default_encoding)
            result = file_context._readDoc(pctxt, self._parse_options)

            return context._handleParseResultDoc(
                self, result, filename)
        finally:
            context.cleanup()
1258
+
1259
+
1260
cdef tree.xmlEntity* _getInternalEntityOnly(void* ctxt, const_xmlChar* name) noexcept nogil:
    """
    Callback function to intercept the entity resolution when external entity loading is disabled.

    Returns the entity found by xmlSAX2GetEntity() unless it is one of the
    external entity types.  For those, an error is reported through the
    structured error function (if one is set), the parser context is marked
    as not well-formed and NULL is returned.
    """
    cdef tree.xmlEntity* entity = xmlparser.xmlSAX2GetEntity(ctxt, name)
    if not entity:
        return NULL
    if entity.etype not in (
            tree.xmlEntityType.XML_EXTERNAL_GENERAL_PARSED_ENTITY,
            tree.xmlEntityType.XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
            tree.xmlEntityType.XML_EXTERNAL_PARAMETER_ENTITY):
        return entity

    # Reject all external entities and fail the parsing instead. There is currently
    # no way in libxml2 to just prevent the entity resolution in this case.
    cdef xmlerror.xmlError c_error
    cdef xmlerror.xmlStructuredErrorFunc err_func
    cdef xmlparser.xmlParserInput* parser_input
    cdef void* err_context

    c_ctxt = <xmlparser.xmlParserCtxt *> ctxt
    err_func = xmlerror.xmlStructuredError
    if err_func:
        parser_input = c_ctxt.input
        # Copied from xmlVErrParser() in libxml2: get current input from stack.
        if parser_input and parser_input.filename is NULL and c_ctxt.inputNr > 1:
            parser_input = c_ctxt.inputTab[c_ctxt.inputNr - 2]

        c_error = xmlerror.xmlError(
            domain=xmlerror.xmlErrorDomain.XML_FROM_PARSER,
            code=xmlerror.xmlParserErrors.XML_ERR_EXT_ENTITY_STANDALONE,
            level=xmlerror.xmlErrorLevel.XML_ERR_FATAL,
            message=b"External entity resolution is disabled for security reasons "
                    b"when resolving '&%s;'. Use 'XMLParser(resolve_entities=True)' "
                    b"if you consider it safe to enable it.",
            # The input may be NULL, so guard the filename access in the
            # same way as the line and column below.
            file=parser_input.filename if parser_input else NULL,
            node=entity,
            str1=<char*> name,
            str2=NULL,
            str3=NULL,
            line=parser_input.line if parser_input else 0,
            int1=0,
            int2=parser_input.col if parser_input else 0,
        )
        err_context = xmlerror.xmlStructuredErrorContext
        err_func(err_context, &c_error)

    c_ctxt.wellFormed = 0
    # The entity was looked up and does not need to be freed.
    return NULL
1310
+
1311
+
1312
cdef void _initSaxDocument(void* ctxt) noexcept with gil:
    # SAX "startDocument" callback: runs the libxml2 default handler and
    # then sets up the name dict and the XML ID table of the new document.
    xmlparser.xmlSAX2StartDocument(ctxt)
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    c_doc = c_ctxt.myDoc

    # let the document share the dict of the parser context
    if c_doc and c_ctxt.dict and not c_doc.dict:
        # I have no idea why libxml2 disables this - we need it
        c_ctxt.dictNames = 1
        c_doc.dict = c_ctxt.dict
        xmlparser.xmlDictReference(c_ctxt.dict)

    # the XML ID hash table depends on the lxml-level parser context
    if not c_ctxt._private:
        return
    context = <_ParserContext>c_ctxt._private

    if not context._collect_ids:
        c_ctxt.loadsubset |= xmlparser.XML_SKIP_IDS
        if c_doc and c_doc.ids and not tree.xmlHashSize(c_doc.ids):
            # already initialised but empty => clear
            tree.xmlHashFree(c_doc.ids, NULL)
            c_doc.ids = NULL
        return

    # keep the global parser dict from filling up with XML IDs
    if c_doc and not c_doc.ids:
        # memory errors are not fatal here
        c_dict = xmlparser.xmlDictCreate()
        if c_dict:
            c_doc.ids = tree.xmlHashCreateDict(0, c_dict)
            xmlparser.xmlDictFree(c_dict)
        else:
            c_doc.ids = tree.xmlHashCreate(0)
1343
+
1344
+
1345
+ ############################################################
1346
+ ## ET feed parser
1347
+ ############################################################
1348
+
1349
+ cdef class _FeedParser(_BaseParser):
1350
+ cdef bint _feed_parser_running
1351
+
1352
+ @property
1353
+ def feed_error_log(self):
1354
+ """The error log of the last (or current) run of the feed parser.
1355
+
1356
+ Note that this is local to the feed parser and thus is
1357
+ different from what the ``error_log`` property returns.
1358
+ """
1359
+ return self._getPushParserContext()._error_log.copy()
1360
+
1361
+ cpdef feed(self, data):
1362
+ """feed(self, data)
1363
+
1364
+ Feeds data to the parser. The argument should be an 8-bit string
1365
+ buffer containing encoded data, although Unicode is supported as long
1366
+ as both string types are not mixed.
1367
+
1368
+ This is the main entry point to the consumer interface of a
1369
+ parser. The parser will parse as much of the XML stream as it
1370
+ can on each call. To finish parsing or to reset the parser,
1371
+ call the ``close()`` method. Both methods may raise
1372
+ ParseError if errors occur in the input data. If an error is
1373
+ raised, there is no longer a need to call ``close()``.
1374
+
1375
+ The feed parser interface is independent of the normal parser
1376
+ usage. You can use the same parser as a feed parser and in
1377
+ the ``parse()`` function concurrently.
1378
+ """
1379
+ cdef _ParserContext context
1380
+ cdef bytes bstring
1381
+ cdef xmlparser.xmlParserCtxt* pctxt
1382
+ cdef Py_ssize_t py_buffer_len, ustart
1383
+ cdef const_char* char_data
1384
+ cdef const_char* c_encoding
1385
+ cdef int buffer_len
1386
+ cdef int error
1387
+ cdef bint recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
1388
+
1389
+ if isinstance(data, bytes):
1390
+ if self._default_encoding is None:
1391
+ c_encoding = NULL
1392
+ else:
1393
+ c_encoding = self._default_encoding
1394
+ char_data = _cstr(data)
1395
+ py_buffer_len = python.PyBytes_GET_SIZE(data)
1396
+ ustart = 0
1397
+ elif isinstance(data, unicode):
1398
+ c_encoding = b"UTF-8"
1399
+ char_data = NULL
1400
+ py_buffer_len = len(<unicode> data)
1401
+ ustart = 0
1402
+ else:
1403
+ raise TypeError, "Parsing requires string data"
1404
+
1405
+ context = self._getPushParserContext()
1406
+ pctxt = context._c_ctxt
1407
+ error = 0
1408
+ if not self._feed_parser_running:
1409
+ context.prepare(set_document_loader=False)
1410
+ self._feed_parser_running = 1
1411
+ c_filename = (_cstr(self._filename)
1412
+ if self._filename is not None else NULL)
1413
+
1414
+ # We have to give *mlCtxtResetPush() enough input to figure
1415
+ # out the character encoding (at least four bytes),
1416
+ # however if we give it all we got, we'll have nothing for
1417
+ # *mlParseChunk() and things go wrong.
1418
+ buffer_len = 0
1419
+ if char_data is not NULL:
1420
+ buffer_len = 4 if py_buffer_len > 4 else <int>py_buffer_len
1421
+ orig_loader = _register_document_loader()
1422
+ if self._for_html:
1423
+ error = _htmlCtxtResetPush(
1424
+ pctxt, char_data, buffer_len, c_filename, c_encoding,
1425
+ self._parse_options)
1426
+ else:
1427
+ xmlparser.xmlCtxtUseOptions(pctxt, self._parse_options)
1428
+ error = xmlparser.xmlCtxtResetPush(
1429
+ pctxt, char_data, buffer_len, c_filename, c_encoding)
1430
+ _reset_document_loader(orig_loader)
1431
+ py_buffer_len -= buffer_len
1432
+ char_data += buffer_len
1433
+ if error:
1434
+ raise MemoryError()
1435
+ __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)
1436
+
1437
+ #print pctxt.charset, 'NONE' if c_encoding is NULL else c_encoding
1438
+
1439
+ fixup_error = 0
1440
+ while py_buffer_len > 0 and (error == 0 or recover):
1441
+ if char_data is NULL:
1442
+ # Unicode parsing by converting chunks to UTF-8
1443
+ buffer_len = 2**19 # len(bytes) <= 4 * (2**19) == 2 MiB
1444
+ bstring = (<unicode> data)[ustart : ustart+buffer_len].encode('UTF-8')
1445
+ ustart += buffer_len
1446
+ py_buffer_len -= buffer_len # may end up < 0
1447
+ error, fixup_error = _parse_data_chunk(pctxt, <const char*> bstring, <int> len(bstring))
1448
+ else:
1449
+ # Direct byte string parsing.
1450
+ buffer_len = <int>py_buffer_len if py_buffer_len <= limits.INT_MAX else limits.INT_MAX
1451
+ error, fixup_error = _parse_data_chunk(pctxt, char_data, buffer_len)
1452
+ py_buffer_len -= buffer_len
1453
+ char_data += buffer_len
1454
+
1455
+ if fixup_error:
1456
+ context.store_exception(MemoryError())
1457
+
1458
+ if context._has_raised():
1459
+ # propagate Python exceptions immediately
1460
+ recover = 0
1461
+ error = 1
1462
+ break
1463
+
1464
+ if error and not pctxt.replaceEntities and not pctxt.validate:
1465
+ # in this mode, we ignore errors about undefined entities
1466
+ for entry in context._error_log.filter_from_errors():
1467
+ if entry.type != ErrorTypes.WAR_UNDECLARED_ENTITY and \
1468
+ entry.type != ErrorTypes.ERR_UNDECLARED_ENTITY:
1469
+ break
1470
+ else:
1471
+ error = 0
1472
+
1473
+ if not pctxt.wellFormed and xmlparser.xmlCtxtIsStopped(pctxt) and context._has_raised():
1474
+ # propagate Python exceptions immediately
1475
+ recover = 0
1476
+ error = 1
1477
+
1478
+ if fixup_error or not recover and (error or not pctxt.wellFormed):
1479
+ self._feed_parser_running = 0
1480
+ try:
1481
+ context._handleParseResult(self, pctxt.myDoc, None)
1482
+ finally:
1483
+ context.cleanup()
1484
+
1485
+ cpdef close(self):
1486
+ """close(self)
1487
+
1488
+ Terminates feeding data to this parser. This tells the parser to
1489
+ process any remaining data in the feed buffer, and then returns the
1490
+ root Element of the tree that was parsed.
1491
+
1492
+ This method must be called after passing the last chunk of data into
1493
+ the ``feed()`` method. It should only be called when using the feed
1494
+ parser interface, all other usage is undefined.
1495
+ """
1496
+ if not self._feed_parser_running:
1497
+ raise XMLSyntaxError("no element found",
1498
+ xmlerror.XML_ERR_INTERNAL_ERROR, 0, 0,
1499
+ self._filename)
1500
+
1501
+ context = self._getPushParserContext()
1502
+ pctxt = context._c_ctxt
1503
+
1504
+ self._feed_parser_running = 0
1505
+ if self._for_html:
1506
+ htmlparser.htmlParseChunk(pctxt, NULL, 0, 1)
1507
+ else:
1508
+ xmlparser.xmlParseChunk(pctxt, NULL, 0, 1)
1509
+
1510
+ if (pctxt.recovery and not xmlparser.xmlCtxtIsStopped(pctxt) and
1511
+ isinstance(context, _SaxParserContext)):
1512
+ # apply any left-over 'end' events
1513
+ (<_SaxParserContext>context).flushEvents()
1514
+
1515
+ try:
1516
+ result = context._handleParseResult(self, pctxt.myDoc, None)
1517
+ finally:
1518
+ context.cleanup()
1519
+
1520
+ if isinstance(result, _Document):
1521
+ return (<_Document>result).getroot()
1522
+ else:
1523
+ return result
1524
+
1525
+
1526
+ cdef (int, int) _parse_data_chunk(xmlparser.xmlParserCtxt* c_ctxt,
1527
+ const char* char_data, int buffer_len):
1528
+ fixup_error = 0
1529
+ with nogil:
1530
+ if c_ctxt.html:
1531
+ c_node = c_ctxt.node # last node where the parser stopped
1532
+ orig_loader = _register_document_loader()
1533
+ error = htmlparser.htmlParseChunk(c_ctxt, char_data, buffer_len, 0)
1534
+ _reset_document_loader(orig_loader)
1535
+ # and now for the fun part: move node names to the dict
1536
+ if c_ctxt.myDoc:
1537
+ fixup_error = _fixHtmlDictSubtreeNames(
1538
+ c_ctxt.dict, c_ctxt.myDoc, c_node)
1539
+ if c_ctxt.myDoc.dict and c_ctxt.myDoc.dict is not c_ctxt.dict:
1540
+ xmlparser.xmlDictFree(c_ctxt.myDoc.dict)
1541
+ c_ctxt.myDoc.dict = c_ctxt.dict
1542
+ xmlparser.xmlDictReference(c_ctxt.dict)
1543
+ else:
1544
+ orig_loader = _register_document_loader()
1545
+ error = xmlparser.xmlParseChunk(c_ctxt, char_data, buffer_len, 0)
1546
+ _reset_document_loader(orig_loader)
1547
+ return (error, fixup_error)
1548
+
1549
+
1550
+ cdef int _htmlCtxtResetPush(xmlparser.xmlParserCtxt* c_ctxt,
1551
+ const_char* c_data, int buffer_len,
1552
+ const_char* c_filename, const_char* c_encoding,
1553
+ int parse_options) except -1:
1554
+ cdef xmlparser.xmlParserInput* c_input_stream
1555
+ # libxml2 lacks an HTML push parser setup function
1556
+ error = xmlparser.xmlCtxtResetPush(
1557
+ c_ctxt, c_data, buffer_len, c_filename, c_encoding)
1558
+ if error:
1559
+ return error
1560
+
1561
+ # fix libxml2 setup for HTML
1562
+ if tree.LIBXML_VERSION < 21400:
1563
+ c_ctxt.progressive = 1 # TODO: remove
1564
+ c_ctxt.html = 1
1565
+ htmlparser.htmlCtxtUseOptions(c_ctxt, parse_options)
1566
+
1567
+ return 0
1568
+
1569
+
1570
+ ############################################################
1571
+ ## XML parser
1572
+ ############################################################
1573
+
1574
+ cdef int _XML_DEFAULT_PARSE_OPTIONS
1575
+ _XML_DEFAULT_PARSE_OPTIONS = (
1576
+ xmlparser.XML_PARSE_NOENT |
1577
+ xmlparser.XML_PARSE_NOCDATA |
1578
+ xmlparser.XML_PARSE_NONET |
1579
+ xmlparser.XML_PARSE_COMPACT |
1580
+ xmlparser.XML_PARSE_BIG_LINES
1581
+ )
1582
+
1583
+ cdef class XMLParser(_FeedParser):
1584
+ """XMLParser(self, encoding=None, attribute_defaults=False, dtd_validation=False, \
1585
+ load_dtd=False, no_network=True, decompress=False, ns_clean=False, \
1586
+ recover=False, schema: XMLSchema =None, huge_tree=False, \
1587
+ remove_blank_text=False, resolve_entities='internal', \
1588
+ remove_comments=False, remove_pis=False, strip_cdata=True, \
1589
+ collect_ids=True, target=None, compact=True)
1590
+
1591
+ The XML parser.
1592
+
1593
+ Parsers can be supplied as additional argument to various parse
1594
+ functions of the lxml API. A default parser is always available
1595
+ and can be replaced by a call to the global function
1596
+ 'set_default_parser'. New parsers can be created at any time
1597
+ without a major run-time overhead.
1598
+
1599
+ The keyword arguments in the constructor are mainly based on the
1600
+ libxml2 parser configuration. A DTD will also be loaded if DTD
1601
+ validation or attribute default values are requested (unless you
1602
+ additionally provide an XMLSchema from which the default
1603
+ attributes can be read).
1604
+
1605
+ Available boolean keyword arguments:
1606
+
1607
+ - attribute_defaults - inject default attributes from DTD or XMLSchema
1608
+ - dtd_validation - validate against a DTD referenced by the document
1609
+ - load_dtd - use DTD for parsing
1610
+ - no_network - prevent network access for related files (default: True)
1611
+ - decompress - automatically decompress gzip input
1612
+ (default: False, changed in lxml 6.0, disabling only affects libxml2 2.15+)
1613
+ - ns_clean - clean up redundant namespace declarations
1614
+ - recover - try hard to parse through broken XML
1615
+ - remove_blank_text - discard blank text nodes that appear ignorable
1616
+ - remove_comments - discard comments
1617
+ - remove_pis - discard processing instructions
1618
+ - strip_cdata - replace CDATA sections by normal text content (default: True)
1619
+ - compact - save memory for short text content (default: True)
1620
+ - collect_ids - use a hash table of XML IDs for fast access
1621
+ (default: True, always True with DTD validation)
1622
+ - huge_tree - disable security restrictions and support very deep trees
1623
+ and very long text content
1624
+
1625
+ Other keyword arguments:
1626
+
1627
+ - resolve_entities - replace entities by their text value: False for keeping the
1628
+ entity references, True for resolving them, and 'internal' for resolving
1629
+ internal definitions only (no external file/URL access).
1630
+ The default used to be True and was changed to 'internal' in lxml 5.0.
1631
+ - encoding - override the document encoding (note: libiconv encoding name)
1632
+ - target - a parser target object that will receive the parse events
1633
+ - schema - an XMLSchema to validate against
1634
+
1635
+ Note that you should avoid sharing parsers between threads. While this is
1636
+ not harmful, it is more efficient to use separate parsers. This does not
1637
+ apply to the default parser.
1638
+ """
1639
+ def __init__(self, *, encoding=None, attribute_defaults=False,
1640
+ dtd_validation=False, load_dtd=False, no_network=True, decompress=False,
1641
+ ns_clean=False, recover=False, XMLSchema schema=None,
1642
+ huge_tree=False, remove_blank_text=False, resolve_entities='internal',
1643
+ remove_comments=False, remove_pis=False, strip_cdata=True,
1644
+ collect_ids=True, target=None, compact=True):
1645
+ cdef int parse_options
1646
+ cdef bint resolve_external = True
1647
+ parse_options = _XML_DEFAULT_PARSE_OPTIONS
1648
+ if load_dtd:
1649
+ parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD
1650
+ if dtd_validation:
1651
+ parse_options = parse_options | xmlparser.XML_PARSE_DTDVALID | \
1652
+ xmlparser.XML_PARSE_DTDLOAD
1653
+ if attribute_defaults:
1654
+ parse_options = parse_options | xmlparser.XML_PARSE_DTDATTR
1655
+ if schema is None:
1656
+ parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD
1657
+ if ns_clean:
1658
+ parse_options = parse_options | xmlparser.XML_PARSE_NSCLEAN
1659
+ if recover:
1660
+ parse_options = parse_options | xmlparser.XML_PARSE_RECOVER
1661
+ if remove_blank_text:
1662
+ parse_options = parse_options | xmlparser.XML_PARSE_NOBLANKS
1663
+ if huge_tree:
1664
+ parse_options = parse_options | xmlparser.XML_PARSE_HUGE
1665
+ if not no_network:
1666
+ parse_options = parse_options ^ xmlparser.XML_PARSE_NONET
1667
+ if not compact:
1668
+ parse_options = parse_options ^ xmlparser.XML_PARSE_COMPACT
1669
+ if not resolve_entities:
1670
+ parse_options = parse_options ^ xmlparser.XML_PARSE_NOENT
1671
+ elif resolve_entities == 'internal':
1672
+ resolve_external = False
1673
+ if not strip_cdata:
1674
+ parse_options = parse_options ^ xmlparser.XML_PARSE_NOCDATA
1675
+ if decompress:
1676
+ parse_options |= xmlparser.XML_PARSE_UNZIP
1677
+
1678
+ _BaseParser.__init__(self, parse_options, False, schema,
1679
+ remove_comments, remove_pis, strip_cdata,
1680
+ collect_ids, target, encoding, resolve_external)
1681
+
1682
+ # Allow subscripting XMLParser in type annotations (PEP 560)
1683
+ def __class_getitem__(cls, item):
1684
+ return _GenericAlias(cls, item)
1685
+
1686
+
1687
+ cdef class XMLPullParser(XMLParser):
1688
+ """XMLPullParser(self, events=None, *, tag=None, base_url=None, **kwargs)
1689
+
1690
+ XML parser that collects parse events in an iterator.
1691
+
1692
+ The collected events are the same as for iterparse(), but the
1693
+ parser itself is non-blocking in the sense that it receives
1694
+ data chunks incrementally through its .feed() method, instead
1695
+ of reading them directly from a file(-like) object all by itself.
1696
+
1697
+ By default, it collects Element end events. To change that,
1698
+ pass any subset of the available events into the ``events``
1699
+ argument: ``'start'``, ``'end'``, ``'start-ns'``,
1700
+ ``'end-ns'``, ``'comment'``, ``'pi'``.
1701
+
1702
+ To support loading external dependencies relative to the input
1703
+ source, you can pass the ``base_url``.
1704
+ """
1705
+ def __init__(self, events=None, *, tag=None, base_url=None, **kwargs):
1706
+ XMLParser.__init__(self, **kwargs)
1707
+ if events is None:
1708
+ events = ('end',)
1709
+ self._setBaseURL(base_url)
1710
+ self._collectEvents(events, tag)
1711
+
1712
+ def read_events(self):
1713
+ return (<_SaxParserContext?>self._getPushParserContext()).events_iterator
1714
+
1715
+
1716
+ cdef class ETCompatXMLParser(XMLParser):
1717
+ """ETCompatXMLParser(self, encoding=None, attribute_defaults=False, \
1718
+ dtd_validation=False, load_dtd=False, no_network=True, decompress=False, \
1719
+ ns_clean=False, recover=False, schema=None, \
1720
+ huge_tree=False, remove_blank_text=False, resolve_entities=True, \
1721
+ remove_comments=True, remove_pis=True, strip_cdata=True, \
1722
+ target=None, compact=True)
1723
+
1724
+ An XML parser with an ElementTree compatible default setup.
1725
+
1726
+ See the XMLParser class for details.
1727
+
1728
+ This parser has ``remove_comments`` and ``remove_pis`` enabled by default
1729
+ and thus ignores comments and processing instructions.
1730
+ """
1731
+ def __init__(self, *, encoding=None, attribute_defaults=False,
1732
+ dtd_validation=False, load_dtd=False, no_network=True, decompress=False,
1733
+ ns_clean=False, recover=False, schema=None,
1734
+ huge_tree=False, remove_blank_text=False, resolve_entities=True,
1735
+ remove_comments=True, remove_pis=True, strip_cdata=True,
1736
+ target=None, compact=True):
1737
+ XMLParser.__init__(self,
1738
+ attribute_defaults=attribute_defaults,
1739
+ dtd_validation=dtd_validation,
1740
+ load_dtd=load_dtd,
1741
+ no_network=no_network,
1742
+ decompress=decompress,
1743
+ ns_clean=ns_clean,
1744
+ recover=recover,
1745
+ remove_blank_text=remove_blank_text,
1746
+ huge_tree=huge_tree,
1747
+ compact=compact,
1748
+ resolve_entities=resolve_entities,
1749
+ remove_comments=remove_comments,
1750
+ remove_pis=remove_pis,
1751
+ strip_cdata=strip_cdata,
1752
+ target=target,
1753
+ encoding=encoding,
1754
+ schema=schema,
1755
+ )
1756
+
1757
+ # ET 1.2 compatible name
1758
+ XMLTreeBuilder = ETCompatXMLParser
1759
+
1760
+
1761
+ cdef XMLParser __DEFAULT_XML_PARSER
1762
+ __DEFAULT_XML_PARSER = XMLParser()
1763
+
1764
+ __GLOBAL_PARSER_CONTEXT.setDefaultParser(__DEFAULT_XML_PARSER)
1765
+
1766
+ def set_default_parser(_BaseParser parser=None):
1767
+ """set_default_parser(parser=None)
1768
+
1769
+ Set a default parser for the current thread. This parser is used
1770
+ globally whenever no parser is supplied to the various parse functions of
1771
+ the lxml API. If this function is called without a parser (or if it is
1772
+ None), the default parser is reset to the original configuration.
1773
+
1774
+ Note that the pre-installed default parser is not thread-safe. Avoid the
1775
+ default parser in multi-threaded environments. You can create a separate
1776
+ parser for each thread explicitly or use a parser pool.
1777
+ """
1778
+ if parser is None:
1779
+ parser = __DEFAULT_XML_PARSER
1780
+ __GLOBAL_PARSER_CONTEXT.setDefaultParser(parser)
1781
+
1782
+ def get_default_parser():
1783
+ "get_default_parser()"
1784
+ return __GLOBAL_PARSER_CONTEXT.getDefaultParser()
1785
+
1786
+ ############################################################
1787
+ ## HTML parser
1788
+ ############################################################
1789
+
1790
+ cdef int _HTML_DEFAULT_PARSE_OPTIONS
1791
+ _HTML_DEFAULT_PARSE_OPTIONS = (
1792
+ htmlparser.HTML_PARSE_RECOVER |
1793
+ htmlparser.HTML_PARSE_NONET |
1794
+ htmlparser.HTML_PARSE_COMPACT
1795
+ )
1796
+
1797
+ cdef object _UNUSED = object()
1798
+
1799
+ cdef class HTMLParser(_FeedParser):
1800
+ """HTMLParser(self, encoding=None, remove_blank_text=False, \
1801
+ remove_comments=False, remove_pis=False, \
1802
+ no_network=True, decompress=False, target=None, schema: XMLSchema =None, \
1803
+ recover=True, compact=True, collect_ids=True, huge_tree=False)
1804
+
1805
+ The HTML parser.
1806
+
1807
+ This parser allows reading HTML into a normal XML tree. By
1808
+ default, it can read broken (non well-formed) HTML, depending on
1809
+ the capabilities of libxml2. Use the 'recover' option to switch
1810
+ this off.
1811
+
1812
+ Available boolean keyword arguments:
1813
+
1814
+ - recover - try hard to parse through broken HTML (default: True)
1815
+ - no_network - prevent network access for related files (default: True)
1816
+ - decompress - automatically decompress gzip input
1817
+ (default: False, changed in lxml 6.0, disabling only affects libxml2 2.15+)
1818
+ - remove_blank_text - discard empty text nodes that are ignorable (i.e. not actual text content)
1819
+ - remove_comments - discard comments
1820
+ - remove_pis - discard processing instructions
1821
+ - compact - save memory for short text content (default: True)
1822
+ - default_doctype - add a default doctype even if it is not found in the HTML (default: True)
1823
+ - collect_ids - use a hash table of XML IDs for fast access (default: True)
1824
+ - huge_tree - disable security restrictions and support very deep trees
1825
+ and very long text content
1826
+
1827
+ Other keyword arguments:
1828
+
1829
+ - encoding - override the document encoding (note: libiconv encoding name)
1830
+ - target - a parser target object that will receive the parse events
1831
+ - schema - an XMLSchema to validate against
1832
+
1833
+ Note that you should avoid sharing parsers between threads for performance
1834
+ reasons.
1835
+ """
1836
+ def __init__(self, *, encoding=None, remove_blank_text=False,
1837
+ remove_comments=False, remove_pis=False, strip_cdata=_UNUSED,
1838
+ no_network=True, decompress=False, target=None, XMLSchema schema=None,
1839
+ recover=True, compact=True, default_doctype=True,
1840
+ collect_ids=True, huge_tree=False):
1841
+ cdef int parse_options
1842
+ parse_options = _HTML_DEFAULT_PARSE_OPTIONS
1843
+ if remove_blank_text:
1844
+ parse_options = parse_options | htmlparser.HTML_PARSE_NOBLANKS
1845
+ if not recover:
1846
+ parse_options = parse_options ^ htmlparser.HTML_PARSE_RECOVER
1847
+ if not no_network:
1848
+ parse_options = parse_options ^ htmlparser.HTML_PARSE_NONET
1849
+ if not compact:
1850
+ parse_options = parse_options ^ htmlparser.HTML_PARSE_COMPACT
1851
+ if not default_doctype:
1852
+ parse_options = parse_options ^ htmlparser.HTML_PARSE_NODEFDTD
1853
+ if huge_tree:
1854
+ parse_options = parse_options | xmlparser.XML_PARSE_HUGE
1855
+ if decompress:
1856
+ parse_options |= xmlparser.XML_PARSE_UNZIP
1857
+
1858
+ if strip_cdata is not _UNUSED:
1859
+ import warnings
1860
+ warnings.warn(
1861
+ "The 'strip_cdata' option of HTMLParser() has never done anything and will eventually be removed.",
1862
+ DeprecationWarning)
1863
+ _BaseParser.__init__(self, parse_options, True, schema,
1864
+ remove_comments, remove_pis, strip_cdata,
1865
+ collect_ids, target, encoding)
1866
+
1867
+ # Allow subscripting HTMLParser in type annotations (PEP 560)
1868
+ def __class_getitem__(cls, item):
1869
+ return _GenericAlias(cls, item)
1870
+
1871
+
1872
+ cdef HTMLParser __DEFAULT_HTML_PARSER
1873
+ __DEFAULT_HTML_PARSER = HTMLParser()
1874
+
1875
+
1876
+ cdef class HTMLPullParser(HTMLParser):
1877
+ """HTMLPullParser(self, events=None, *, tag=None, base_url=None, **kwargs)
1878
+
1879
+ HTML parser that collects parse events in an iterator.
1880
+
1881
+ The collected events are the same as for iterparse(), but the
1882
+ parser itself is non-blocking in the sense that it receives
1883
+ data chunks incrementally through its .feed() method, instead
1884
+ of reading them directly from a file(-like) object all by itself.
1885
+
1886
+ By default, it collects Element end events. To change that,
1887
+ pass any subset of the available events into the ``events``
1888
+ argument: ``'start'``, ``'end'``, ``'start-ns'``,
1889
+ ``'end-ns'``, ``'comment'``, ``'pi'``.
1890
+
1891
+ To support loading external dependencies relative to the input
1892
+ source, you can pass the ``base_url``.
1893
+ """
1894
+ def __init__(self, events=None, *, tag=None, base_url=None, **kwargs):
1895
+ HTMLParser.__init__(self, **kwargs)
1896
+ if events is None:
1897
+ events = ('end',)
1898
+ self._setBaseURL(base_url)
1899
+ self._collectEvents(events, tag)
1900
+
1901
+ def read_events(self):
1902
+ return (<_SaxParserContext?>self._getPushParserContext()).events_iterator
1903
+
1904
+
1905
+ ############################################################
1906
+ ## helper functions for document creation
1907
+ ############################################################
1908
+
1909
+ cdef xmlDoc* _parseDoc(text, filename, _BaseParser parser) except NULL:
1910
+ cdef char* c_filename
1911
+ if parser is None:
1912
+ parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
1913
+ if not filename:
1914
+ c_filename = NULL
1915
+ else:
1916
+ filename_utf = _encodeFilenameUTF8(filename)
1917
+ c_filename = _cstr(filename_utf)
1918
+ if isinstance(text, bytes):
1919
+ return _parseDoc_bytes(<bytes> text, filename, c_filename, parser)
1920
+ elif isinstance(text, unicode):
1921
+ return _parseDoc_unicode(<unicode> text, filename, c_filename, parser)
1922
+ else:
1923
+ return _parseDoc_charbuffer(text, filename, c_filename, parser)
1924
+
1925
+
1926
+ cdef xmlDoc* _parseDoc_unicode(unicode text, filename, char* c_filename, _BaseParser parser) except NULL:
1927
+ cdef Py_ssize_t c_len
1928
+ if python.PyUnicode_IS_READY(text):
1929
+ # PEP-393 Unicode string
1930
+ c_len = python.PyUnicode_GET_LENGTH(text) * python.PyUnicode_KIND(text)
1931
+ else:
1932
+ # old Py_UNICODE string
1933
+ c_len = python.PyUnicode_GET_DATA_SIZE(text)
1934
+ if c_len > limits.INT_MAX:
1935
+ return parser._parseDocFromFilelike(
1936
+ StringIO(text), filename, None)
1937
+ return parser._parseUnicodeDoc(text, c_filename)
1938
+
1939
+
1940
+ cdef xmlDoc* _parseDoc_bytes(bytes text, filename, char* c_filename, _BaseParser parser) except NULL:
1941
+ cdef Py_ssize_t c_len = len(text)
1942
+ if c_len > limits.INT_MAX:
1943
+ return parser._parseDocFromFilelike(BytesIO(text), filename, None)
1944
+ return parser._parseDoc(text, c_len, c_filename)
1945
+
1946
+
1947
+ cdef xmlDoc* _parseDoc_charbuffer(text, filename, char* c_filename, _BaseParser parser) except NULL:
1948
+ cdef const unsigned char[::1] data = memoryview(text).cast('B') # cast to 'unsigned char' buffer
1949
+ cdef Py_ssize_t c_len = len(data)
1950
+ if c_len > limits.INT_MAX:
1951
+ return parser._parseDocFromFilelike(BytesIO(text), filename, None)
1952
+ return parser._parseDoc(<const char*>&data[0], c_len, c_filename)
1953
+
1954
+
1955
+ cdef xmlDoc* _parseDocFromFile(filename8, _BaseParser parser) except NULL:
1956
+ if parser is None:
1957
+ parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
1958
+ return (<_BaseParser>parser)._parseDocFromFile(_cstr(filename8))
1959
+
1960
+
1961
+ cdef xmlDoc* _parseDocFromFilelike(source, filename,
1962
+ _BaseParser parser) except NULL:
1963
+ if parser is None:
1964
+ parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
1965
+ return (<_BaseParser>parser)._parseDocFromFilelike(source, filename, None)
1966
+
1967
+
1968
+ cdef xmlDoc* _newXMLDoc() except NULL:
1969
+ cdef xmlDoc* result
1970
+ result = tree.xmlNewDoc(NULL)
1971
+ if result is NULL:
1972
+ raise MemoryError()
1973
+ if result.encoding is NULL:
1974
+ result.encoding = tree.xmlStrdup(<unsigned char*>"UTF-8")
1975
+ __GLOBAL_PARSER_CONTEXT.initDocDict(result)
1976
+ return result
1977
+
1978
+ cdef xmlDoc* _newHTMLDoc() except NULL:
1979
+ cdef xmlDoc* result
1980
+ result = tree.htmlNewDoc(NULL, NULL)
1981
+ if result is NULL:
1982
+ raise MemoryError()
1983
+ __GLOBAL_PARSER_CONTEXT.initDocDict(result)
1984
+ return result
1985
+
1986
+ cdef xmlDoc* _copyDoc(xmlDoc* c_doc, int recursive) except NULL:
1987
+ cdef xmlDoc* result
1988
+ if recursive:
1989
+ with nogil:
1990
+ result = tree.xmlCopyDoc(c_doc, recursive)
1991
+ else:
1992
+ result = tree.xmlCopyDoc(c_doc, 0)
1993
+ if result is NULL:
1994
+ raise MemoryError()
1995
+ __GLOBAL_PARSER_CONTEXT.initDocDict(result)
1996
+ return result
1997
+
1998
+ cdef xmlDoc* _copyDocRoot(xmlDoc* c_doc, xmlNode* c_new_root) except NULL:
1999
+ "Recursively copy the document and make c_new_root the new root node."
2000
+ cdef xmlDoc* result
2001
+ cdef xmlNode* c_node
2002
+ result = tree.xmlCopyDoc(c_doc, 0) # non recursive
2003
+ __GLOBAL_PARSER_CONTEXT.initDocDict(result)
2004
+ with nogil:
2005
+ c_node = tree.xmlDocCopyNode(c_new_root, result, 1) # recursive
2006
+ if c_node is NULL:
2007
+ raise MemoryError()
2008
+ tree.xmlDocSetRootElement(result, c_node)
2009
+ _copyTail(c_new_root.next, c_node)
2010
+ return result
2011
+
2012
+ cdef xmlNode* _copyNodeToDoc(xmlNode* c_node, xmlDoc* c_doc) except NULL:
2013
+ "Recursively copy the element into the document. c_doc is not modified."
2014
+ cdef xmlNode* c_root
2015
+ c_root = tree.xmlDocCopyNode(c_node, c_doc, 1) # recursive
2016
+ if c_root is NULL:
2017
+ raise MemoryError()
2018
+ _copyTail(c_node.next, c_root)
2019
+ return c_root
2020
+
2021
+
2022
+ ############################################################
2023
+ ## API level helper functions for _Document creation
2024
+ ############################################################
2025
+
2026
+ cdef _Document _parseDocument(source, _BaseParser parser, base_url):
2027
+ cdef _Document doc
2028
+ source = _getFSPathOrObject(source)
2029
+ if _isString(source):
2030
+ # parse the file directly from the filesystem
2031
+ doc = _parseDocumentFromURL(_encodeFilename(source), parser)
2032
+ # fix base URL if requested
2033
+ if base_url is not None:
2034
+ base_url = _encodeFilenameUTF8(base_url)
2035
+ if doc._c_doc.URL is not NULL:
2036
+ tree.xmlFree(<char*>doc._c_doc.URL)
2037
+ doc._c_doc.URL = tree.xmlStrdup(_xcstr(base_url))
2038
+ return doc
2039
+
2040
+ if base_url is not None:
2041
+ url = base_url
2042
+ else:
2043
+ url = _getFilenameForFile(source)
2044
+
2045
+ if hasattr(source, 'getvalue') and hasattr(source, 'tell'):
2046
+ # StringIO - reading from start?
2047
+ if source.tell() == 0:
2048
+ return _parseMemoryDocument(source.getvalue(), url, parser)
2049
+
2050
+ # Support for file-like objects (urlgrabber.urlopen, ...)
2051
+ if hasattr(source, 'read'):
2052
+ return _parseFilelikeDocument(source, url, parser)
2053
+
2054
+ raise TypeError, f"cannot parse from '{python._fqtypename(source).decode('UTF-8')}'"
2055
+
2056
+ cdef _Document _parseDocumentFromURL(url, _BaseParser parser):
2057
+ c_doc = _parseDocFromFile(url, parser)
2058
+ return _documentFactory(c_doc, parser)
2059
+
2060
+ cdef _Document _parseMemoryDocument(text, url, _BaseParser parser):
2061
+ if isinstance(text, unicode):
2062
+ if _hasEncodingDeclaration(text):
2063
+ raise ValueError(
2064
+ "Unicode strings with encoding declaration are not supported. "
2065
+ "Please use bytes input or XML fragments without declaration.")
2066
+ c_doc = _parseDoc(text, url, parser)
2067
+ return _documentFactory(c_doc, parser)
2068
+
2069
+ cdef _Document _parseFilelikeDocument(source, url, _BaseParser parser):
2070
+ c_doc = _parseDocFromFilelike(source, url, parser)
2071
+ return _documentFactory(c_doc, parser)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/parsertarget.pxi ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Parser target context (ET target interface)
2
+
3
+ cdef object inspect_getargspec
4
+ try:
5
+ from inspect import getfullargspec as inspect_getargspec
6
+ except ImportError:
7
+ from inspect import getargspec as inspect_getargspec
8
+
9
+
10
+ class _TargetParserResult(Exception):
11
+ # Admittedly, this is somewhat ugly, but it's the easiest way
12
+ # to push the Python level parser result through the parser
13
+ # machinery towards the API level functions
14
+ def __init__(self, result):
15
+ self.result = result
16
+
17
+
18
+ @cython.final
19
+ @cython.internal
20
+ cdef class _PythonSaxParserTarget(_SaxParserTarget):
21
+ cdef object _target_start
22
+ cdef object _target_end
23
+ cdef object _target_data
24
+ cdef object _target_start_ns
25
+ cdef object _target_end_ns
26
+ cdef object _target_doctype
27
+ cdef object _target_pi
28
+ cdef object _target_comment
29
+ cdef bint _start_takes_nsmap
30
+
31
+ def __cinit__(self, target):
32
+ cdef int event_filter
33
+ event_filter = 0
34
+ self._start_takes_nsmap = 0
35
+ try:
36
+ self._target_start = target.start
37
+ if self._target_start is not None:
38
+ event_filter |= SAX_EVENT_START
39
+ except AttributeError:
40
+ pass
41
+ else:
42
+ try:
43
+ arguments = inspect_getargspec(self._target_start)
44
+ if len(arguments[0]) > 3 or arguments[1] is not None:
45
+ self._start_takes_nsmap = 1
46
+ except TypeError:
47
+ pass
48
+ try:
49
+ self._target_end = target.end
50
+ if self._target_end is not None:
51
+ event_filter |= SAX_EVENT_END
52
+ except AttributeError:
53
+ pass
54
+ try:
55
+ self._target_start_ns = target.start_ns
56
+ if self._target_start_ns is not None:
57
+ event_filter |= SAX_EVENT_START_NS
58
+ except AttributeError:
59
+ pass
60
+ try:
61
+ self._target_end_ns = target.end_ns
62
+ if self._target_end_ns is not None:
63
+ event_filter |= SAX_EVENT_END_NS
64
+ except AttributeError:
65
+ pass
66
+ try:
67
+ self._target_data = target.data
68
+ if self._target_data is not None:
69
+ event_filter |= SAX_EVENT_DATA
70
+ except AttributeError:
71
+ pass
72
+ try:
73
+ self._target_doctype = target.doctype
74
+ if self._target_doctype is not None:
75
+ event_filter |= SAX_EVENT_DOCTYPE
76
+ except AttributeError:
77
+ pass
78
+ try:
79
+ self._target_pi = target.pi
80
+ if self._target_pi is not None:
81
+ event_filter |= SAX_EVENT_PI
82
+ except AttributeError:
83
+ pass
84
+ try:
85
+ self._target_comment = target.comment
86
+ if self._target_comment is not None:
87
+ event_filter |= SAX_EVENT_COMMENT
88
+ except AttributeError:
89
+ pass
90
+ self._sax_event_filter = event_filter
91
+
92
+ cdef _handleSaxStart(self, tag, attrib, nsmap):
93
+ if self._start_takes_nsmap:
94
+ return self._target_start(tag, attrib, nsmap)
95
+ else:
96
+ return self._target_start(tag, attrib)
97
+
98
+ cdef _handleSaxEnd(self, tag):
99
+ return self._target_end(tag)
100
+
101
+ cdef _handleSaxStartNs(self, prefix, uri):
102
+ return self._target_start_ns(prefix, uri)
103
+
104
+ cdef _handleSaxEndNs(self, prefix):
105
+ return self._target_end_ns(prefix)
106
+
107
+ cdef int _handleSaxData(self, data) except -1:
108
+ self._target_data(data)
109
+
110
+ cdef int _handleSaxDoctype(self, root_tag, public_id, system_id) except -1:
111
+ self._target_doctype(root_tag, public_id, system_id)
112
+
113
+ cdef _handleSaxPi(self, target, data):
114
+ return self._target_pi(target, data)
115
+
116
+ cdef _handleSaxComment(self, comment):
117
+ return self._target_comment(comment)
118
+
119
+
120
+ @cython.final
121
+ @cython.internal
122
+ @cython.no_gc_clear # Required because parent class uses it - Cython bug.
123
+ cdef class _TargetParserContext(_SaxParserContext):
124
+ """This class maps SAX2 events to the ET parser target interface.
125
+ """
126
+ cdef object _python_target
127
+ cdef int _setTarget(self, target) except -1:
128
+ self._python_target = target
129
+ if not isinstance(target, _SaxParserTarget) or \
130
+ hasattr(target, '__dict__'):
131
+ target = _PythonSaxParserTarget(target)
132
+ self._setSaxParserTarget(target)
133
+ return 0
134
+
135
+ cdef _ParserContext _copy(self):
136
+ cdef _TargetParserContext context
137
+ context = _ParserContext._copy(self)
138
+ context._setTarget(self._python_target)
139
+ return context
140
+
141
+ cdef void _cleanupTargetParserContext(self, xmlDoc* result) noexcept:
142
+ if self._c_ctxt.myDoc is not NULL:
143
+ if self._c_ctxt.myDoc is not result and \
144
+ self._c_ctxt.myDoc._private is NULL:
145
+ # no _Document proxy => orphan
146
+ tree.xmlFreeDoc(self._c_ctxt.myDoc)
147
+ self._c_ctxt.myDoc = NULL
148
+
149
+ cdef object _handleParseResult(self, _BaseParser parser, xmlDoc* result,
150
+ filename):
151
+ cdef bint recover
152
+ recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER
153
+ try:
154
+ if self._has_raised():
155
+ self._cleanupTargetParserContext(result)
156
+ self._raise_if_stored()
157
+ if not self._c_ctxt.wellFormed and not recover:
158
+ _raiseParseError(self._c_ctxt, filename, self._error_log)
159
+ except:
160
+ self._python_target.close()
161
+ raise
162
+ return self._python_target.close()
163
+
164
+ cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser,
165
+ xmlDoc* result, filename) except NULL:
166
+ cdef bint recover
167
+ recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER
168
+ if result is not NULL and result._private is NULL:
169
+ # no _Document proxy => orphan
170
+ tree.xmlFreeDoc(result)
171
+ try:
172
+ self._cleanupTargetParserContext(result)
173
+ self._raise_if_stored()
174
+ if not self._c_ctxt.wellFormed and not recover:
175
+ _raiseParseError(self._c_ctxt, filename, self._error_log)
176
+ except:
177
+ self._python_target.close()
178
+ raise
179
+ parse_result = self._python_target.close()
180
+ raise _TargetParserResult(parse_result)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/readonlytree.pxi ADDED
@@ -0,0 +1,565 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # read-only tree implementation
2
+
3
@cython.internal
cdef class _ReadOnlyProxy:
    "A read-only proxy class suitable for PIs/Comments (for internal use only!)."
    # whether the wrapped xmlNode is freed when the proxies are released
    cdef bint _free_after_use
    # wrapped libxml2 node; NULL once the proxy has been invalidated
    cdef xmlNode* _c_node
    # proxy whose _dependent_proxies list tracks this proxy
    cdef _ReadOnlyProxy _source_proxy
    # proxies registered on this (source) proxy
    cdef list _dependent_proxies

    def __cinit__(self):
        self._c_node = NULL
        self._free_after_use = 0

    cdef int _assertNode(self) except -1:
        """This is our way of saying: this proxy is invalid!

        Raises ReferenceError when the wrapped node pointer is NULL.
        """
        if not self._c_node:
            raise ReferenceError("Proxy invalidated!")
        return 0

    cdef int _raise_unsupported_type(self) except -1:
        raise TypeError(f"Unsupported node type: {self._c_node.type}")

    cdef void free_after_use(self) noexcept:
        """Should the xmlNode* be freed when releasing the proxy?
        """
        self._free_after_use = 1

    @property
    def tag(self):
        """Element tag
        """
        self._assertNode()
        if self._c_node.type == tree.XML_ELEMENT_NODE:
            return _namespacedName(self._c_node)
        elif self._c_node.type == tree.XML_PI_NODE:
            return ProcessingInstruction
        elif self._c_node.type == tree.XML_COMMENT_NODE:
            return Comment
        elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
            return Entity
        else:
            self._raise_unsupported_type()

    @property
    def text(self):
        """Text before the first subelement. This is either a string or
        the value None, if there was no text.
        """
        self._assertNode()
        if self._c_node.type == tree.XML_ELEMENT_NODE:
            return _collectText(self._c_node.children)
        elif self._c_node.type in (tree.XML_PI_NODE,
                                   tree.XML_COMMENT_NODE):
            if self._c_node.content is NULL:
                return ''
            else:
                return funicode(self._c_node.content)
        elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
            return f'&{funicode(self._c_node.name)};'
        else:
            self._raise_unsupported_type()

    @property
    def tail(self):
        """Text after this element's end tag, but before the next sibling
        element's start tag. This is either a string or the value None, if
        there was no text.
        """
        self._assertNode()
        return _collectText(self._c_node.next)

    @property
    def sourceline(self):
        """Original line number as found by the parser or None if unknown.
        """
        cdef long line
        self._assertNode()
        line = tree.xmlGetLineNo(self._c_node)
        if line > 0:
            return line
        else:
            return None

    def __repr__(self):
        self._assertNode()
        if self._c_node.type == tree.XML_ELEMENT_NODE:
            return "<Element %s at 0x%x>" % (self.tag, id(self))
        elif self._c_node.type == tree.XML_COMMENT_NODE:
            return "<!--%s-->" % self.text
        elif self._c_node.type in (tree.XML_ENTITY_NODE,
                                   tree.XML_ENTITY_REF_NODE):
            # Proxies are created for entity *reference* nodes, so that
            # type must be accepted here as well; otherwise repr() of such
            # a proxy falls through to the TypeError below.
            return "&%s;" % funicode(self._c_node.name)
        elif self._c_node.type == tree.XML_PI_NODE:
            text = self.text
            if text:
                return "<?%s %s?>" % (self.target, text)
            else:
                return "<?%s?>" % self.target
        else:
            self._raise_unsupported_type()

    def __getitem__(self, x):
        """Returns the subelement at the given position or the requested
        slice.

        Children are always returned as read-only proxies.  Raises
        IndexError for an out-of-range index.
        """
        cdef xmlNode* c_node = NULL
        cdef Py_ssize_t step = 0, slicelength = 0
        cdef Py_ssize_t c, i
        cdef _node_to_node_function next_element
        cdef list result
        self._assertNode()
        if isinstance(x, slice):
            # slicing
            if _isFullSlice(<slice>x):
                # a full slice is simply the list of all child proxies
                return self.getchildren()
            _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
            if c_node is NULL:
                return []
            if step > 0:
                next_element = _nextElement
            else:
                step = -step
                next_element = _previousElement
            result = []
            c = 0
            while c_node is not NULL and c < slicelength:
                # exactly one read-only proxy per selected child
                result.append(_newReadOnlyProxy(self._source_proxy, c_node))
                c = c + 1
                for i from 0 <= i < step:
                    c_node = next_element(c_node)
            return result
        else:
            # indexing
            c_node = _findChild(self._c_node, x)
            if c_node is NULL:
                raise IndexError, "list index out of range"
            return _newReadOnlyProxy(self._source_proxy, c_node)

    def __len__(self):
        """Returns the number of subelements.
        """
        cdef Py_ssize_t c
        cdef xmlNode* c_node
        self._assertNode()
        c = 0
        c_node = self._c_node.children
        while c_node is not NULL:
            if tree._isElement(c_node):
                c = c + 1
            c_node = c_node.next
        return c

    def __bool__(self):
        cdef xmlNode* c_node
        self._assertNode()
        c_node = _findChildBackwards(self._c_node, 0)
        return c_node != NULL

    def __deepcopy__(self, memo):
        "__deepcopy__(self, memo)"
        return self.__copy__()

    cpdef __copy__(self):
        "__copy__(self)"
        cdef xmlDoc* c_doc
        cdef xmlNode* c_node
        cdef _Document new_doc
        if self._c_node is NULL:
            return self
        c_doc = _copyDocRoot(self._c_node.doc, self._c_node) # recursive
        new_doc = _documentFactory(c_doc, None)
        root = new_doc.getroot()
        if root is not None:
            return root
        # Comment/PI
        c_node = c_doc.children
        while c_node is not NULL and c_node.type != self._c_node.type:
            c_node = c_node.next
        if c_node is NULL:
            return None
        return _elementFactory(new_doc, c_node)

    def __iter__(self):
        return iter(self.getchildren())

    def iterchildren(self, tag=None, *, reversed=False):
        """iterchildren(self, tag=None, reversed=False)

        Iterate over the children of this element.
        """
        children = self.getchildren()
        if tag is not None and tag != '*':
            children = [ el for el in children if el.tag == tag ]
        if reversed:
            children = children[::-1]
        return iter(children)

    cpdef getchildren(self):
        """Returns all subelements. The elements are returned in document
        order.
        """
        cdef xmlNode* c_node
        cdef list result
        self._assertNode()
        result = []
        c_node = self._c_node.children
        while c_node is not NULL:
            if tree._isElement(c_node):
                result.append(_newReadOnlyProxy(self._source_proxy, c_node))
            c_node = c_node.next
        return result

    def getparent(self):
        """Returns the parent of this element or None for the root element.
        """
        cdef xmlNode* c_parent
        self._assertNode()
        c_parent = self._c_node.parent
        if c_parent is NULL or not tree._isElement(c_parent):
            return None
        else:
            return _newReadOnlyProxy(self._source_proxy, c_parent)

    def getnext(self):
        """Returns the following sibling of this element or None.
        """
        cdef xmlNode* c_node
        self._assertNode()
        c_node = _nextElement(self._c_node)
        if c_node is not NULL:
            return _newReadOnlyProxy(self._source_proxy, c_node)
        return None

    def getprevious(self):
        """Returns the preceding sibling of this element or None.
        """
        cdef xmlNode* c_node
        self._assertNode()
        c_node = _previousElement(self._c_node)
        if c_node is not NULL:
            return _newReadOnlyProxy(self._source_proxy, c_node)
        return None
244
+
245
+
246
@cython.final
@cython.internal
cdef class _ReadOnlyPIProxy(_ReadOnlyProxy):
    """A read-only proxy for processing instructions (for internal use only!)"""
    property target:
        # the name of the wrapped node, decoded to a Python string
        def __get__(self):
            self._assertNode()
            return funicode(self._c_node.name)
254
+
255
@cython.final
@cython.internal
cdef class _ReadOnlyEntityProxy(_ReadOnlyProxy):
    """A read-only proxy for entity references (for internal use only!)"""
    property name:
        def __get__(self):
            # An invalidated proxy has a NULL node pointer; fail with
            # ReferenceError instead of dereferencing it.
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            # Raises ValueError when the new name contains '&' or ';'.
            self._assertNode()
            value_utf = _utf8(value)
            if '&' in value or ';' in value:
                raise ValueError(f"Invalid entity name '{value}'")
            tree.xmlNodeSetName(self._c_node, _xcstr(value_utf))

    @property
    def text(self):
        # the entity reference as written in markup: '&name;'
        self._assertNode()
        return f'&{funicode(self._c_node.name)};'
272
+
273
+
274
@cython.internal
cdef class _ReadOnlyElementProxy(_ReadOnlyProxy):
    """The main read-only Element proxy class (for internal use only!)."""

    @property
    def attrib(self):
        # a fresh dict built from the collected attribute items
        self._assertNode()
        attribute_items = _collectAttributes(self._c_node, 3)
        return dict(attribute_items)

    @property
    def prefix(self):
        """Namespace prefix or None.
        """
        self._assertNode()
        c_ns = self._c_node.ns
        if c_ns is NULL or c_ns.prefix is NULL:
            return None
        return funicode(c_ns.prefix)

    @property
    def nsmap(self):
        """Namespace prefix->URI mapping known in the context of this
        Element.  This includes all namespace declarations of the
        parents.

        Note that changing the returned dict has no effect on the Element.
        """
        self._assertNode()
        return _build_nsmap(self._c_node)

    def get(self, key, default=None):
        """Gets an element attribute.
        """
        self._assertNode()
        return _getNodeAttributeValue(self._c_node, key, default)

    def keys(self):
        """Gets a list of attribute names.  The names are returned in an
        arbitrary order (just like for an ordinary Python dictionary).
        """
        self._assertNode()
        names = _collectAttributes(self._c_node, 1)
        return names

    def values(self):
        """Gets element attributes, as a sequence.  The attributes are returned
        in an arbitrary order.
        """
        self._assertNode()
        attribute_values = _collectAttributes(self._c_node, 2)
        return attribute_values

    def items(self):
        """Gets element attributes, as a sequence.  The attributes are returned
        in an arbitrary order.
        """
        self._assertNode()
        attribute_items = _collectAttributes(self._c_node, 3)
        return attribute_items
330
+
331
cdef _ReadOnlyProxy _newReadOnlyProxy(
        _ReadOnlyProxy source_proxy, xmlNode* c_node):
    # Create the read-only proxy class matching the node type, bind it to
    # the node and register it with its source proxy.  Raises TypeError
    # for node types that have no proxy class.
    cdef _ReadOnlyProxy proxy
    node_type = c_node.type
    if node_type == tree.XML_ELEMENT_NODE:
        proxy = _ReadOnlyElementProxy.__new__(_ReadOnlyElementProxy)
    elif node_type == tree.XML_PI_NODE:
        proxy = _ReadOnlyPIProxy.__new__(_ReadOnlyPIProxy)
    elif node_type in (tree.XML_COMMENT_NODE,
                       tree.XML_ENTITY_REF_NODE):
        proxy = _ReadOnlyProxy.__new__(_ReadOnlyProxy)
    else:
        raise TypeError(f"Unsupported element type: {c_node.type}")
    proxy._c_node = c_node
    _initReadOnlyProxy(proxy, source_proxy)
    return proxy
346
+
347
cdef inline _initReadOnlyProxy(_ReadOnlyProxy el,
                               _ReadOnlyProxy source_proxy):
    # Link a new proxy to the proxy that tracks it.  Without a source
    # proxy the new proxy becomes its own source and starts the list of
    # dependent proxies with itself.
    if source_proxy is not None:
        el._source_proxy = source_proxy
        source_proxy._dependent_proxies.append(el)
        return
    el._source_proxy = el
    el._dependent_proxies = [el]
355
+
356
cdef _freeReadOnlyProxies(_ReadOnlyProxy sourceProxy):
    # Invalidate every proxy registered on sourceProxy by clearing its
    # node pointer, free the nodes flagged for it, then empty the list.
    cdef xmlNode* c_released
    cdef _ReadOnlyProxy proxy
    if sourceProxy is None or sourceProxy._dependent_proxies is None:
        return
    for proxy in sourceProxy._dependent_proxies:
        c_released = proxy._c_node
        # clear the pointer first so the proxy reports itself as invalid
        proxy._c_node = NULL
        if proxy._free_after_use:
            tree.xmlFreeNode(c_released)
    del sourceProxy._dependent_proxies[:]
369
+
370
+ # opaque wrapper around non-element nodes, e.g. the document node
371
+ #
372
+ # This class does not imply any restrictions on modifiability or
373
+ # read-only status of the node, so use with caution.
374
+
375
@cython.internal
cdef class _OpaqueNodeWrapper:
    # the wrapped libxml2 node
    cdef tree.xmlNode* _c_node

    def __init__(self):
        # instances are only created through __new__ from Cython code
        raise TypeError("This type cannot be instantiated from Python")
380
+
381
@cython.final
@cython.internal
cdef class _OpaqueDocumentWrapper(_OpaqueNodeWrapper):
    cdef int _assertNode(self) except -1:
        """This is our way of saying: this proxy is invalid!
        """
        assert self._c_node is not NULL, "Proxy invalidated!"
        return 0

    cpdef append(self, other_element):
        """Append a copy of an Element to the list of children.

        An element is rejected with ValueError when the document already
        has a root element; node types other than element, PI and comment
        are rejected with TypeError.
        """
        cdef xmlNode* c_tail
        cdef xmlNode* c_source
        cdef xmlNode* c_copy
        self._assertNode()
        c_source = _roNodeOf(other_element)
        if c_source.type == tree.XML_ELEMENT_NODE:
            if tree.xmlDocGetRootElement(<tree.xmlDoc*>self._c_node) is not NULL:
                raise ValueError("cannot append, document already has a root element")
        elif c_source.type not in (tree.XML_PI_NODE, tree.XML_COMMENT_NODE):
            raise TypeError(f"unsupported element type for top-level node: {c_source.type}")
        c_copy = _copyNodeToDoc(c_source, <tree.xmlDoc*>self._c_node)
        # remember the sibling chain before the copy is linked in
        c_tail = c_copy.next
        tree.xmlAddChild(self._c_node, c_copy)
        _moveTail(c_tail, c_copy)

    def extend(self, elements):
        """Append a copy of all Elements from a sequence to the list of
        children.
        """
        self._assertNode()
        for item in elements:
            self.append(item)
414
+
415
cdef _OpaqueNodeWrapper _newOpaqueAppendOnlyNodeWrapper(xmlNode* c_node):
    # Document nodes (XML or HTML) get the document wrapper class; every
    # other node gets the plain opaque wrapper.
    cdef _OpaqueNodeWrapper wrapper
    is_document = c_node.type in (tree.XML_DOCUMENT_NODE,
                                  tree.XML_HTML_DOCUMENT_NODE)
    if is_document:
        wrapper = _OpaqueDocumentWrapper.__new__(_OpaqueDocumentWrapper)
    else:
        wrapper = _OpaqueNodeWrapper.__new__(_OpaqueNodeWrapper)
    wrapper._c_node = c_node
    return wrapper
423
+
424
+ # element proxies that allow restricted modification
425
+
426
@cython.internal
cdef class _ModifyContentOnlyProxy(_ReadOnlyProxy):
    """A read-only proxy that allows changing the text content.
    """
    property text:
        def __get__(self):
            # the node content as a string; '' when the node has none
            self._assertNode()
            if self._c_node.content is NULL:
                return ''
            else:
                return funicode(self._c_node.content)

        def __set__(self, value):
            # None clears the content; anything else is passed through
            # _utf8() before being handed to libxml2.
            cdef const_xmlChar* c_text
            self._assertNode()
            if value is None:
                c_text = <const_xmlChar*>NULL
            else:
                # keep the converted object referenced by `value` while
                # c_text points into it
                value = _utf8(value)
                c_text = _xcstr(value)
            tree.xmlNodeSetContent(self._c_node, c_text)
447
+
448
@cython.final
@cython.internal
cdef class _ModifyContentOnlyPIProxy(_ModifyContentOnlyProxy):
    """A read-only proxy that allows changing the text/target content of a
    processing instruction.
    """
    property target:
        def __get__(self):
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            self._assertNode()
            # keep the converted object referenced while libxml2 reads it
            target_utf = _utf8(value)
            tree.xmlNodeSetName(self._c_node, _xcstr(target_utf))
464
+
465
@cython.final
@cython.internal
cdef class _ModifyContentOnlyEntityProxy(_ModifyContentOnlyProxy):
    "A read-only proxy for entity references (for internal use only!)"
    property name:
        def __get__(self):
            # An invalidated proxy has a NULL node pointer; fail with
            # ReferenceError instead of dereferencing it.
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            # Raises ValueError when the new name contains '&' or ';'.
            # The check uses a real exception (not `assert`, which can be
            # compiled out) and looks at the value as passed in, matching
            # _ReadOnlyEntityProxy.name.
            self._assertNode()
            value_utf = _utf8(value)
            if '&' in value or ';' in value:
                raise ValueError(f"Invalid entity name '{value}'")
            tree.xmlNodeSetName(self._c_node, _xcstr(value_utf))
479
+
480
+
481
@cython.final
@cython.internal
cdef class _AppendOnlyElementProxy(_ReadOnlyElementProxy):
    """A read-only element that allows adding children and changing the
    text content (i.e. everything that adds to the subtree).
    """
    cpdef append(self, other_element):
        """Append a copy of an Element to the list of children.
        """
        cdef xmlNode* c_tail
        cdef xmlNode* c_copy
        self._assertNode()
        c_copy = _copyNodeToDoc(_roNodeOf(other_element), self._c_node.doc)
        # remember the sibling chain before the copy is linked in
        c_tail = c_copy.next
        tree.xmlAddChild(self._c_node, c_copy)
        _moveTail(c_tail, c_copy)

    def extend(self, elements):
        """Append a copy of all Elements from a sequence to the list of
        children.
        """
        self._assertNode()
        for item in elements:
            self.append(item)

    property text:
        """Text before the first subelement. This is either a string or the
        value None, if there was no text.
        """
        def __get__(self):
            self._assertNode()
            return _collectText(self._c_node.children)

        def __set__(self, value):
            self._assertNode()
            new_text = value
            if isinstance(new_text, QName):
                # QName values are resolved and decoded before being stored
                new_text = _resolveQNameText(self, new_text).decode('utf8')
            _setNodeText(self._c_node, new_text)
520
+
521
+
522
cdef _ReadOnlyProxy _newAppendOnlyProxy(
        _ReadOnlyProxy source_proxy, xmlNode* c_node):
    # Create the restricted-modification proxy class matching the node
    # type, bind it to the node and register it with its source proxy.
    # Raises TypeError for node types that have no such proxy class.
    cdef _ReadOnlyProxy proxy
    node_type = c_node.type
    if node_type == tree.XML_ELEMENT_NODE:
        proxy = _AppendOnlyElementProxy.__new__(_AppendOnlyElementProxy)
    elif node_type == tree.XML_PI_NODE:
        proxy = _ModifyContentOnlyPIProxy.__new__(_ModifyContentOnlyPIProxy)
    elif node_type == tree.XML_COMMENT_NODE:
        proxy = _ModifyContentOnlyProxy.__new__(_ModifyContentOnlyProxy)
    else:
        raise TypeError(f"Unsupported element type: {c_node.type}")
    proxy._c_node = c_node
    _initReadOnlyProxy(proxy, source_proxy)
    return proxy
536
+
537
cdef xmlNode* _roNodeOf(element) except NULL:
    # Return the C node wrapped by an _Element, a read-only proxy or an
    # opaque node wrapper.  Raises TypeError for any other object and for
    # wrappers whose node pointer is NULL.
    cdef xmlNode* c_found
    if isinstance(element, _Element):
        c_found = (<_Element>element)._c_node
    elif isinstance(element, _ReadOnlyProxy):
        c_found = (<_ReadOnlyProxy>element)._c_node
    elif isinstance(element, _OpaqueNodeWrapper):
        c_found = (<_OpaqueNodeWrapper>element)._c_node
    else:
        raise TypeError(f"invalid argument type {type(element)}")

    if c_found is not NULL:
        return c_found
    raise TypeError("invalid element")
551
+
552
cdef xmlNode* _nonRoNodeOf(element) except NULL:
    # Return the C node wrapped by an _Element, an append-only element
    # proxy or an opaque node wrapper.  Raises TypeError for any other
    # object and for wrappers whose node pointer is NULL.
    cdef xmlNode* c_found
    if isinstance(element, _Element):
        c_found = (<_Element>element)._c_node
    elif isinstance(element, _AppendOnlyElementProxy):
        c_found = (<_AppendOnlyElementProxy>element)._c_node
    elif isinstance(element, _OpaqueNodeWrapper):
        c_found = (<_OpaqueNodeWrapper>element)._c_node
    else:
        raise TypeError(f"invalid argument type {type(element)}")

    if c_found is not NULL:
        return c_found
    raise TypeError("invalid element")
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/saxparser.pxi ADDED
@@ -0,0 +1,875 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SAX-like interfaces
2
+
3
class XMLSyntaxAssertionError(XMLSyntaxError, AssertionError):
    """
    An XMLSyntaxError that additionally inherits from AssertionError for
    ElementTree / backwards compatibility reasons.

    This class may get replaced by a plain XMLSyntaxError in a future version.
    """
    def __init__(self, message):
        # Only the message is caller-supplied; the remaining arguments of
        # XMLSyntaxError.__init__ are fixed to None, 0 and 1.
        XMLSyntaxError.__init__(self, message, None, 0, 1)
12
+
13
+
14
# Bit flags, one per SAX event kind; combined into a target's
# `_sax_event_filter` bit mask.
ctypedef enum _SaxParserEvents:
    SAX_EVENT_START    = 1 << 0
    SAX_EVENT_END      = 1 << 1
    SAX_EVENT_DATA     = 1 << 2
    SAX_EVENT_DOCTYPE  = 1 << 3
    SAX_EVENT_PI       = 1 << 4
    SAX_EVENT_COMMENT  = 1 << 5
    SAX_EVENT_START_NS = 1 << 6
    SAX_EVENT_END_NS   = 1 << 7

# Bit flags for the parse events selectable by name ('start', 'end',
# 'start-ns', 'end-ns', 'comment', 'pi'); see _buildParseEventFilter().
ctypedef enum _ParseEventFilter:
    PARSE_EVENT_FILTER_START     = 1 << 0
    PARSE_EVENT_FILTER_END       = 1 << 1
    PARSE_EVENT_FILTER_START_NS  = 1 << 2
    PARSE_EVENT_FILTER_END_NS    = 1 << 3
    PARSE_EVENT_FILTER_COMMENT   = 1 << 4
    PARSE_EVENT_FILTER_PI        = 1 << 5
31
+
32
+
33
cdef int _buildParseEventFilter(events) except -1:
    # Translate an iterable of event names into the OR-ed
    # PARSE_EVENT_FILTER_* bit mask.  Raises ValueError for an unknown name.
    cdef int flags = 0
    for name in events:
        if name == 'start':
            flags |= PARSE_EVENT_FILTER_START
        elif name == 'end':
            flags |= PARSE_EVENT_FILTER_END
        elif name == 'start-ns':
            flags |= PARSE_EVENT_FILTER_START_NS
        elif name == 'end-ns':
            flags |= PARSE_EVENT_FILTER_END_NS
        elif name == 'comment':
            flags |= PARSE_EVENT_FILTER_COMMENT
        elif name == 'pi':
            flags |= PARSE_EVENT_FILTER_PI
        else:
            raise ValueError(f"invalid event name '{name}'")
    return flags
51
+
52
+
53
# Base class for parser targets.  Every handler here is a no-op that
# returns None (or 0 for the int-returning ones); subclasses presumably
# override the handlers for the events they care about.
cdef class _SaxParserTarget:
    # bit mask of SAX_EVENT_* flags; read by _SaxParserContext._connectTarget()
    # to decide which SAX callbacks to hook up
    cdef int _sax_event_filter

    cdef _handleSaxStart(self, tag, attrib, nsmap):
        return None
    cdef _handleSaxEnd(self, tag):
        return None
    cdef int _handleSaxData(self, data) except -1:
        return 0
    cdef int _handleSaxDoctype(self, root_tag, public_id, system_id) except -1:
        return 0
    cdef _handleSaxPi(self, target, data):
        return None
    cdef _handleSaxComment(self, comment):
        return None
    cdef _handleSaxStartNs(self, prefix, uri):
        return None
    cdef _handleSaxEndNs(self, prefix):
        return None
72
+
73
+
74
#@cython.final
@cython.internal
@cython.no_gc_clear  # Required because parent class uses it - Cython bug.
cdef class _SaxParserContext(_ParserContext):
    """This class maps SAX2 events to parser target events.
    """
    # parser target receiving the events (None when only collecting events)
    cdef _SaxParserTarget _target
    # owning parser; dropped again in startDocument()
    cdef _BaseParser _parser
    # SAX callbacks saved while connecting the target / event handlers
    cdef xmlparser.startElementNsSAX2Func _origSaxStart
    cdef xmlparser.endElementNsSAX2Func _origSaxEnd
    cdef xmlparser.startElementSAXFunc _origSaxStartNoNs
    cdef xmlparser.endElementSAXFunc _origSaxEndNoNs
    cdef xmlparser.charactersSAXFunc _origSaxData
    cdef xmlparser.cdataBlockSAXFunc _origSaxCData
    cdef xmlparser.internalSubsetSAXFunc _origSaxDoctype
    cdef xmlparser.commentSAXFunc _origSaxComment
    cdef xmlparser.processingInstructionSAXFunc _origSaxPI
    cdef xmlparser.startDocumentSAXFunc _origSaxStartDocument

    # for event collecting
    cdef int _event_filter
    cdef list _ns_stack
    cdef list _node_stack
    cdef _ParseEventsIterator events_iterator

    # for iterparse
    cdef _Element _root
    cdef _MultiTagMatcher _matcher

    def __cinit__(self, _BaseParser parser):
        self._ns_stack = []
        self._node_stack = []
        self._parser = parser
        self.events_iterator = _ParseEventsIterator()

    cdef void _setSaxParserTarget(self, _SaxParserTarget target) noexcept:
        self._target = target

    cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
        # A parser target takes precedence over plain event collection.
        _ParserContext._initParserContext(self, c_ctxt)
        if self._target is not None:
            self._connectTarget(c_ctxt)
        elif self._event_filter:
            self._connectEvents(c_ctxt)

    cdef void _connectTarget(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
        """Wrap original SAX2 callbacks to call into parser target.
        """
        # Each callback is first reset to NULL (and NULL is recorded as the
        # "original"), then re-pointed at a target handler only if the
        # target's event filter asks for that kind of event.
        sax = c_ctxt.sax
        self._origSaxStart = sax.startElementNs = NULL
        self._origSaxStartNoNs = sax.startElement = NULL
        if self._target._sax_event_filter & (SAX_EVENT_START |
                                             SAX_EVENT_START_NS |
                                             SAX_EVENT_END_NS):
            # intercept => overwrite orig callback
            # FIXME: also intercept on when collecting END events
            if sax.initialized == xmlparser.XML_SAX2_MAGIC:
                sax.startElementNs = _handleSaxTargetStart
            if self._target._sax_event_filter & SAX_EVENT_START:
                sax.startElement = _handleSaxTargetStartNoNs

        self._origSaxEnd = sax.endElementNs = NULL
        self._origSaxEndNoNs = sax.endElement = NULL
        if self._target._sax_event_filter & (SAX_EVENT_END |
                                             SAX_EVENT_END_NS):
            if sax.initialized == xmlparser.XML_SAX2_MAGIC:
                sax.endElementNs = _handleSaxEnd
            if self._target._sax_event_filter & SAX_EVENT_END:
                sax.endElement = _handleSaxEndNoNs

        self._origSaxData = sax.characters = sax.cdataBlock = NULL
        if self._target._sax_event_filter & SAX_EVENT_DATA:
            sax.characters = sax.cdataBlock = _handleSaxData

        # doctype propagation is always required for entity replacement
        self._origSaxDoctype = sax.internalSubset
        if self._target._sax_event_filter & SAX_EVENT_DOCTYPE:
            sax.internalSubset = _handleSaxTargetDoctype

        self._origSaxPI = sax.processingInstruction = NULL
        if self._target._sax_event_filter & SAX_EVENT_PI:
            sax.processingInstruction = _handleSaxTargetPI

        self._origSaxComment = sax.comment = NULL
        if self._target._sax_event_filter & SAX_EVENT_COMMENT:
            sax.comment = _handleSaxTargetComment

        # enforce entity replacement
        sax.reference = NULL
        c_ctxt.replaceEntities = 1

    cdef void _connectEvents(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
        """Wrap original SAX2 callbacks to collect parse events without parser target.
        """
        # Unlike _connectTarget(), the original callbacks are kept (saved
        # in the _origSax* fields) so that the wrappers can call them.
        sax = c_ctxt.sax
        self._origSaxStartDocument = sax.startDocument
        sax.startDocument = _handleSaxStartDocument

        # only override "start" event handler if needed
        self._origSaxStart = sax.startElementNs
        if self._event_filter == 0 or c_ctxt.html or \
               self._event_filter & (PARSE_EVENT_FILTER_START |
                                     PARSE_EVENT_FILTER_END |
                                     PARSE_EVENT_FILTER_START_NS |
                                     PARSE_EVENT_FILTER_END_NS):
            sax.startElementNs = <xmlparser.startElementNsSAX2Func>_handleSaxStart

        self._origSaxStartNoNs = sax.startElement
        if self._event_filter == 0 or c_ctxt.html or \
               self._event_filter & (PARSE_EVENT_FILTER_START |
                                     PARSE_EVENT_FILTER_END):
            sax.startElement = <xmlparser.startElementSAXFunc>_handleSaxStartNoNs

        # only override "end" event handler if needed
        self._origSaxEnd = sax.endElementNs
        if self._event_filter == 0 or \
               self._event_filter & (PARSE_EVENT_FILTER_END |
                                     PARSE_EVENT_FILTER_END_NS):
            sax.endElementNs = <xmlparser.endElementNsSAX2Func>_handleSaxEnd

        self._origSaxEndNoNs = sax.endElement
        if self._event_filter == 0 or \
               self._event_filter & PARSE_EVENT_FILTER_END:
            sax.endElement = <xmlparser.endElementSAXFunc>_handleSaxEndNoNs

        self._origSaxComment = sax.comment
        if self._event_filter & PARSE_EVENT_FILTER_COMMENT:
            sax.comment = <xmlparser.commentSAXFunc>_handleSaxComment

        self._origSaxPI = sax.processingInstruction
        if self._event_filter & PARSE_EVENT_FILTER_PI:
            sax.processingInstruction = <xmlparser.processingInstructionSAXFunc>_handleSaxPIEvent

    cdef _setEventFilter(self, events, tag):
        # A tag matcher is only set up when at least one event is selected
        # and a tag other than None / '*' was requested.
        self._event_filter = _buildParseEventFilter(events)
        if not self._event_filter or tag is None or tag == '*':
            self._matcher = None
        else:
            self._matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag)

    cdef int startDocument(self, xmlDoc* c_doc) except -1:
        try:
            self._doc = _documentFactory(c_doc, self._parser)
        finally:
            self._parser = None  # clear circular reference ASAP
        if self._matcher is not None:
            self._matcher.cacheTags(self._doc, force_into_dict=True)
        return 0

    cdef int pushEvent(self, event, xmlNode* c_node) except -1:
        # Appends an (event, element) pair to the event list; the root
        # element is cached on first sight once the document has one.
        cdef _Element root
        if self._root is None:
            root = self._doc.getroot()
            if root is not None and root._c_node.type == tree.XML_ELEMENT_NODE:
                self._root = root
        node = _elementFactory(self._doc, c_node)
        self.events_iterator._events.append( (event, node) )
        return 0

    cdef int flushEvents(self) except -1:
        # Emits an 'end' event for every node still on the node stack,
        # then drains whatever is left on the namespace stack.
        events = self.events_iterator._events
        while self._node_stack:
            events.append( ('end', self._node_stack.pop()) )
            _pushSaxNsEndEvents(self)
        while self._ns_stack:
            _pushSaxNsEndEvents(self)

    cdef void _handleSaxException(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
        # Flags the parser context as failed and records the pending
        # exception via _store_raised().
        if c_ctxt.errNo == xmlerror.XML_ERR_OK:
            c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
        # stop parsing immediately
        c_ctxt.wellFormed = 0
        c_ctxt.disableSAX = 1
        c_ctxt.instate = xmlparser.XML_PARSER_EOF
        self._store_raised()
249
+
250
+
251
@cython.final
@cython.internal
cdef class _ParseEventsIterator:
    """A reusable parse events iterator"""
    # pending events plus the already consumed ones awaiting cleanup
    cdef list _events
    # index of the next event to hand out
    cdef int _event_index

    def __cinit__(self):
        self._events = []
        self._event_index = 0

    def __iter__(self):
        return self

    def __next__(self):
        cdef int pos = self._event_index
        pending = self._events
        if pos >= 2**10 or pos * 2 >= len(pending):
            if pos:
                # clean up from time to time: drop the consumed prefix
                del pending[:pos]
                self._event_index = pos = 0
            if pos >= len(pending):
                raise StopIteration
        next_item = pending[pos]
        self._event_index = pos + 1
        return next_item
278
+
279
+
280
cdef list _build_prefix_uri_list(_SaxParserContext context, int c_nb_namespaces,
                                 const_xmlChar** c_namespaces):
    "Build [(prefix, uri)] list of declared namespaces."
    # c_namespaces is read as consecutive (prefix, uri) pointer pairs
    cdef int pair_index
    declared = []
    for pair_index in range(c_nb_namespaces):
        prefix = funicodeOrEmpty(c_namespaces[0])
        uri = funicode(c_namespaces[1])
        declared.append((prefix, uri))
        c_namespaces += 2
    return declared
289
+
290
+
291
# SAX2 "start element" callback used for event collection (installed by
# _SaxParserContext._connectEvents).  It queues 'start-ns' events, calls
# the saved original callback and then pushes the start event.  Exceptions
# never leave this function: they are passed to the context's
# _handleSaxException() and anything further is swallowed.
cdef void _handleSaxStart(
        void* ctxt, const_xmlChar* c_localname, const_xmlChar* c_prefix,
        const_xmlChar* c_namespace, int c_nb_namespaces,
        const_xmlChar** c_namespaces,
        int c_nb_attributes, int c_nb_defaulted,
        const_xmlChar** c_attributes) noexcept with gil:
    cdef int i
    cdef size_t c_len
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    # nothing to do without an attached context or once parsing was stopped
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    cdef int event_filter = context._event_filter
    try:
        # namespace declarations are only collected when ns events are wanted
        if (c_nb_namespaces and
                event_filter & (PARSE_EVENT_FILTER_START_NS |
                                PARSE_EVENT_FILTER_END_NS)):
            declared_namespaces = _build_prefix_uri_list(
                context, c_nb_namespaces, c_namespaces)
            if event_filter & PARSE_EVENT_FILTER_START_NS:
                for prefix_uri_tuple in declared_namespaces:
                    context.events_iterator._events.append(("start-ns", prefix_uri_tuple))
        else:
            declared_namespaces = None

        # call the callback that was saved when this handler was installed
        context._origSaxStart(c_ctxt, c_localname, c_prefix, c_namespace,
                              c_nb_namespaces, c_namespaces, c_nb_attributes,
                              c_nb_defaulted, c_attributes)
        if c_ctxt.html:
            _fixHtmlDictNodeNames(c_ctxt.dict, c_ctxt.node)
            # The HTML parser in libxml2 reports the missing opening tags when it finds
            # misplaced ones, but with tag names from C string constants that ignore the
            # parser dict.  Thus, we need to intern the name ourselves.
            c_localname = tree.xmlDictLookup(c_ctxt.dict, c_localname, -1)
            if c_localname is NULL:
                raise MemoryError()

        # one stack entry per element (possibly None) when 'end-ns' is wanted
        if event_filter & PARSE_EVENT_FILTER_END_NS:
            context._ns_stack.append(declared_namespaces)
        if event_filter & (PARSE_EVENT_FILTER_END |
                           PARSE_EVENT_FILTER_START):
            _pushSaxStartEvent(context, c_ctxt, c_namespace, c_localname, None)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
337
+
338
+
339
cdef void _handleSaxTargetStart(
        void* ctxt, const_xmlChar* c_localname, const_xmlChar* c_prefix,
        const_xmlChar* c_namespace, int c_nb_namespaces,
        const_xmlChar** c_namespaces,
        int c_nb_attributes, int c_nb_defaulted,
        const_xmlChar** c_attributes) noexcept with gil:
    # Namespace-aware start-element callback for parsing with a target.
    #
    # Reports namespace declarations (as "start-ns" events and/or to the
    # target), builds the attribute and nsmap mappings, calls the target's
    # start handler and records the namespace scope and the start event.
    # Any exception is handed to context._handleSaxException().
    cdef int i
    cdef size_t c_len
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private

    cdef int event_filter = context._event_filter
    cdef int sax_event_filter = context._target._sax_event_filter
    try:
        if c_nb_namespaces:
            declared_namespaces = _build_prefix_uri_list(
                context, c_nb_namespaces, c_namespaces)

            if event_filter & PARSE_EVENT_FILTER_START_NS:
                for prefix_uri_tuple in declared_namespaces:
                    context.events_iterator._events.append(("start-ns", prefix_uri_tuple))

            if sax_event_filter & SAX_EVENT_START_NS:
                for prefix, uri in declared_namespaces:
                    context._target._handleSaxStartNs(prefix, uri)
        else:
            declared_namespaces = None

        if sax_event_filter & SAX_EVENT_START:
            if c_nb_defaulted > 0:
                # only add default attributes if we asked for them
                # (parsed as "(loadsubset & XML_COMPLETE_ATTRS) == 0")
                if c_ctxt.loadsubset & xmlparser.XML_COMPLETE_ATTRS == 0:
                    c_nb_attributes -= c_nb_defaulted
            if c_nb_attributes == 0:
                attrib = IMMUTABLE_EMPTY_MAPPING
            else:
                attrib = {}
                # Each attribute occupies five pointer slots: slot 0 is
                # passed as the local name, slot 2 as the namespace, and
                # slots 3/4 delimit the value (start and end pointer).
                for i in xrange(c_nb_attributes):
                    name = _namespacedNameFromNsName(
                        c_attributes[2], c_attributes[0])
                    if c_attributes[3] is NULL:
                        value = ''
                    else:
                        c_len = c_attributes[4] - c_attributes[3]
                        value = c_attributes[3][:c_len].decode('utf8')
                    attrib[name] = value
                    c_attributes += 5  # advance to the next attribute

            nsmap = dict(declared_namespaces) if c_nb_namespaces else IMMUTABLE_EMPTY_MAPPING

            element = _callTargetSaxStart(
                context, c_ctxt,
                _namespacedNameFromNsName(c_namespace, c_localname),
                attrib, nsmap)
        else:
            element = None

        # The scope (possibly None) is pushed whenever either the event
        # collector or the target wants namespace end notifications.
        if (event_filter & PARSE_EVENT_FILTER_END_NS or
                sax_event_filter & SAX_EVENT_END_NS):
            context._ns_stack.append(declared_namespaces)
        if event_filter & (PARSE_EVENT_FILTER_END |
                           PARSE_EVENT_FILTER_START):
            _pushSaxStartEvent(context, c_ctxt, c_namespace,
                               c_localname, element)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
409
+
410
+
411
cdef void _handleSaxStartNoNs(void* ctxt, const_xmlChar* c_name,
                              const_xmlChar** c_attributes) noexcept with gil:
    # Start-element callback without namespace information, for parsing
    # without a target.  Delegates to the original handler stored on the
    # context, then queues the start event if start or end events were
    # requested.  Exceptions go to context._handleSaxException().
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        context._origSaxStartNoNs(c_ctxt, c_name, c_attributes)
        if c_ctxt.html:
            _fixHtmlDictNodeNames(c_ctxt.dict, c_ctxt.node)
            # The HTML parser in libxml2 reports the missing opening tags when it finds
            # misplaced ones, but with tag names from C string constants that ignore the
            # parser dict.  Thus, we need to intern the name ourselves.
            c_name = tree.xmlDictLookup(c_ctxt.dict, c_name, -1)
            if c_name is NULL:
                raise MemoryError()
        if context._event_filter & (PARSE_EVENT_FILTER_END |
                                    PARSE_EVENT_FILTER_START):
            # NULL namespace: this callback carries no namespace URI
            _pushSaxStartEvent(context, c_ctxt, NULL, c_name, None)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
434
+
435
+
436
cdef void _handleSaxTargetStartNoNs(void* ctxt, const_xmlChar* c_name,
                                    const_xmlChar** c_attributes) noexcept with gil:
    # Start-element callback without namespace information, for parsing
    # with a target.  Builds the attribute mapping, calls the target's
    # start handler with an empty nsmap and queues the start event if
    # requested.  Exceptions go to context._handleSaxException().
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        if c_attributes is NULL:
            attrib = IMMUTABLE_EMPTY_MAPPING
        else:
            attrib = {}
            # NULL-terminated array of alternating name/value pointers
            while c_attributes[0] is not NULL:
                name = funicode(c_attributes[0])
                attrib[name] = funicodeOrEmpty(c_attributes[1])
                c_attributes += 2
        element = _callTargetSaxStart(
            context, c_ctxt, funicode(c_name),
            attrib, IMMUTABLE_EMPTY_MAPPING)
        if context._event_filter & (PARSE_EVENT_FILTER_END |
                                    PARSE_EVENT_FILTER_START):
            _pushSaxStartEvent(context, c_ctxt, NULL, c_name, element)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
461
+
462
+
463
cdef _callTargetSaxStart(_SaxParserContext context,
                         xmlparser.xmlParserCtxt* c_ctxt,
                         tag, attrib, nsmap):
    """Call the target's start handler and return whatever it returns.

    If the result is an ``_Element`` and the parser has a current input,
    the input's line number is stored on the element's C node, capped
    at 65535.
    """
    element = context._target._handleSaxStart(tag, attrib, nsmap)
    if element is None or c_ctxt.input is NULL:
        return element
    if not isinstance(element, _Element):
        return element
    if c_ctxt.input.line < 65535:
        (<_Element>element)._c_node.line = <unsigned short>c_ctxt.input.line
    else:
        (<_Element>element)._c_node.line = 65535
    return element
473
+
474
+
475
cdef int _pushSaxStartEvent(_SaxParserContext context,
                            xmlparser.xmlParserCtxt* c_ctxt,
                            const_xmlChar* c_href,
                            const_xmlChar* c_name, node) except -1:
    """Queue a 'start' event for the element if it passes the tag matcher.

    Without a target and without a given ``node``, the element proxy is
    created from ``c_ctxt.node``.  Without a target and with 'end' events
    requested, the node is also pushed on the node stack.
    """
    matcher = context._matcher
    if matcher is not None and not matcher.matchesNsTag(c_href, c_name):
        return 0

    targetless = context._target is None
    if targetless and node is None:
        assert context._doc is not None
        node = _elementFactory(context._doc, c_ctxt.node)
    if context._event_filter & PARSE_EVENT_FILTER_START:
        context.events_iterator._events.append(('start', node))
    if targetless and context._event_filter & PARSE_EVENT_FILTER_END:
        context._node_stack.append(node)
    return 0
490
+
491
+
492
cdef void _handleSaxEnd(void* ctxt, const_xmlChar* c_localname,
                        const_xmlChar* c_prefix,
                        const_xmlChar* c_namespace) noexcept with gil:
    # Namespace-aware end-element callback.  With a target, the target's
    # end handler is called (only if it subscribed to end events);
    # otherwise the original handler stored on the context is called.
    # Afterwards the 'end' event and the namespace end events are pushed.
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        if context._target is not None:
            if context._target._sax_event_filter & SAX_EVENT_END:
                node = context._target._handleSaxEnd(
                    _namespacedNameFromNsName(c_namespace, c_localname))
            else:
                node = None
        else:
            context._origSaxEnd(c_ctxt, c_localname, c_prefix, c_namespace)
            # without a target, _pushSaxEndEvent() pops the node itself
            node = None
        _pushSaxEndEvent(context, c_namespace, c_localname, node)
        _pushSaxNsEndEvents(context)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
515
+
516
+
517
cdef void _handleSaxEndNoNs(void* ctxt, const_xmlChar* c_name) noexcept with gil:
    # End-element callback without namespace information.  Calls either
    # the target's end handler or the original handler stored on the
    # context, then pushes the 'end' event with a NULL namespace.
    # NOTE(review): unlike _handleSaxEnd(), this does not test
    # _sax_event_filter & SAX_EVENT_END before calling the target and does
    # not call _pushSaxNsEndEvents() -- confirm this is intended.
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        if context._target is not None:
            node = context._target._handleSaxEnd(funicode(c_name))
        else:
            context._origSaxEndNoNs(c_ctxt, c_name)
            node = None
        _pushSaxEndEvent(context, NULL, c_name, node)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
533
+
534
+
535
cdef int _pushSaxNsEndEvents(_SaxParserContext context) except -1:
    """Close the innermost namespace scope.

    Pops one entry from the namespace stack and, for each declaration in
    reverse order, notifies the target (if it subscribed to namespace end
    events) and/or queues an ``('end-ns', None)`` event.  Does nothing,
    and pops nothing, when neither consumer is interested.
    """
    cdef list scope
    cdef tuple declaration
    cdef bint want_events = context._event_filter & PARSE_EVENT_FILTER_END_NS
    cdef bint want_target = (
        context._target is not None
        and context._target._sax_event_filter & SAX_EVENT_END_NS)

    if not (want_events or want_target):
        return 0

    scope = context._ns_stack.pop()
    if scope is None:
        # the element declared no namespaces
        return 0

    for declaration in reversed(scope):
        if want_target:
            context._target._handleSaxEndNs(declaration[0])
        if want_events:
            context.events_iterator._events.append(('end-ns', None))
    return 0
555
+
556
+
557
cdef int _pushSaxEndEvent(_SaxParserContext context,
                          const_xmlChar* c_href,
                          const_xmlChar* c_name, node) except -1:
    """Queue an 'end' event if requested and the tag passes the matcher.

    Without a target, the node is taken from the node stack and the
    ``node`` argument is ignored.
    """
    if not (context._event_filter & PARSE_EVENT_FILTER_END):
        return 0
    matcher = context._matcher
    if matcher is not None and not matcher.matchesNsTag(c_href, c_name):
        return 0
    if context._target is None:
        node = context._node_stack.pop()
    context.events_iterator._events.append(('end', node))
    return 0
567
+
568
+
569
cdef void _handleSaxData(void* ctxt, const_xmlChar* c_data, int data_len) noexcept with gil:
    # Character data callback: decodes the first data_len bytes of c_data
    # as UTF-8 and passes the text to the target's data handler.
    # can only be called if parsing with a target
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        context._target._handleSaxData(
            c_data[:data_len].decode('utf8'))
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
582
+
583
+
584
cdef void _handleSaxTargetDoctype(void* ctxt, const_xmlChar* c_name,
                                  const_xmlChar* c_public,
                                  const_xmlChar* c_system) noexcept with gil:
    # Doctype callback: passes name, public ID and system ID to the
    # target's doctype handler, each converted with funicodeOrNone().
    # can only be called if parsing with a target
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        context._target._handleSaxDoctype(
            funicodeOrNone(c_name),
            funicodeOrNone(c_public),
            funicodeOrNone(c_system))
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
601
+
602
+
603
cdef void _handleSaxStartDocument(void* ctxt) noexcept with gil:
    # Start-document callback: runs the original handler stored on the
    # context, then passes the parser's document (c_ctxt.myDoc) to
    # context.startDocument().
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    # called outside the try block
    context._origSaxStartDocument(ctxt)
    c_doc = c_ctxt.myDoc
    try:
        context.startDocument(c_doc)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
616
+
617
+
618
cdef void _handleSaxTargetPI(void* ctxt, const_xmlChar* c_target,
                             const_xmlChar* c_data) noexcept with gil:
    # Processing instruction callback: passes the PI to the target and, if
    # 'pi' events were requested, queues the target's return value.
    # can only be called if parsing with a target
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        pi = context._target._handleSaxPi(
            funicodeOrNone(c_target),
            funicodeOrEmpty(c_data))
        if context._event_filter & PARSE_EVENT_FILTER_PI:
            context.events_iterator._events.append(('pi', pi))
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
635
+
636
+
637
cdef void _handleSaxPIEvent(void* ctxt, const_xmlChar* target,
                            const_xmlChar* data) noexcept with gil:
    # Processing instruction callback for parsing without a target: runs
    # the original handler stored on the context, looks up the node via
    # _findLastEventNode() and pushes a 'pi' event for it.
    # can only be called when collecting pi events
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    context._origSaxPI(ctxt, target, data)
    c_node = _findLastEventNode(c_ctxt)
    if c_node is NULL:
        # no node found, nothing to report
        return
    try:
        context.pushEvent('pi', c_node)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
654
+
655
+
656
cdef void _handleSaxTargetComment(void* ctxt, const_xmlChar* c_data) noexcept with gil:
    # Comment callback: passes the comment text to the target and, if
    # 'comment' events were requested, queues the target's return value.
    # can only be called if parsing with a target
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        comment = context._target._handleSaxComment(funicodeOrEmpty(c_data))
        if context._event_filter & PARSE_EVENT_FILTER_COMMENT:
            context.events_iterator._events.append(('comment', comment))
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
670
+
671
+
672
cdef void _handleSaxComment(void* ctxt, const_xmlChar* text) noexcept with gil:
    # Comment callback for parsing without a target: runs the original
    # handler stored on the context, looks up the node via
    # _findLastEventNode() and pushes a 'comment' event for it.
    # can only be called when collecting comment events
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    context._origSaxComment(ctxt, text)
    c_node = _findLastEventNode(c_ctxt)
    if c_node is NULL:
        # no node found, nothing to report
        return
    try:
        context.pushEvent('comment', c_node)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
688
+
689
+
690
cdef inline xmlNode* _findLastEventNode(xmlparser.xmlParserCtxt* c_ctxt):
    # Returns the node that the preceding comment/PI handler appended.
    # this mimics what libxml2 creates for comments/PIs
    cdef xmlNode* c_current
    if c_ctxt.inSubset == 1:
        return c_ctxt.myDoc.intSubset.last
    if c_ctxt.inSubset == 2:
        return c_ctxt.myDoc.extSubset.last
    c_current = c_ctxt.node
    if c_current is NULL:
        # no current node: take the document's last child
        return c_ctxt.myDoc.last
    if c_current.type == tree.XML_ELEMENT_NODE:
        return c_current.last
    return c_current.next
702
+
703
+
704
+ ############################################################
705
+ ## ET compatible XML tree builder
706
+ ############################################################
707
+
708
cdef class TreeBuilder(_SaxParserTarget):
    """TreeBuilder(self, element_factory=None, parser=None,
                   comment_factory=None, pi_factory=None,
                   insert_comments=True, insert_pis=True)

    Parser target that builds a tree from parse event callbacks.

    The factory arguments can be used to influence the creation of
    elements, comments and processing instructions.

    By default, comments and processing instructions are inserted into
    the tree, but they can be ignored by passing the respective flags.

    The final tree is returned by the ``close()`` method.
    """
    cdef _BaseParser _parser
    cdef object _factory
    cdef object _comment_factory
    cdef object _pi_factory
    cdef list _data
    cdef list _element_stack
    cdef object _element_stack_pop
    cdef _Element _last  # may be None
    cdef bint _in_tail
    cdef bint _insert_comments
    cdef bint _insert_pis

    def __init__(self, *, element_factory=None, parser=None,
                 comment_factory=None, pi_factory=None,
                 bint insert_comments=True, bint insert_pis=True):
        self._sax_event_filter = \
            SAX_EVENT_START | SAX_EVENT_END | SAX_EVENT_DATA | \
            SAX_EVENT_PI | SAX_EVENT_COMMENT
        self._data = []  # data collector
        self._element_stack = []  # element stack
        self._element_stack_pop = self._element_stack.pop
        self._last = None  # last element
        self._in_tail = 0  # true if we're after an end tag
        self._factory = element_factory
        self._comment_factory = comment_factory if comment_factory is not None else Comment
        self._pi_factory = pi_factory if pi_factory is not None else ProcessingInstruction
        self._insert_comments = insert_comments
        self._insert_pis = insert_pis
        self._parser = parser

    @cython.final
    cdef int _flush(self) except -1:
        # Attach the collected character data to the last node, as its tail
        # if an end tag (or inserted comment/PI) was seen last, otherwise
        # as its text.  Data collected before any node exists is dropped.
        if self._data:
            if self._last is not None:
                text = "".join(self._data)
                if self._in_tail:
                    assert self._last.tail is None, "internal error (tail)"
                    self._last.tail = text
                else:
                    assert self._last.text is None, "internal error (text)"
                    self._last.text = text
            del self._data[:]
        return 0

    # internal SAX event handlers

    @cython.final
    cdef _handleSaxStart(self, tag, attrib, nsmap):
        # Create the new element (via the user factory if there is one),
        # link it below the currently open element and make it the new
        # innermost open element.  Note that a user factory only receives
        # the tag and attributes, not the nsmap.
        self._flush()
        if self._factory is not None:
            self._last = self._factory(tag, attrib)
            if self._element_stack:
                _appendChild(self._element_stack[-1], self._last)
        elif self._element_stack:
            self._last = _makeSubElement(
                self._element_stack[-1], tag, None, None, attrib, nsmap, None)
        else:
            self._last = _makeElement(
                tag, NULL, None, self._parser, None, None, attrib, nsmap, None)
        self._element_stack.append(self._last)
        self._in_tail = 0
        return self._last

    @cython.final
    cdef _handleSaxEnd(self, tag):
        # Close the innermost open element; 'tag' is not checked here.
        self._flush()
        self._last = self._element_stack_pop()
        self._in_tail = 1
        return self._last

    @cython.final
    cdef int _handleSaxData(self, data) except -1:
        self._data.append(data)

    @cython.final
    cdef _handleSaxPi(self, target, data):
        # Create the processing instruction and, unless insertion is
        # disabled, append it to the currently open element.
        elem = self._pi_factory(target, data)
        if self._insert_pis:
            self._flush()
            self._last = elem
            if self._element_stack:
                _appendChild(self._element_stack[-1], self._last)
            self._in_tail = 1
        # Always return the new PI.  Returning self._last here would hand
        # back an unrelated, previously built node when insert_pis is off.
        return elem

    @cython.final
    cdef _handleSaxComment(self, comment):
        # Create the comment and, unless insertion is disabled, append it
        # to the currently open element.
        elem = self._comment_factory(comment)
        if self._insert_comments:
            self._flush()
            self._last = elem
            if self._element_stack:
                _appendChild(self._element_stack[-1], self._last)
            self._in_tail = 1
        return elem

    # Python level event handlers

    def close(self):
        """close(self)

        Flushes the builder buffers, and returns the toplevel document
        element.  Raises XMLSyntaxError on inconsistencies.
        """
        if self._element_stack:
            raise XMLSyntaxAssertionError("missing end tags")
        # TODO: this does not necessarily seem like an error case.  Why not just return None?
        if self._last is None:
            raise XMLSyntaxAssertionError("missing toplevel element")
        return self._last

    def data(self, data):
        """data(self, data)

        Adds text to the current element.  The value should be either an
        8-bit string containing ASCII text, or a Unicode string.
        """
        self._handleSaxData(data)

    def start(self, tag, attrs, nsmap=None):
        """start(self, tag, attrs, nsmap=None)

        Opens a new element.
        """
        if nsmap is None:
            nsmap = IMMUTABLE_EMPTY_MAPPING
        return self._handleSaxStart(tag, attrs, nsmap)

    def end(self, tag):
        """end(self, tag)

        Closes the current element.
        """
        element = self._handleSaxEnd(tag)
        assert self._last.tag == tag,\
               f"end tag mismatch (expected {self._last.tag}, got {tag})"
        return element

    def pi(self, target, data=None):
        """pi(self, target, data=None)

        Creates a processing instruction using the factory, appends it
        (unless disabled) and returns it.
        """
        return self._handleSaxPi(target, data)

    def comment(self, comment):
        """comment(self, comment)

        Creates a comment using the factory, appends it (unless disabled)
        and returns it.
        """
        return self._handleSaxComment(comment)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/usedoctest.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Doctest module for XML comparison.
2
+
3
+ Usage::
4
+
5
+ >>> import lxml.usedoctest
6
+ >>> # now do your XML doctests ...
7
+
8
+ See `lxml.doctestcompare`
9
+ """
10
+
11
+ from lxml import doctestcompare
12
+
13
+ doctestcompare.temp_install(del_module=__name__)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/xmlid.pxi ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cdef object _find_id_attributes
2
+
3
def XMLID(text, parser=None, *, base_url=None):
    """XMLID(text, parser=None, base_url=None)

    Parse the text and return a tuple (root node, ID dictionary).  The root
    node is the same as returned by the XML() function.  The dictionary
    maps the values of 'id' attributes to the elements that carry them.
    If an ID value occurs more than once, the last element in XPath result
    order is kept.
    """
    cdef dict dic
    global _find_id_attributes
    if _find_id_attributes is None:
        # compiled lazily on first use and cached at module level
        _find_id_attributes = XPath('//*[string(@id)]')

    # ElementTree compatible implementation: parse and look for 'id' attributes
    root = XML(text, parser, base_url=base_url)
    dic = {
        elem.get('id'): elem
        for elem in _find_id_attributes(root)
    }
    return root, dic
23
+
24
def XMLDTDID(text, parser=None, *, base_url=None):
    """XMLDTDID(text, parser=None, base_url=None)

    Parse the text and return a tuple (root node, ID dictionary).  The root
    node is the same as returned by the XML() function.  The dictionary
    maps the values of ID attributes, as defined by the DTD, to the
    elements that carry them.  A plain empty dict is returned if the
    document has no ID table.

    Note that you must not modify the XML tree if you use the ID dictionary.
    The results are undefined.
    """
    cdef _Element root = XML(text, parser, base_url=base_url)
    # xml:id spec compatible implementation: use DTD ID attributes from libxml2
    if root._doc._c_doc.ids is not NULL:
        return root, _IDDict(root)
    return root, {}
43
+
44
def parseid(source, parser=None, *, base_url=None):
    """parseid(source, parser=None, base_url=None)

    Parses the source into a tuple containing an ElementTree object and an
    ID dictionary.  If no parser is provided as second argument, the default
    parser is used.

    Note that you must not modify the XML tree if you use the ID dictionary.
    The results are undefined.
    """
    cdef _Document doc = _parseDocument(source, parser, base_url)
    # build the tree wrapper first, then the ID proxy (same order as before)
    element_tree = _elementTreeFactory(doc, None)
    id_dict = _IDDict(doc)
    return element_tree, id_dict
57
+
58
cdef class _IDDict:
    """IDDict(self, etree)
    A dictionary-like proxy class that maps ID attributes to elements.

    The dictionary must be instantiated with the root element of a parsed XML
    document, otherwise the behaviour is undefined.  Elements and XML trees
    that were created or modified 'by hand' are not supported.
    """
    cdef _Document _doc
    cdef object _keys   # cached list of ID names, built on first use
    cdef object _items  # cached list of (ID name, element), built on first use
    def __cinit__(self, etree):
        cdef _Document doc
        doc = _documentOrRaise(etree)
        if doc._c_doc.ids is NULL:
            raise ValueError, "No ID dictionary available."
        self._doc = doc
        self._keys = None
        self._items = None

    def copy(self):
        return _IDDict(self._doc)

    def __getitem__(self, id_name):
        cdef tree.xmlHashTable* c_ids
        cdef tree.xmlID* c_id
        cdef xmlAttr* c_attr
        c_ids = self._doc._c_doc.ids
        id_utf = _utf8(id_name)
        c_id = <tree.xmlID*>tree.xmlHashLookup(c_ids, _xcstr(id_utf))
        if c_id is NULL:
            raise KeyError, "key not found."
        c_attr = c_id.attr
        if c_attr is NULL or c_attr.parent is NULL:
            raise KeyError, "ID attribute not found."
        return _elementFactory(self._doc, c_attr.parent)

    def get(self, id_name, default=None):
        """get(self, id_name, default=None)

        Return the element for ``id_name``, or ``default`` if the ID is
        not available, following the usual mapping protocol.
        """
        try:
            return self[id_name]
        except KeyError:
            return default

    def __contains__(self, id_name):
        cdef tree.xmlID* c_id
        id_utf = _utf8(id_name)
        c_id = <tree.xmlID*>tree.xmlHashLookup(
            self._doc._c_doc.ids, _xcstr(id_utf))
        # Apply the same test as __getitem__() and the key/item collectors,
        # so that "name in d" implies that "d[name]" succeeds.
        return (c_id is not NULL and c_id.attr is not NULL
                and c_id.attr.parent is not NULL)

    def has_key(self, id_name):
        return id_name in self

    def __repr__(self):
        return repr(dict(self))

    def keys(self):
        if self._keys is None:
            self._keys = self._build_keys()
        return self._keys[:]

    def __iter__(self):
        if self._keys is None:
            self._keys = self._build_keys()
        return iter(self._keys)

    def iterkeys(self):
        # return a real iterator; the proxy itself has no __next__()
        return iter(self)

    def __len__(self):
        if self._keys is None:
            self._keys = self._build_keys()
        return len(self._keys)

    def items(self):
        if self._items is None:
            self._items = self._build_items()
        return self._items[:]

    def iteritems(self):
        if self._items is None:
            self._items = self._build_items()
        return iter(self._items)

    def values(self):
        cdef list values = []
        if self._items is None:
            self._items = self._build_items()
        for item in self._items:
            value = python.PyTuple_GET_ITEM(item, 1)
            python.Py_INCREF(value)
            values.append(value)
        return values

    def itervalues(self):
        return iter(self.values())

    cdef object _build_keys(self):
        # collect the ID names by scanning the document's ID hash table
        keys = []
        tree.xmlHashScan(<tree.xmlHashTable*>self._doc._c_doc.ids,
                         <tree.xmlHashScanner>_collectIdHashKeys, <python.PyObject*>keys)
        return keys

    cdef object _build_items(self):
        # collect (ID name, element) pairs by scanning the ID hash table;
        # the callback receives the result list and the document as a tuple
        items = []
        context = (items, self._doc)
        tree.xmlHashScan(<tree.xmlHashTable*>self._doc._c_doc.ids,
                         <tree.xmlHashScanner>_collectIdHashItemList, <python.PyObject*>context)
        return items
164
+
165
cdef void _collectIdHashItemList(void* payload, void* context, xmlChar* name) noexcept:
    # Hash scan callback: appends (ID name, element) to the list found in
    # the (list, document) tuple passed as 'context'.  Entries without an
    # attribute or without a parent element are skipped.
    cdef list collected
    c_id = <tree.xmlID*>payload
    if c_id is NULL or c_id.attr is NULL or c_id.attr.parent is NULL:
        return
    collected, doc = <tuple>context
    element = _elementFactory(doc, c_id.attr.parent)
    id_name = funicode(name)
    collected.append((id_name, element))
174
+
175
cdef void _collectIdHashKeys(void* payload, void* collect_list, xmlChar* name) noexcept:
    # Hash scan callback: appends the ID name to the list passed as
    # 'collect_list'.  Entries without an attribute or without a parent
    # element are skipped.
    cdef list names
    c_id = <tree.xmlID*>payload
    if c_id is NULL or c_id.attr is NULL or c_id.attr.parent is NULL:
        return
    names = <list>collect_list
    names.append(funicode(name))
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/xmlschema.pxi ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # support for XMLSchema validation
2
+ from lxml.includes cimport xmlschema
3
+
4
+
5
cdef class XMLSchemaError(LxmlError):
    """Base class of all XML Schema errors.

    Catching this class also catches ``XMLSchemaParseError`` and
    ``XMLSchemaValidateError``, which derive from it.
    """
8
+
9
cdef class XMLSchemaParseError(XMLSchemaError):
    """Error while parsing an XML document as XML Schema.

    Raised by ``XMLSchema.__init__()`` when neither a tree nor a file is
    given, or when libxml2 fails to build a schema from the document.
    """
12
+
13
cdef class XMLSchemaValidateError(XMLSchemaError):
    """Error while validating an XML document with an XML Schema.

    Raised by ``XMLSchema.__call__()`` when libxml2 reports an internal
    validation error (return code -1), as opposed to an invalid document.
    """
16
+
17
+
18
+ ################################################################################
19
+ # XMLSchema
20
+
21
# Compiled once at module level.  Evaluates to true if the schema document
# contains at least one xs:attribute declaration that carries a 'default'
# or 'fixed' attribute.
cdef XPath _check_for_default_attributes = XPath(
    "boolean(//xs:attribute[@default or @fixed][1])",
    namespaces={'xs': 'http://www.w3.org/2001/XMLSchema'})
24
+
25
+
26
+ cdef class XMLSchema(_Validator):
27
+ """XMLSchema(self, etree=None, file=None)
28
+ Turn a document into an XML Schema validator.
29
+
30
+ Either pass a schema as Element or ElementTree, or pass a file or
31
+ filename through the ``file`` keyword argument.
32
+
33
+ Passing the ``attribute_defaults`` boolean option will make the
34
+ schema insert default/fixed attributes into validated documents.
35
+ """
36
+ cdef xmlschema.xmlSchema* _c_schema
37
+ cdef _Document _doc
38
+ cdef bint _has_default_attributes
39
+ cdef bint _add_attribute_defaults
40
+
41
def __cinit__(self):
    # Defaults that hold until __init__() has inspected the schema.  For
    # schemas loaded from a file name, no document is available to inspect
    # and _has_default_attributes stays True.
    self._has_default_attributes = True  # play it safe
    self._add_attribute_defaults = False
44
+
45
def __init__(self, etree=None, *, file=None, bint attribute_defaults=False):
    # Build the libxml2 schema from an Element/ElementTree, a file name or
    # a file-like object.  Raises XMLSchemaParseError if no source is
    # given or the schema cannot be parsed, MemoryError if libxml2 cannot
    # create the parser context.
    cdef xmlschema.xmlSchemaParserCtxt* parser_ctxt
    cdef xmlDoc* c_doc

    self._add_attribute_defaults = attribute_defaults
    _Validator.__init__(self)
    c_doc = NULL
    if etree is not None:
        # parse from a copy of the tree rooted at the given element
        doc = _documentOrRaise(etree)
        root_node = _rootNodeOrRaise(etree)
        c_doc = _copyDocRoot(doc._c_doc, root_node._c_node)
        self._doc = _documentFactory(c_doc, doc._parser)
        parser_ctxt = xmlschema.xmlSchemaNewDocParserCtxt(c_doc)
    elif file is not None:
        file = _getFSPathOrObject(file)
        if _isString(file):
            # file name: libxml2 reads the file itself, self._doc stays unset
            filename = _encodeFilename(file)
            parser_ctxt = xmlschema.xmlSchemaNewParserCtxt(_cstr(filename))
        else:
            # file-like object: parse it into a document first
            self._doc = _parseDocument(file, None, None)
            parser_ctxt = xmlschema.xmlSchemaNewDocParserCtxt(self._doc._c_doc)
    else:
        raise XMLSchemaParseError, "No tree or file given"

    if parser_ctxt is NULL:
        raise MemoryError()

    # Need a cast here because older libxml2 releases do not use 'const' in the functype.
    xmlschema.xmlSchemaSetParserStructuredErrors(
        parser_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>self._error_log)
    if self._doc is not None:
        # calling xmlSchemaParse on a schema with imports or
        # includes will cause libxml2 to create an internal
        # context for parsing, so push an implied context to route
        # resolve requests to the document's parser
        __GLOBAL_PARSER_CONTEXT.pushImpliedContextFromParser(self._doc._parser)
    with nogil:
        # the document loader is swapped only for the duration of the parse
        orig_loader = _register_document_loader()
        self._c_schema = xmlschema.xmlSchemaParse(parser_ctxt)
        _reset_document_loader(orig_loader)
    if self._doc is not None:
        __GLOBAL_PARSER_CONTEXT.popImpliedContext()
    # the parser context is freed before the result is checked
    xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt)

    if self._c_schema is NULL:
        raise XMLSchemaParseError(
            self._error_log._buildExceptionMessage(
                "Document is not valid XML Schema"),
            self._error_log)

    if self._doc is not None:
        self._has_default_attributes = _check_for_default_attributes(self._doc)
    # defaults are only inserted if requested and the schema may define any
    self._add_attribute_defaults = attribute_defaults and self._has_default_attributes
98
+
99
def __dealloc__(self):
    # Release the compiled schema held in self._c_schema, if there is one.
    if self._c_schema is not NULL:
        xmlschema.xmlSchemaFree(self._c_schema)
101
+
102
def __call__(self, etree):
    """__call__(self, etree)

    Run XML Schema validation on a document or element.

    Returns True when validation succeeds and False when it does not.
    Raises XMLSchemaValidateError if libxml2 reports an internal error.
    """
    cdef xmlschema.xmlSchemaValidCtxt* c_valid_ctxt
    cdef _Document doc
    cdef _Element root_node
    cdef xmlDoc* c_fake_doc
    cdef int c_status

    assert self._c_schema is not NULL, "Schema instance not initialised"
    doc = _documentOrRaise(etree)
    root_node = _rootNodeOrRaise(etree)

    c_valid_ctxt = xmlschema.xmlSchemaNewValidCtxt(self._c_schema)
    if c_valid_ctxt is NULL:
        raise MemoryError()

    try:
        if self._add_attribute_defaults:
            xmlschema.xmlSchemaSetValidOptions(
                c_valid_ctxt, xmlschema.XML_SCHEMA_VAL_VC_I_CREATE)

        # start from an empty log so that only this run's messages are reported
        self._error_log.clear()
        # Need a cast here because older libxml2 releases do not use 'const' in the functype.
        xmlschema.xmlSchemaSetValidStructuredErrors(
            c_valid_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>self._error_log)

        c_fake_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
        with nogil:
            c_status = xmlschema.xmlSchemaValidateDoc(c_valid_ctxt, c_fake_doc)
        _destroyFakeDoc(doc._c_doc, c_fake_doc)
    finally:
        # the validation context is per-call and must not outlive it
        xmlschema.xmlSchemaFreeValidCtxt(c_valid_ctxt)

    if c_status == -1:
        raise XMLSchemaValidateError(
            "Internal error in XML Schema validation.",
            self._error_log)
    return c_status == 0
148
+
149
cdef _ParserSchemaValidationContext _newSaxValidator(
        self, bint add_default_attributes):
    # Build a parse-time validation context bound to this schema.
    # Default attributes are only enabled when the schema is flagged as
    # having any (self._has_default_attributes) and either the caller
    # or this schema instance requested them.
    cdef _ParserSchemaValidationContext sax_validator
    cdef bint defaults_requested = (
        add_default_attributes or self._add_attribute_defaults)
    sax_validator = _ParserSchemaValidationContext.__new__(_ParserSchemaValidationContext)
    sax_validator._schema = self
    sax_validator._add_default_attributes = (
        self._has_default_attributes and defaults_requested)
    return sax_validator
157
+
158
@cython.final
@cython.internal
cdef class _ParserSchemaValidationContext:
    # Couples an XMLSchema with a libxml2 validation context that can be
    # plugged into a parser's SAX handler chain (see connect/disconnect).
    cdef XMLSchema _schema
    cdef xmlschema.xmlSchemaValidCtxt* _valid_ctxt
    cdef xmlschema.xmlSchemaSAXPlugStruct* _sax_plug
    cdef bint _add_default_attributes

    def __cinit__(self):
        self._add_default_attributes = False
        self._sax_plug = NULL
        self._valid_ctxt = NULL

    def __dealloc__(self):
        # unplug first, then drop the validation context itself
        self.disconnect()
        if self._valid_ctxt is not NULL:
            xmlschema.xmlSchemaFreeValidCtxt(self._valid_ctxt)

    cdef _ParserSchemaValidationContext copy(self):
        # A copy is a fresh context created by the same schema with the
        # same default-attribute setting; no libxml2 state is shared.
        assert self._schema is not None, "_ParserSchemaValidationContext not initialised"
        return self._schema._newSaxValidator(self._add_default_attributes)

    cdef void inject_default_attributes(self, xmlDoc* c_doc) noexcept:
        # we currently need to insert default attributes manually
        # after parsing, as libxml2 does not support this at parse
        # time
        if not self._add_default_attributes:
            return
        with nogil:
            xmlschema.xmlSchemaValidateDoc(self._valid_ctxt, c_doc)

    cdef int connect(self, xmlparser.xmlParserCtxt* c_ctxt, _BaseErrorLog error_log) except -1:
        # Lazily create the validation context on first use, then hook it
        # into the SAX handlers of the given parser context.
        if self._valid_ctxt is NULL:
            self._valid_ctxt = xmlschema.xmlSchemaNewValidCtxt(
                self._schema._c_schema)
            if self._valid_ctxt is NULL:
                raise MemoryError()
            if self._add_default_attributes:
                xmlschema.xmlSchemaSetValidOptions(
                    self._valid_ctxt, xmlschema.XML_SCHEMA_VAL_VC_I_CREATE)
        if error_log is not None:
            # Need a cast here because older libxml2 releases do not use 'const' in the functype.
            xmlschema.xmlSchemaSetValidStructuredErrors(
                self._valid_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>error_log)
        self._sax_plug = xmlschema.xmlSchemaSAXPlug(
            self._valid_ctxt, &c_ctxt.sax, &c_ctxt.userData)
        return 0

    cdef void disconnect(self) noexcept:
        # Undo connect(): remove the SAX plug and stop routing errors to
        # the error log that was passed to connect().
        if self._sax_plug is not NULL:
            xmlschema.xmlSchemaSAXUnplug(self._sax_plug)
            self._sax_plug = NULL
        if self._valid_ctxt is not NULL:
            xmlschema.xmlSchemaSetValidStructuredErrors(
                self._valid_ctxt, NULL, NULL)

    cdef bint isvalid(self) noexcept:
        if self._valid_ctxt is not NULL:
            return xmlschema.xmlSchemaIsValid(self._valid_ctxt)
        # never connected => nothing was validated => report "valid"
        return 1
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/xslt.pxi ADDED
@@ -0,0 +1,957 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # XSLT
2
+ from lxml.includes cimport xslt
3
+
4
+
5
cdef class XSLTError(LxmlError):
    """Common base class of the XSLT related exceptions.
    """

cdef class XSLTParseError(XSLTError):
    """Raised when a stylesheet document cannot be parsed.
    """

cdef class XSLTApplyError(XSLTError):
    """Raised when running an XSL transformation fails.
    """

class XSLTSaveError(XSLTError, SerialisationError):
    """Raised when an XSLT result cannot be serialised.
    """

cdef class XSLTExtensionError(XSLTError):
    """Raised when an XSLT extension cannot be registered.
    """
24
+
25
+
26
# version information, unpacked by __unpackIntVersion():
# LIBXSLT_VERSION is taken from the libxslt variable xsltLibxsltVersion,
# LIBXSLT_COMPILED_VERSION from the LIBXSLT_VERSION constant of the headers.
LIBXSLT_VERSION = __unpackIntVersion(xslt.xsltLibxsltVersion)
LIBXSLT_COMPILED_VERSION = __unpackIntVersion(xslt.LIBXSLT_VERSION)
29
+
30
+
31
+ ################################################################################
32
+ # Where do we store what?
33
+ #
34
+ # xsltStylesheet->doc->_private
35
+ # == _XSLTResolverContext for XSL stylesheet
36
+ #
37
+ # xsltTransformContext->_private
38
+ # == _XSLTResolverContext for transformed document
39
+ #
40
+ ################################################################################
41
+
42
+
43
+ ################################################################################
44
+ # XSLT document loaders
45
+
46
@cython.final
@cython.internal
cdef class _XSLTResolverContext(_ResolverContext):
    # Resolver state used by the XSLT document loader: the parser that
    # supplies the resolvers and a pointer to a copy of the stylesheet
    # document (used to answer requests for the stylesheet's own URL).
    cdef xmlDoc* _c_style_doc
    cdef _BaseParser _parser

    cdef _XSLTResolverContext _copy(self):
        # The clone is initialised from the same parser and points at the
        # same stylesheet document; the pointer is copied, not the document.
        cdef _XSLTResolverContext clone = _XSLTResolverContext()
        _initXSLTResolverContext(clone, self._parser)
        clone._c_style_doc = self._c_style_doc
        return clone
58
+
59
cdef _initXSLTResolverContext(_XSLTResolverContext context,
                              _BaseParser parser):
    # Set up 'context' with the resolvers of 'parser'; the stylesheet
    # document pointer starts out unset and is filled in by the caller.
    _initResolverContext(context, parser.resolvers)
    context._c_style_doc = NULL
    context._parser = parser
64
+
65
cdef xmlDoc* _xslt_resolve_from_python(const_xmlChar* c_uri, void* c_context,
                                       int parse_options, int* error) with gil:
    # Ask the Python level document loaders for 'c_uri'.
    # Returns the resolved document or NULL.  error[0] is set to 1 when an
    # exception occurred (it is stored on the resolver context), else to 0.
    cdef _XSLTResolverContext context
    cdef _ResolverRegistry resolvers
    cdef _InputDocument doc_ref
    cdef xmlDoc* c_style_doc
    cdef xmlDoc* c_return_doc = NULL

    error[0] = 0
    context = <_XSLTResolverContext>c_context

    c_style_doc = context._c_style_doc
    try:
        # shortcut if we resolve the stylesheet itself: hand out a copy
        if (c_style_doc is not NULL and c_style_doc.URL is not NULL
                and tree.xmlStrcmp(c_uri, c_style_doc.URL) == 0):
            c_return_doc = _copyDoc(c_style_doc, 1)
            return c_return_doc  # 'goto', see 'finally' below

        # everything else is delegated to the Python resolvers
        resolvers = context._resolvers
        if tree.xmlStrncmp(<unsigned char*>'string://__STRING__XSLT__/', c_uri, 26) == 0:
            # strip the 26 character prefix that marks string-parsed stylesheets
            c_uri += 26
        uri = _decodeFilename(c_uri)
        doc_ref = resolvers.resolve(uri, None, context)

        if doc_ref is not None:
            if doc_ref._type == PARSER_DATA_STRING:
                c_return_doc = _parseDoc(
                    doc_ref._data_bytes, doc_ref._filename, context._parser)
            elif doc_ref._type == PARSER_DATA_FILENAME:
                c_return_doc = _parseDocFromFile(
                    doc_ref._filename, context._parser)
            elif doc_ref._type == PARSER_DATA_FILE:
                c_return_doc = _parseDocFromFilelike(
                    doc_ref._file, doc_ref._filename, context._parser)
            elif doc_ref._type == PARSER_DATA_EMPTY:
                c_return_doc = _newXMLDoc()
            if c_return_doc is not NULL and c_return_doc.URL is NULL:
                # make sure the resolved document carries a URL
                c_return_doc.URL = tree.xmlStrdup(c_uri)
    except:
        # called from C: remember the exception instead of propagating it
        error[0] = 1
        context._store_raised()
    finally:
        return c_return_doc  # and swallow any further exceptions
111
+
112
+
113
cdef void _xslt_store_resolver_exception(const_xmlChar* c_uri, void* context,
                                         xslt.xsltLoadType c_type) noexcept with gil:
    # Record a "cannot resolve" error on the resolver context: an
    # XSLTApplyError for documents loaded during a transformation, an
    # XSLTParseError for any other load type.
    cdef _XSLTResolverContext resolver_context = <_XSLTResolverContext>context
    try:
        message = f"Cannot resolve URI {_decodeFilename(c_uri)}"
        error_type = (
            XSLTApplyError if c_type == xslt.XSLT_LOAD_DOCUMENT else XSLTParseError)
        resolver_context._store_exception(error_type(message))
    except BaseException as e:
        # building the error failed => store that failure instead
        resolver_context._store_exception(e)
    finally:
        return  # and swallow any further exceptions
126
+
127
+
128
cdef xmlDoc* _xslt_doc_loader(const_xmlChar* c_uri, tree.xmlDict* c_dict,
                              int parse_options, void* c_ctxt,
                              xslt.xsltLoadType c_type) noexcept nogil:
    # nogil => no Python objects here, may be called without thread context !
    cdef xmlDoc* c_doc
    cdef void* c_pcontext = NULL
    cdef int error = 0

    # find the resolver context that belongs to this load request
    if c_type == xslt.XSLT_LOAD_DOCUMENT:
        # transformation time: stored on the transform context
        c_pcontext = (<xslt.xsltTransformContext*>c_ctxt)._private
    elif c_type == xslt.XSLT_LOAD_STYLESHEET:
        # include/import resolution while parsing: stored on the stylesheet doc
        c_pcontext = (<xslt.xsltStylesheet*>c_ctxt).doc._private

    if c_pcontext is NULL:
        # can't call Python without context, fall back to default loader
        return XSLT_DOC_DEFAULT_LOADER(
            c_uri, c_dict, parse_options, c_ctxt, c_type)

    c_doc = _xslt_resolve_from_python(c_uri, c_pcontext, parse_options, &error)
    if c_doc is NULL and not error:
        # Python resolvers had no answer (and did not fail) => default loader
        c_doc = XSLT_DOC_DEFAULT_LOADER(
            c_uri, c_dict, parse_options, c_ctxt, c_type)
        if c_doc is NULL:
            _xslt_store_resolver_exception(c_uri, c_pcontext, c_type)

    if c_doc is not NULL and c_type == xslt.XSLT_LOAD_STYLESHEET:
        # pass the resolver context on to nested includes/imports
        c_doc._private = c_pcontext
    return c_doc

# remember the loader that is current before ours is installed (order matters)
cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER = xslt.xsltDocDefaultLoader
xslt.xsltSetLoaderFunc(<xslt.xsltDocLoaderFunc>_xslt_doc_loader)
164
+
165
+ ################################################################################
166
+ # XSLT file/network access control
167
+
168
cdef class XSLTAccessControl:
    """XSLTAccessControl(self, read_file=True, write_file=True, create_dir=True, read_network=True, write_network=True)

    Access control for XSLT: reading/writing files, directories and
    network I/O.  Access to a type of resource is granted or denied by
    passing any of the following boolean keyword arguments.  All of
    them default to True to allow access.

    - read_file
    - write_file
    - create_dir
    - read_network
    - write_network

    For convenience, there is also a class member `DENY_ALL` that
    provides an XSLTAccessControl instance that is readily configured
    to deny everything, and a `DENY_WRITE` member that denies all
    write access but allows read access.

    See `XSLT`.
    """
    cdef xslt.xsltSecurityPrefs* _prefs

    def __cinit__(self):
        # allocate the libxslt preference set that backs this object
        self._prefs = xslt.xsltNewSecurityPrefs()
        if self._prefs is NULL:
            raise MemoryError()

    def __init__(self, *, bint read_file=True, bint write_file=True, bint create_dir=True,
                 bint read_network=True, bint write_network=True):
        self._setAccess(xslt.XSLT_SECPREF_READ_FILE, read_file)
        self._setAccess(xslt.XSLT_SECPREF_WRITE_FILE, write_file)
        self._setAccess(xslt.XSLT_SECPREF_CREATE_DIRECTORY, create_dir)
        self._setAccess(xslt.XSLT_SECPREF_READ_NETWORK, read_network)
        self._setAccess(xslt.XSLT_SECPREF_WRITE_NETWORK, write_network)

    # ready-made configuration: nothing is allowed
    DENY_ALL = XSLTAccessControl(
        read_file=False, write_file=False, create_dir=False,
        read_network=False, write_network=False)

    # ready-made configuration: reading is allowed, writing is not
    DENY_WRITE = XSLTAccessControl(
        read_file=True, write_file=False, create_dir=False,
        read_network=True, write_network=False)

    def __dealloc__(self):
        if self._prefs is not NULL:
            xslt.xsltFreeSecurityPrefs(self._prefs)

    @cython.final
    cdef _setAccess(self, xslt.xsltSecurityOption option, bint allow):
        # map the boolean onto libxslt's allow/forbid check functions
        cdef xslt.xsltSecurityCheck function = xslt.xsltSecurityForbid
        if allow:
            function = xslt.xsltSecurityAllow
        xslt.xsltSetSecurityPrefs(self._prefs, option, function)

    @cython.final
    cdef void _register_in_context(self, xslt.xsltTransformContext* ctxt) noexcept:
        # apply these preferences to a single transformation run
        xslt.xsltSetCtxtSecurityPrefs(self._prefs, ctxt)

    @property
    def options(self):
        """The access control configuration as a map of options."""
        return {
            'read_file': self._optval(xslt.XSLT_SECPREF_READ_FILE),
            'write_file': self._optval(xslt.XSLT_SECPREF_WRITE_FILE),
            'create_dir': self._optval(xslt.XSLT_SECPREF_CREATE_DIRECTORY),
            'read_network': self._optval(xslt.XSLT_SECPREF_READ_NETWORK),
            'write_network': self._optval(xslt.XSLT_SECPREF_WRITE_NETWORK),
        }

    @cython.final
    cdef _optval(self, xslt.xsltSecurityOption option):
        # True/False for the two known check functions, None for anything else
        cdef xslt.xsltSecurityCheck function
        function = xslt.xsltGetSecurityPrefs(self._prefs, option)
        if function is <xslt.xsltSecurityCheck>xslt.xsltSecurityAllow:
            return True
        if function is <xslt.xsltSecurityCheck>xslt.xsltSecurityForbid:
            return False
        return None

    def __repr__(self):
        # options are listed in alphabetical order
        settings = ', '.join(
            ["%s=%r" % item for item in sorted(self.options.items())])
        class_name = python._fqtypename(self).decode('UTF-8').split('.')[-1]
        return "%s(%s)" % (class_name, settings)
255
+
256
+ ################################################################################
257
+ # XSLT
258
+
259
cdef int _register_xslt_function(void* ctxt, name_utf, ns_utf) noexcept:
    # Register one extension function on an XSLT transform context.
    # Functions without a namespace are skipped (result 0).
    cdef xslt.xsltTransformContext* c_transform_ctxt
    if ns_utf is None:
        return 0
    c_transform_ctxt = <xslt.xsltTransformContext*>ctxt
    # libxml2 internalises the strings if ctxt has a dict
    return xslt.xsltRegisterExtFunction(
        c_transform_ctxt, _xcstr(name_utf), _xcstr(ns_utf),
        <xslt.xmlXPathFunction>_xpath_function_call)
266
+
267
# shared sentinel: "no extension elements registered" (compared by identity)
cdef dict EMPTY_DICT = {}

@cython.final
@cython.internal
cdef class _XSLTContext(_BaseContext):
    # XPath/extension context of a stylesheet; for a run it is attached to
    # a libxslt transform context via register_context().
    cdef xslt.xsltTransformContext* _xsltCtxt
    cdef _ReadOnlyElementProxy _extension_element_proxy
    cdef dict _extension_elements

    def __cinit__(self):
        self._xsltCtxt = NULL
        self._extension_elements = EMPTY_DICT

    def __init__(self, namespaces, extensions, error_log, enable_regexp,
                 build_smart_strings):
        # Split 'extensions' into extension elements (kept here) and the
        # remaining entries, which are handed on to _BaseContext.
        if extensions:
            for ns_name_tuple, extension in extensions.items():
                if ns_name_tuple[0] is None:
                    raise XSLTExtensionError(
                        "extensions must not have empty namespaces")
                if not isinstance(extension, XSLTExtension):
                    continue
                if self._extension_elements is EMPTY_DICT:
                    # first extension element found: stop using the shared
                    # sentinel and work on a copy of the user's dict so
                    # that the deletion below does not modify theirs
                    self._extension_elements = {}
                    extensions = extensions.copy()
                ns_utf = _utf8(ns_name_tuple[0])
                name_utf = _utf8(ns_name_tuple[1])
                self._extension_elements[(ns_utf, name_utf)] = extension
                del extensions[ns_name_tuple]
        _BaseContext.__init__(self, namespaces, extensions, error_log, enable_regexp,
                              build_smart_strings)

    cdef _BaseContext _copy(self):
        cdef _XSLTContext clone = <_XSLTContext>_BaseContext._copy(self)
        # the extension element mapping is shared with the copy, not duplicated
        clone._extension_elements = self._extension_elements
        return clone

    cdef register_context(self, xslt.xsltTransformContext* xsltCtxt,
                          _Document doc):
        # Attach this context to a transform context and register all
        # functions and extension elements on it.
        self._xsltCtxt = xsltCtxt
        self._set_xpath_context(xsltCtxt.xpathCtxt)
        self._register_context(doc)
        self.registerLocalFunctions(xsltCtxt, _register_xslt_function)
        self.registerGlobalFunctions(xsltCtxt, _register_xslt_function)
        _registerXSLTExtensions(xsltCtxt, self._extension_elements)

    cdef free_context(self):
        # Tear down after a run; this also frees the transform context
        # that was passed to register_context().
        self._cleanup_context()
        self._release_context()
        if self._xsltCtxt is not NULL:
            xslt.xsltFreeTransformContext(self._xsltCtxt)
            self._xsltCtxt = NULL
        self._release_temp_refs()
319
+
320
+
321
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _XSLTQuotedStringParam:
    """Marker object for literal XSLT string parameters whose quotes
    must be escaped before they are passed to a transformation.
    """
    # the UTF-8 encoded parameter value
    cdef bytes strval

    def __cinit__(self, strval):
        self.strval = _utf8(strval)
331
+
332
+
333
@cython.no_gc_clear
cdef class XSLT:
    """XSLT(self, xslt_input, extensions=None, regexp=True, access_control=None)

    Turn an XSL document into an XSLT object.

    Calling this object on a tree or Element will execute the XSLT::

        transform = etree.XSLT(xsl_tree)
        result = transform(xml_tree)

    Keyword arguments of the constructor:

    - extensions: a dict mapping ``(namespace, name)`` pairs to
      extension functions or extension elements
    - regexp: enable exslt regular expression support in XPath
      (default: True)
    - access_control: access restrictions for network or file
      system (see `XSLTAccessControl`)

    Keyword arguments of the XSLT call:

    - profile_run: enable XSLT profiling and make the profile available
      as XML document in ``result.xslt_profile`` (default: False)

    Other keyword arguments of the call are passed to the stylesheet
    as parameters.
    """
    cdef _XSLTContext _context
    cdef xslt.xsltStylesheet* _c_style
    cdef _XSLTResolverContext _xslt_resolver_context
    cdef XSLTAccessControl _access_control
    cdef _ErrorLog _error_log

    def __cinit__(self):
        self._c_style = NULL

    def __init__(self, xslt_input, *, extensions=None, regexp=True,
                 access_control=None):
        cdef xslt.xsltStylesheet* c_style = NULL
        cdef xmlDoc* c_doc
        cdef _Document doc
        cdef _Element root_node

        doc = _documentOrRaise(xslt_input)
        root_node = _rootNodeOrRaise(xslt_input)

        # set access control or raise TypeError
        self._access_control = access_control

        # make a copy of the document as stylesheet parsing modifies it
        c_doc = _copyDocRoot(doc._c_doc, root_node._c_node)

        # make sure we always have a stylesheet URL
        if c_doc.URL is NULL:
            doc_url_utf = python.PyUnicode_AsASCIIString(
                f"string://__STRING__XSLT__/{id(self)}.xslt")
            c_doc.URL = tree.xmlStrdup(_xcstr(doc_url_utf))

        self._error_log = _ErrorLog()
        self._xslt_resolver_context = _XSLTResolverContext()
        _initXSLTResolverContext(self._xslt_resolver_context, doc._parser)
        # keep a copy in case we need to access the stylesheet via 'document()'
        self._xslt_resolver_context._c_style_doc = _copyDoc(c_doc, 1)
        # the document loader finds the resolver context through '_private'
        c_doc._private = <python.PyObject*>self._xslt_resolver_context

        with self._error_log:
            orig_loader = _register_document_loader()
            c_style = xslt.xsltParseStylesheetDoc(c_doc)
            _reset_document_loader(orig_loader)

        if c_style is NULL or c_style.errors:
            tree.xmlFreeDoc(c_doc)
            if c_style is not NULL:
                # c_doc has just been freed: detach it from the stylesheet
                # so that xsltFreeStylesheet() does not free it again
                c_style.doc = NULL
                xslt.xsltFreeStylesheet(c_style)
            self._xslt_resolver_context._raise_if_stored()
            # last error seems to be the most accurate here
            if self._error_log.last_error is not None and \
                    self._error_log.last_error.message:
                raise XSLTParseError(self._error_log.last_error.message,
                                     self._error_log)
            else:
                raise XSLTParseError(
                    self._error_log._buildExceptionMessage(
                        "Cannot parse stylesheet"),
                    self._error_log)

        c_doc._private = NULL  # no longer used!
        self._c_style = c_style
        self._context = _XSLTContext(None, extensions, self._error_log, regexp, True)

    def __dealloc__(self):
        if self._xslt_resolver_context is not None and \
                self._xslt_resolver_context._c_style_doc is not NULL:
            tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc)
        # this cleans up the doc copy as well
        if self._c_style is not NULL:
            xslt.xsltFreeStylesheet(self._c_style)

    @property
    def error_log(self):
        """The log of errors and warnings of an XSLT execution."""
        return self._error_log.copy()

    @staticmethod
    def strparam(strval):
        """strparam(strval)

        Mark an XSLT string parameter that requires quote escaping
        before passing it into the transformation.  Use it like this::

            result = transform(doc, some_strval = XSLT.strparam(
                '''it's \"Monty Python's\" ...'''))

        Escaped string parameters can be reused without restriction.
        """
        return _XSLTQuotedStringParam(strval)

    @staticmethod
    def set_global_max_depth(int max_depth):
        """set_global_max_depth(max_depth)

        The maximum traversal depth that the stylesheet engine will allow.
        This does not only count the template recursion depth but also takes
        the number of variables/parameters into account.  The required setting
        for a run depends on both the stylesheet and the input data.

        Example::

            XSLT.set_global_max_depth(5000)

        Note that this is currently a global, module-wide setting because
        libxslt does not support it at a per-stylesheet level.
        """
        if max_depth < 0:
            raise ValueError("cannot set a maximum stylesheet traversal depth < 0")
        xslt.xsltMaxDepth = max_depth

    def tostring(self, _ElementTree result_tree):
        """tostring(self, result_tree)

        Save result doc to string based on stylesheet output method.

        :deprecated: use str(result_tree) instead.
        """
        return str(result_tree)

    def __deepcopy__(self, memo):
        return self.__copy__()

    def __copy__(self):
        return _copyXSLT(self)

    def __call__(self, _input, *, profile_run=False, **kw):
        """__call__(self, _input, profile_run=False, **kw)

        Execute the XSL transformation on a tree or Element.

        Pass the ``profile_run`` option to get profile information
        about the XSLT.  The result of the XSLT will have a property
        xslt_profile that holds an XML tree with profiling data.
        """
        cdef _XSLTContext context = None
        cdef _XSLTResolverContext resolver_context
        cdef _Document input_doc
        cdef _Element root_node
        cdef _Document result_doc
        cdef _Document profile_doc = None
        cdef xmlDoc* c_profile_doc
        cdef xslt.xsltTransformContext* transform_ctxt
        cdef xmlDoc* c_result = NULL
        cdef xmlDoc* c_doc
        cdef tree.xmlDict* c_dict
        cdef const_char** params = NULL

        assert self._c_style is not NULL, "XSLT stylesheet not initialised"
        input_doc = _documentOrRaise(_input)
        root_node = _rootNodeOrRaise(_input)

        # every exit path from here on must pair this with _destroyFakeDoc()
        c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node)

        transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc)
        if transform_ctxt is NULL:
            _destroyFakeDoc(input_doc._c_doc, c_doc)
            raise MemoryError()

        # using the stylesheet dict is safer than using a possibly
        # unrelated dict from the current thread.  Almost all
        # non-input tag/attr names will come from the stylesheet
        # anyway.
        if transform_ctxt.dict is not NULL:
            xmlparser.xmlDictFree(transform_ctxt.dict)
        if kw:
            # parameter values are stored in the dict
            # => avoid unnecessarily cluttering the global dict
            transform_ctxt.dict = xmlparser.xmlDictCreateSub(self._c_style.doc.dict)
            if transform_ctxt.dict is NULL:
                xslt.xsltFreeTransformContext(transform_ctxt)
                # release the fake root document like all other exit paths do
                _destroyFakeDoc(input_doc._c_doc, c_doc)
                raise MemoryError()
        else:
            transform_ctxt.dict = self._c_style.doc.dict
            xmlparser.xmlDictReference(transform_ctxt.dict)

        xslt.xsltSetCtxtParseOptions(
            transform_ctxt, input_doc._parser._parse_options)

        if profile_run:
            transform_ctxt.profile = 1

        try:
            context = self._context._copy()
            context.register_context(transform_ctxt, input_doc)

            resolver_context = self._xslt_resolver_context._copy()
            # the document loader finds the resolver context through '_private'
            transform_ctxt._private = <python.PyObject*>resolver_context

            _convert_xslt_parameters(transform_ctxt, kw, &params)
            try:
                c_result = self._run_transform(
                    c_doc, params, context, transform_ctxt)
            finally:
                if params is not NULL:
                    # deallocate space for parameters, also on errors
                    python.lxml_free(params)
                    params = NULL

            if transform_ctxt.state != xslt.XSLT_STATE_OK:
                # the run did not end in the OK state => discard the result
                if c_result is not NULL:
                    tree.xmlFreeDoc(c_result)
                    c_result = NULL

            if transform_ctxt.profile:
                c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
                if c_profile_doc is not NULL:
                    profile_doc = _documentFactory(
                        c_profile_doc, input_doc._parser)
        finally:
            if context is not None:
                # this also frees the transform context
                context.free_context()
            else:
                # copying the context failed, so the transform context was
                # never handed over and has to be freed here
                xslt.xsltFreeTransformContext(transform_ctxt)
            _destroyFakeDoc(input_doc._c_doc, c_doc)

        try:
            # exceptions stored during the run take precedence over the result
            if resolver_context is not None and resolver_context._has_raised():
                if c_result is not NULL:
                    tree.xmlFreeDoc(c_result)
                    c_result = NULL
                resolver_context._raise_if_stored()

            if context._exc._has_raised():
                if c_result is not NULL:
                    tree.xmlFreeDoc(c_result)
                    c_result = NULL
                context._exc._raise_if_stored()

            if c_result is NULL:
                # last error seems to be the most accurate here
                error = self._error_log.last_error
                if error is not None and error.message:
                    if error.line > 0:
                        message = f"{error.message}, line {error.line}"
                    else:
                        message = error.message
                elif error is not None and error.line > 0:
                    message = f"Error applying stylesheet, line {error.line}"
                else:
                    message = "Error applying stylesheet"
                raise XSLTApplyError(message, self._error_log)
        finally:
            if resolver_context is not None:
                resolver_context.clear()

        result_doc = _documentFactory(c_result, input_doc._parser)

        # Replace the result's dict by the thread dict and fix up names
        # that may still come from the run, stylesheet or input dicts.
        c_dict = c_result.dict
        xmlparser.xmlDictReference(c_dict)
        __GLOBAL_PARSER_CONTEXT.initThreadDictRef(&c_result.dict)
        if c_dict is not c_result.dict or \
                self._c_style.doc.dict is not c_result.dict or \
                input_doc._c_doc.dict is not c_result.dict:
            with nogil:
                if c_dict is not c_result.dict:
                    fixThreadDictNames(<xmlNode*>c_result,
                                       c_dict, c_result.dict)
                if self._c_style.doc.dict is not c_result.dict:
                    fixThreadDictNames(<xmlNode*>c_result,
                                       self._c_style.doc.dict, c_result.dict)
                if input_doc._c_doc.dict is not c_result.dict:
                    fixThreadDictNames(<xmlNode*>c_result,
                                       input_doc._c_doc.dict, c_result.dict)
        xmlparser.xmlDictFree(c_dict)

        return _xsltResultTreeFactory(result_doc, self, profile_doc)

    cdef xmlDoc* _run_transform(self, xmlDoc* c_input_doc,
                                const_char** params, _XSLTContext context,
                                xslt.xsltTransformContext* transform_ctxt):
        # Apply the stylesheet with errors routed to self._error_log and
        # the configured access control (if any) applied to this run.
        cdef xmlDoc* c_result
        xslt.xsltSetTransformErrorFunc(transform_ctxt, <void*>self._error_log,
                                       <xmlerror.xmlGenericErrorFunc>_receiveXSLTError)
        if self._access_control is not None:
            self._access_control._register_in_context(transform_ctxt)
        with self._error_log, nogil:
            orig_loader = _register_document_loader()
            c_result = xslt.xsltApplyStylesheetUser(
                self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt)
            _reset_document_loader(orig_loader)
        return c_result
637
+
638
+
639
cdef _convert_xslt_parameters(xslt.xsltTransformContext* transform_ctxt,
                              dict parameters, const_char*** params_ptr):
    # Convert the keyword arguments of an XSLT call into the
    # NULL-terminated C array of alternating names and values that is
    # passed to the transformation.
    #
    # params_ptr[0] receives the array (to be released by the caller with
    # python.lxml_free()) or NULL if there are no parameters.  Quoted string
    # parameters are registered on the transform context directly and do
    # not appear in the array.  Raises MemoryError if the array or a dict
    # entry cannot be allocated and ValueError for over-long names/values.
    cdef Py_ssize_t i, parameter_count, c_len
    cdef const_char** params
    cdef const_char* c_str
    cdef tree.xmlDict* c_dict = transform_ctxt.dict
    params_ptr[0] = NULL
    parameter_count = len(parameters)
    if parameter_count == 0:
        return
    # allocate space for parameters
    # * 2 as we want an entry for both key and value,
    # and + 1 as array is NULL terminated
    params = <const_char**>python.lxml_malloc(parameter_count * 2 + 1, sizeof(const_char*))
    if not params:
        raise MemoryError()
    try:
        i = 0
        for key, value in parameters.iteritems():
            k = _utf8(key)
            if isinstance(value, _XSLTQuotedStringParam):
                v = (<_XSLTQuotedStringParam>value).strval
                xslt.xsltQuoteOneUserParam(
                    transform_ctxt, _xcstr(k), _xcstr(v))
            else:
                if isinstance(value, XPath):
                    v = (<XPath>value)._path
                else:
                    v = _utf8(value)

                c_len = len(k)
                if c_len > limits.INT_MAX:
                    raise ValueError("Parameter name too long")
                c_str = <const_char*> tree.xmlDictLookup(c_dict, _xcstr(k), <int> c_len)
                if c_str is NULL:
                    # a NULL entry would end the parameter list early
                    raise MemoryError()
                params[i] = c_str
                i += 1
                c_len = len(v)
                if c_len > limits.INT_MAX:
                    raise ValueError("Parameter value too long")
                c_str = <const_char*> tree.xmlDictLookup(c_dict, _xcstr(v), <int> c_len)
                if c_str is NULL:
                    raise MemoryError()
                params[i] = c_str
                i += 1
    except:
        # the array is not handed out on failure => release it here
        python.lxml_free(params)
        raise
    params[i] = NULL
    params_ptr[0] = params
683
+
684
cdef XSLT _copyXSLT(XSLT stylesheet):
    # Create an independent XSLT object from an existing one: the access
    # control object is shared, the error log is new, and the context,
    # resolver context and stylesheet are copied/re-parsed from the original.
    cdef XSLT clone
    cdef xmlDoc* c_style_doc_copy
    assert stylesheet._c_style is not NULL, "XSLT stylesheet not initialised"
    clone = XSLT.__new__(XSLT)
    clone._access_control = stylesheet._access_control
    clone._error_log = _ErrorLog()
    clone._context = stylesheet._context._copy()

    # the resolver copy points at the original's stylesheet document,
    # so give the clone a document copy of its own
    clone._xslt_resolver_context = stylesheet._xslt_resolver_context._copy()
    clone._xslt_resolver_context._c_style_doc = _copyDoc(
        stylesheet._xslt_resolver_context._c_style_doc, 1)

    c_style_doc_copy = _copyDoc(stylesheet._c_style.doc, 1)
    clone._c_style = xslt.xsltParseStylesheetDoc(c_style_doc_copy)
    if clone._c_style is NULL:
        tree.xmlFreeDoc(c_style_doc_copy)
        raise MemoryError()

    return clone
704
+
705
@cython.final
cdef class _XSLTResultTree(_ElementTree):
    """The result of an XSLT evaluation.

    Use ``str()`` or ``bytes()`` (or ``unicode()`` in Python 2.x) to serialise to a string,
    and the ``.write_output()`` method to serialise to a file.
    """
    # the stylesheet object that is used for serialising this result
    cdef XSLT _xslt
    # profiling document exposed through ``xslt_profile``, or None
    cdef _Document _profile
    # serialised result shared by all read-only buffer exports
    cdef xmlChar* _buffer
    cdef Py_ssize_t _buffer_len
    # number of outstanding read-only buffer exports of ``_buffer``
    cdef Py_ssize_t _buffer_refcnt

    def write_output(self, file, *, compression=0):
        """write_output(self, file, *, compression=0)

        Serialise the XSLT output to a file or file-like object.

        As opposed to the generic ``.write()`` method, ``.write_output()`` serialises
        the result as defined by the ``<xsl:output>`` tag.

        Raises ``XSLTSaveError`` if there is no document to serialise and
        ``IOError`` if saving or closing the output buffer fails.
        """
        cdef _FilelikeWriter writer = None
        cdef _Document doc
        cdef int r, rclose, c_compression
        cdef const_xmlChar* c_encoding = NULL
        cdef tree.xmlOutputBuffer* c_buffer

        # prefer the document of the context node, fall back to our own
        if self._context_node is not None:
            doc = self._context_node._doc
        else:
            doc = None
        if doc is None:
            doc = self._doc
            if doc is None:
                raise XSLTSaveError("No document to serialise")
        c_compression = compression or 0
        xslt.LXML_GET_XSLT_ENCODING(c_encoding, self._xslt._c_style)
        writer = _create_output_buffer(file, <const_char*>c_encoding, c_compression, &c_buffer, close=False)
        if writer is None:
            # no writer object was created, so the save can run without the GIL
            with nogil:
                r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style)
                rclose = tree.xmlOutputBufferClose(c_buffer)
        else:
            # NOTE(review): presumably the writer calls back into Python while
            # saving, which is why the GIL is kept here - confirm
            r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style)
            rclose = tree.xmlOutputBufferClose(c_buffer)
        if writer is not None:
            # re-raise an exception that the writer stored during the save
            writer._exc_context._raise_if_stored()
        if r < 0 or rclose == -1:
            python.PyErr_SetFromErrno(IOError)  # raises IOError

    cdef _saveToStringAndSize(self, xmlChar** s, int* l):
        # Serialise the result with the stylesheet's output settings into a
        # newly allocated string ``s[0]`` of length ``l[0]``.  ``s[0]`` is set
        # to NULL if there is no document.  Raises MemoryError if
        # xsltSaveResultToString() returns -1.
        cdef _Document doc
        cdef int r
        if self._context_node is not None:
            doc = self._context_node._doc
        else:
            doc = None
        if doc is None:
            doc = self._doc
            if doc is None:
                s[0] = NULL
                return
        with nogil:
            r = xslt.xsltSaveResultToString(s, l, doc._c_doc,
                                            self._xslt._c_style)
        if r == -1:
            raise MemoryError()

    def __str__(self):
        # Serialise, decode with the stylesheet's output encoding (UTF-8 if
        # none is set) and strip the encoding declaration from the text.
        cdef xmlChar* encoding
        cdef xmlChar* s = NULL
        cdef int l = 0
        self._saveToStringAndSize(&s, &l)
        if s is NULL:
            return ''
        encoding = self._xslt._c_style.encoding
        try:
            if encoding is NULL:
                result = s[:l].decode('UTF-8')
            else:
                result = s[:l].decode(encoding)
        finally:
            # the serialised C string is always released, even if decoding fails
            tree.xmlFree(s)
        return _stripEncodingDeclaration(result)

    def __getbuffer__(self, Py_buffer* buffer, int flags):
        # Buffer protocol export of the serialised result.
        #
        # Read-only requests share one cached serialisation (``_buffer``) that
        # is counted in ``_buffer_refcnt``.  Writable requests always get a
        # freshly serialised private copy that is not cached.
        cdef int l = 0
        if buffer is NULL:
            return
        if self._buffer is NULL or flags & python.PyBUF_WRITABLE:
            self._saveToStringAndSize(<xmlChar**>&buffer.buf, &l)
            buffer.len = l
            if self._buffer is NULL and not flags & python.PyBUF_WRITABLE:
                # first read-only export: keep the string for later requests
                self._buffer = <xmlChar*>buffer.buf
                self._buffer_len = l
                self._buffer_refcnt = 1
        else:
            # hand out the cached serialisation again
            buffer.buf = self._buffer
            buffer.len = self._buffer_len
            self._buffer_refcnt += 1
        if flags & python.PyBUF_WRITABLE:
            buffer.readonly = 0
        else:
            buffer.readonly = 1
        if flags & python.PyBUF_FORMAT:
            buffer.format = "B"
        else:
            buffer.format = NULL
        buffer.ndim = 0
        buffer.shape = NULL
        buffer.strides = NULL
        buffer.suboffsets = NULL
        buffer.itemsize = 1
        buffer.internal = NULL
        if buffer.obj is not self: # set by Cython?
            buffer.obj = self

    def __releasebuffer__(self, Py_buffer* buffer):
        # Counterpart of __getbuffer__(): the shared string is freed when its
        # last export is released, a private (writable) copy is freed at once.
        if buffer is NULL:
            return
        if <xmlChar*>buffer.buf is self._buffer:
            self._buffer_refcnt -= 1
            if self._buffer_refcnt == 0:
                tree.xmlFree(<char*>self._buffer)
                self._buffer = NULL
        else:
            tree.xmlFree(<char*>buffer.buf)
        buffer.buf = NULL

    property xslt_profile:
        """Return an ElementTree with profiling data for the stylesheet run.

        Returns None if there is no profile document or it has no root element.
        Deleting the property drops the reference to the profile document.
        """
        def __get__(self):
            cdef object root
            if self._profile is None:
                return None
            root = self._profile.getroot()
            if root is None:
                return None
            return ElementTree(root)

        def __del__(self):
            self._profile = None
848
+
849
cdef _xsltResultTreeFactory(_Document doc, XSLT xslt, _Document profile):
    # Wrap ``doc`` in a new _XSLTResultTree and attach the stylesheet object
    # and the profiling document (as passed in) to it.
    cdef _XSLTResultTree result_tree = <_XSLTResultTree>_newElementTree(
        doc, None, _XSLTResultTree)
    result_tree._xslt = xslt
    result_tree._profile = profile
    return result_tree
855
+
856
# Module-level initialisation: both calls below run once, when this module
# is loaded.

# functions like "output" and "write" are a potential security risk, but we
# rely on the user to configure XSLTAccessControl as needed
xslt.xsltRegisterAllExtras()

# enable EXSLT support for XSLT
xslt.exsltRegisterAll()
862
+
863
+
864
+ ################################################################################
865
+ # XSLT PI support
866
+
867
# Matches an ``href`` pseudo-attribute preceded by whitespace.  The value is
# captured in group 1 if it is single-quoted and in group 2 if it is
# double-quoted.
cdef object _RE_PI_HREF = re.compile(r'\s+href\s*=\s*(?:\'([^\']*)\'|"([^"]*)")')
# bound methods of the compiled pattern, kept as module-level shortcuts
cdef object _FIND_PI_HREF = _RE_PI_HREF.findall
cdef object _REPLACE_PI_HREF = _RE_PI_HREF.sub
# XPath object that _findStylesheetByID() creates on its first call
cdef XPath __findStylesheetByID = None
871
+
872
cdef _findStylesheetByID(_Document doc, id):
    # Evaluate the XPath search for <xsl:stylesheet> elements whose ``xml:id``
    # equals ``id`` against ``doc``.  The XPath object is built on the first
    # call and then reused from the module-level variable.
    global __findStylesheetByID
    if __findStylesheetByID is not None:
        return __findStylesheetByID(doc, id=id)

    xsl_namespaces = {"xsl" : "http://www.w3.org/1999/XSL/Transform"}
    __findStylesheetByID = XPath(
        "//xsl:stylesheet[@xml:id = $id]", namespaces=xsl_namespaces)
    return __findStylesheetByID(doc, id=id)
879
+
880
cdef class _XSLTProcessingInstruction(PIBase):
    # Processing instruction whose text carries an ``href`` pseudo-attribute
    # that points to a stylesheet, either by URL or by "#id" reference into
    # the same document.

    def parseXSL(self, parser=None):
        """parseXSL(self, parser=None)

        Try to parse the stylesheet referenced by this PI and return
        an ElementTree for it.  If the stylesheet is embedded in the
        same document (referenced via xml:id), find and return an
        ElementTree for the stylesheet Element.

        The optional ``parser`` keyword argument can be passed to specify the
        parser used to read from external stylesheet URLs.

        Raises ``ValueError`` if the PI has no content, if it does not
        contain exactly one ``href`` pseudo-attribute, or if an embedded
        stylesheet reference cannot be resolved to exactly one element.
        """
        cdef _Document result_doc
        cdef _Element result_node
        cdef bytes href_utf
        cdef const_xmlChar* c_href
        cdef xmlChar* c_base
        cdef xmlAttr* c_attr
        _assertValidNode(self)
        if self._c_node.content is NULL:
            raise ValueError("PI lacks content")
        # the regex requires leading whitespace, so prepend a space
        hrefs = _FIND_PI_HREF(' ' + (<unsigned char*>self._c_node.content).decode('UTF-8'))
        if len(hrefs) != 1:
            raise ValueError("malformed PI attributes")
        hrefs = hrefs[0]
        # group 1 holds a single-quoted value, group 2 a double-quoted one
        href_utf = utf8(hrefs[0] or hrefs[1])
        c_href = _xcstr(href_utf)

        if c_href[0] != c'#':
            # normal URL, try to parse from it
            c_base = tree.xmlNodeGetBase(self._c_node.doc, self._c_node)
            c_href = tree.xmlBuildURI(c_href, c_base)
            if c_base is not NULL:
                # xmlNodeGetBase() returns a string owned by the caller;
                # xmlBuildURI() built its own result, so release the base now
                tree.xmlFree(c_base)
            if c_href is not NULL:
                try:
                    # copy the resolved URI into a Python bytes object
                    href_utf = <unsigned char*>c_href
                finally:
                    tree.xmlFree(<char*>c_href)
            # if the URI could not be built, the href is used as it is
            result_doc = _parseDocumentFromURL(href_utf, parser)
            return _elementTreeFactory(result_doc, None)

        # ID reference to embedded stylesheet
        # try XML:ID lookup
        _assertValidDoc(self._doc)
        c_href += 1 # skip leading '#'
        c_attr = tree.xmlGetID(self._c_node.doc, c_href)
        if c_attr is not NULL and c_attr.doc is self._c_node.doc:
            result_node = _elementFactory(self._doc, c_attr.parent)
            return _elementTreeFactory(result_node._doc, result_node)

        # try XPath search
        root = _findStylesheetByID(self._doc, funicode(c_href))
        if not root:
            raise ValueError("reference to non-existing embedded stylesheet")
        elif len(root) > 1:
            raise ValueError("ambiguous reference to embedded stylesheet")
        result_node = root[0]
        return _elementTreeFactory(result_node._doc, result_node)

    def set(self, key, value):
        """set(self, key, value)

        Supports setting the 'href' pseudo-attribute in the text of
        the processing instruction.

        Passing ``None`` as value removes an existing 'href'.  Raises
        ``AttributeError`` for any other key and ``ValueError`` if the
        value contains '"' or '>'.
        """
        if key != "href":
            raise AttributeError(
                "only setting the 'href' attribute is supported on XSLT-PIs")
        if value is None:
            attrib = ""
        elif '"' in value or '>' in value:
            raise ValueError("Invalid URL, must not contain '\"' or '>'")
        else:
            attrib = f' href="{value}"'
        # the regex requires leading whitespace, so prepend a space
        text = ' ' + self.text
        if _FIND_PI_HREF(text):
            # The replacement argument of re.sub() is a template in which
            # backslashes introduce escapes and group references.  Double
            # them so that the href is inserted literally.
            self.text = _REPLACE_PI_HREF(attrib.replace('\\', '\\\\'), text)
        else:
            self.text = text + attrib
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/LICENSE ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) 2010-2020 Benjamin Peterson
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
4
+ this software and associated documentation files (the "Software"), to deal in
5
+ the Software without restriction, including without limitation the rights to
6
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7
+ the Software, and to permit persons to whom the Software is furnished to do so,
8
+ subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/METADATA ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.1
2
+ Name: six
3
+ Version: 1.16.0
4
+ Summary: Python 2 and 3 compatibility utilities
5
+ Home-page: https://github.com/benjaminp/six
6
+ Author: Benjamin Peterson
7
+ Author-email: benjamin@python.org
8
+ License: MIT
9
+ Platform: UNKNOWN
10
+ Classifier: Development Status :: 5 - Production/Stable
11
+ Classifier: Programming Language :: Python :: 2
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Topic :: Software Development :: Libraries
16
+ Classifier: Topic :: Utilities
17
+ Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*
18
+
19
+ .. image:: https://img.shields.io/pypi/v/six.svg
20
+ :target: https://pypi.org/project/six/
21
+ :alt: six on PyPI
22
+
23
+ .. image:: https://travis-ci.org/benjaminp/six.svg?branch=master
24
+ :target: https://travis-ci.org/benjaminp/six
25
+ :alt: six on TravisCI
26
+
27
+ .. image:: https://readthedocs.org/projects/six/badge/?version=latest
28
+ :target: https://six.readthedocs.io/
29
+ :alt: six's documentation on Read the Docs
30
+
31
+ .. image:: https://img.shields.io/badge/license-MIT-green.svg
32
+ :target: https://github.com/benjaminp/six/blob/master/LICENSE
33
+ :alt: MIT License badge
34
+
35
+ Six is a Python 2 and 3 compatibility library. It provides utility functions
36
+ for smoothing over the differences between the Python versions with the goal of
37
+ writing Python code that is compatible on both Python versions. See the
38
+ documentation for more information on what is provided.
39
+
40
+ Six supports Python 2.7 and 3.3+. It is contained in only one Python
41
+ file, so it can be easily copied into your project. (The copyright and license
42
+ notice must be retained.)
43
+
44
+ Online documentation is at https://six.readthedocs.io/.
45
+
46
+ Bugs can be reported to https://github.com/benjaminp/six. The code can also
47
+ be found there.
48
+
49
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/RECORD ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ __pycache__/six.cpython-312.pyc,,
2
+ six-1.16.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
3
+ six-1.16.0.dist-info/LICENSE,sha256=i7hQxWWqOJ_cFvOkaWWtI9gq3_YPI5P8J2K2MYXo5sk,1066
4
+ six-1.16.0.dist-info/METADATA,sha256=VQcGIFCAEmfZcl77E5riPCN4v2TIsc_qtacnjxKHJoI,1795
5
+ six-1.16.0.dist-info/RECORD,,
6
+ six-1.16.0.dist-info/WHEEL,sha256=Z-nyYpwrcSqxfdux5Mbn_DQ525iP7J2DG3JgGvOYyTQ,110
7
+ six-1.16.0.dist-info/top_level.txt,sha256=_iVH_iYEtEXnD8nYGQYpYFUvkUW9sEO1GYbkeKSAais,4
8
+ six.py,sha256=TOOfQi7nFGfMrIvtdr6wX4wyHH8M7aknmuLfo2cBBrM,34549
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/WHEEL ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.36.2)
3
+ Root-Is-Purelib: true
4
+ Tag: py2-none-any
5
+ Tag: py3-none-any
6
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/six-1.16.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ six