BryanW commited on
Commit
7f9dddc
·
verified ·
1 Parent(s): 20a2dfd

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/INSTALLER +1 -0
  2. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/LICENSE.txt +13 -0
  3. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/METADATA +250 -0
  4. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/RECORD +131 -0
  5. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/WHEEL +6 -0
  6. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/top_level.txt +1 -0
  7. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/__init__.py +77 -0
  8. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/from_thread.py +527 -0
  9. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/lowlevel.py +161 -0
  10. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/py.typed +0 -0
  11. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/pytest_plugin.py +191 -0
  12. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_interpreter.py +218 -0
  13. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_process.py +258 -0
  14. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_thread.py +69 -0
  15. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/_compat.py +11 -0
  16. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/parser_inline.py +147 -0
  17. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/port.yaml +48 -0
  18. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/token.py +180 -0
  19. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/INSTALLER +1 -0
  20. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/LICENSE +202 -0
  21. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/METADATA +317 -0
  22. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/NOTICE +13 -0
  23. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/RECORD +18 -0
  24. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/WHEEL +6 -0
  25. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/top_level.txt +1 -0
  26. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/__init__.py +437 -0
  27. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_azurefs.pyx +134 -0
  28. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_compute.pyx +0 -0
  29. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_compute_docstrings.py +56 -0
  30. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_cuda.pyx +1080 -0
  31. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_dataset_parquet.pyx +1053 -0
  32. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_fs.pxd +91 -0
  33. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_hdfs.pyx +160 -0
  34. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_json.pxd +36 -0
  35. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_json.pyx +310 -0
  36. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_orc.pyx +445 -0
  37. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_parquet.pyx +2266 -0
  38. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_pyarrow_cpp_tests.pxd +33 -0
  39. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_substrait.pyx +481 -0
  40. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/cffi.py +81 -0
  41. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/compute.py +744 -0
  42. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/config.pxi +95 -0
  43. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/cuda.py +25 -0
  44. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/error.pxi +274 -0
  45. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/fs.py +431 -0
  46. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/ipc.pxi +1403 -0
  47. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/ipc.py +285 -0
  48. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/jvm.py +335 -0
  49. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/lib.pxd +770 -0
  50. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/lib_api.h +201 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/LICENSE.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright aio-libs contributors.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/METADATA ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.2
2
+ Name: aiohttp
3
+ Version: 3.11.13
4
+ Summary: Async http client/server framework (asyncio)
5
+ Home-page: https://github.com/aio-libs/aiohttp
6
+ Maintainer: aiohttp team <team@aiohttp.org>
7
+ Maintainer-email: team@aiohttp.org
8
+ License: Apache-2.0
9
+ Project-URL: Chat: Matrix, https://matrix.to/#/#aio-libs:matrix.org
10
+ Project-URL: Chat: Matrix Space, https://matrix.to/#/#aio-libs-space:matrix.org
11
+ Project-URL: CI: GitHub Actions, https://github.com/aio-libs/aiohttp/actions?query=workflow%3ACI
12
+ Project-URL: Coverage: codecov, https://codecov.io/github/aio-libs/aiohttp
13
+ Project-URL: Docs: Changelog, https://docs.aiohttp.org/en/stable/changes.html
14
+ Project-URL: Docs: RTD, https://docs.aiohttp.org
15
+ Project-URL: GitHub: issues, https://github.com/aio-libs/aiohttp/issues
16
+ Project-URL: GitHub: repo, https://github.com/aio-libs/aiohttp
17
+ Classifier: Development Status :: 5 - Production/Stable
18
+ Classifier: Framework :: AsyncIO
19
+ Classifier: Intended Audience :: Developers
20
+ Classifier: License :: OSI Approved :: Apache Software License
21
+ Classifier: Operating System :: POSIX
22
+ Classifier: Operating System :: MacOS :: MacOS X
23
+ Classifier: Operating System :: Microsoft :: Windows
24
+ Classifier: Programming Language :: Python
25
+ Classifier: Programming Language :: Python :: 3
26
+ Classifier: Programming Language :: Python :: 3.9
27
+ Classifier: Programming Language :: Python :: 3.10
28
+ Classifier: Programming Language :: Python :: 3.11
29
+ Classifier: Programming Language :: Python :: 3.12
30
+ Classifier: Programming Language :: Python :: 3.13
31
+ Classifier: Topic :: Internet :: WWW/HTTP
32
+ Requires-Python: >=3.9
33
+ Description-Content-Type: text/x-rst
34
+ License-File: LICENSE.txt
35
+ Requires-Dist: aiohappyeyeballs>=2.3.0
36
+ Requires-Dist: aiosignal>=1.1.2
37
+ Requires-Dist: async-timeout<6.0,>=4.0; python_version < "3.11"
38
+ Requires-Dist: attrs>=17.3.0
39
+ Requires-Dist: frozenlist>=1.1.1
40
+ Requires-Dist: multidict<7.0,>=4.5
41
+ Requires-Dist: propcache>=0.2.0
42
+ Requires-Dist: yarl<2.0,>=1.17.0
43
+ Provides-Extra: speedups
44
+ Requires-Dist: aiodns>=3.2.0; (sys_platform == "linux" or sys_platform == "darwin") and extra == "speedups"
45
+ Requires-Dist: Brotli; platform_python_implementation == "CPython" and extra == "speedups"
46
+ Requires-Dist: brotlicffi; platform_python_implementation != "CPython" and extra == "speedups"
47
+
48
+ ==================================
49
+ Async http client/server framework
50
+ ==================================
51
+
52
+ .. image:: https://raw.githubusercontent.com/aio-libs/aiohttp/master/docs/aiohttp-plain.svg
53
+ :height: 64px
54
+ :width: 64px
55
+ :alt: aiohttp logo
56
+
57
+ |
58
+
59
+ .. image:: https://github.com/aio-libs/aiohttp/workflows/CI/badge.svg
60
+ :target: https://github.com/aio-libs/aiohttp/actions?query=workflow%3ACI
61
+ :alt: GitHub Actions status for master branch
62
+
63
+ .. image:: https://codecov.io/gh/aio-libs/aiohttp/branch/master/graph/badge.svg
64
+ :target: https://codecov.io/gh/aio-libs/aiohttp
65
+ :alt: codecov.io status for master branch
66
+
67
+ .. image:: https://img.shields.io/endpoint?url=https://codspeed.io/badge.json
68
+ :target: https://codspeed.io/aio-libs/aiohttp
69
+ :alt: Codspeed.io status for aiohttp
70
+
71
+ .. image:: https://badge.fury.io/py/aiohttp.svg
72
+ :target: https://pypi.org/project/aiohttp
73
+ :alt: Latest PyPI package version
74
+
75
+ .. image:: https://readthedocs.org/projects/aiohttp/badge/?version=latest
76
+ :target: https://docs.aiohttp.org/
77
+ :alt: Latest Read The Docs
78
+
79
+ .. image:: https://img.shields.io/matrix/aio-libs:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat
80
+ :target: https://matrix.to/#/%23aio-libs:matrix.org
81
+ :alt: Matrix Room — #aio-libs:matrix.org
82
+
83
+ .. image:: https://img.shields.io/matrix/aio-libs-space:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs-space%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat
84
+ :target: https://matrix.to/#/%23aio-libs-space:matrix.org
85
+ :alt: Matrix Space — #aio-libs-space:matrix.org
86
+
87
+
88
+ Key Features
89
+ ============
90
+
91
+ - Supports both client and server side of HTTP protocol.
92
+ - Supports both client and server Web-Sockets out-of-the-box and avoids
93
+ Callback Hell.
94
+ - Provides Web-server with middleware and pluggable routing.
95
+
96
+
97
+ Getting started
98
+ ===============
99
+
100
+ Client
101
+ ------
102
+
103
+ To get something from the web:
104
+
105
+ .. code-block:: python
106
+
107
+ import aiohttp
108
+ import asyncio
109
+
110
+ async def main():
111
+
112
+ async with aiohttp.ClientSession() as session:
113
+ async with session.get('http://python.org') as response:
114
+
115
+ print("Status:", response.status)
116
+ print("Content-type:", response.headers['content-type'])
117
+
118
+ html = await response.text()
119
+ print("Body:", html[:15], "...")
120
+
121
+ asyncio.run(main())
122
+
123
+ This prints:
124
+
125
+ .. code-block::
126
+
127
+ Status: 200
128
+ Content-type: text/html; charset=utf-8
129
+ Body: <!doctype html> ...
130
+
131
+ Coming from `requests <https://requests.readthedocs.io/>`_ ? Read `why we need so many lines <https://aiohttp.readthedocs.io/en/latest/http_request_lifecycle.html>`_.
132
+
133
+ Server
134
+ ------
135
+
136
+ An example using a simple server:
137
+
138
+ .. code-block:: python
139
+
140
+ # examples/server_simple.py
141
+ from aiohttp import web
142
+
143
+ async def handle(request):
144
+ name = request.match_info.get('name', "Anonymous")
145
+ text = "Hello, " + name
146
+ return web.Response(text=text)
147
+
148
+ async def wshandle(request):
149
+ ws = web.WebSocketResponse()
150
+ await ws.prepare(request)
151
+
152
+ async for msg in ws:
153
+ if msg.type == web.WSMsgType.text:
154
+ await ws.send_str("Hello, {}".format(msg.data))
155
+ elif msg.type == web.WSMsgType.binary:
156
+ await ws.send_bytes(msg.data)
157
+ elif msg.type == web.WSMsgType.close:
158
+ break
159
+
160
+ return ws
161
+
162
+
163
+ app = web.Application()
164
+ app.add_routes([web.get('/', handle),
165
+ web.get('/echo', wshandle),
166
+ web.get('/{name}', handle)])
167
+
168
+ if __name__ == '__main__':
169
+ web.run_app(app)
170
+
171
+
172
+ Documentation
173
+ =============
174
+
175
+ https://aiohttp.readthedocs.io/
176
+
177
+
178
+ Demos
179
+ =====
180
+
181
+ https://github.com/aio-libs/aiohttp-demos
182
+
183
+
184
+ External links
185
+ ==============
186
+
187
+ * `Third party libraries
188
+ <http://aiohttp.readthedocs.io/en/latest/third_party.html>`_
189
+ * `Built with aiohttp
190
+ <http://aiohttp.readthedocs.io/en/latest/built_with.html>`_
191
+ * `Powered by aiohttp
192
+ <http://aiohttp.readthedocs.io/en/latest/powered_by.html>`_
193
+
194
+ Feel free to make a Pull Request for adding your link to these pages!
195
+
196
+
197
+ Communication channels
198
+ ======================
199
+
200
+ *aio-libs Discussions*: https://github.com/aio-libs/aiohttp/discussions
201
+
202
+ *Matrix*: `#aio-libs:matrix.org <https://matrix.to/#/#aio-libs:matrix.org>`_
203
+
204
+ We support `Stack Overflow
205
+ <https://stackoverflow.com/questions/tagged/aiohttp>`_.
206
+ Please add *aiohttp* tag to your question there.
207
+
208
+ Requirements
209
+ ============
210
+
211
+ - attrs_
212
+ - multidict_
213
+ - yarl_
214
+ - frozenlist_
215
+
216
+ Optionally you may install the aiodns_ library (highly recommended for sake of speed).
217
+
218
+ .. _aiodns: https://pypi.python.org/pypi/aiodns
219
+ .. _attrs: https://github.com/python-attrs/attrs
220
+ .. _multidict: https://pypi.python.org/pypi/multidict
221
+ .. _frozenlist: https://pypi.org/project/frozenlist/
222
+ .. _yarl: https://pypi.python.org/pypi/yarl
223
+ .. _async-timeout: https://pypi.python.org/pypi/async_timeout
224
+
225
+ License
226
+ =======
227
+
228
+ ``aiohttp`` is offered under the Apache 2 license.
229
+
230
+
231
+ Keepsafe
232
+ ========
233
+
234
+ The aiohttp community would like to thank Keepsafe
235
+ (https://www.getkeepsafe.com) for its support in the early days of
236
+ the project.
237
+
238
+
239
+ Source code
240
+ ===========
241
+
242
+ The latest developer version is available in a GitHub repository:
243
+ https://github.com/aio-libs/aiohttp
244
+
245
+ Benchmarks
246
+ ==========
247
+
248
+ If you are interested in efficiency, the AsyncIO community maintains a
249
+ list of benchmarks on the official wiki:
250
+ https://github.com/python/asyncio/wiki/Benchmarks
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/RECORD ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp-3.11.13.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
2
+ aiohttp-3.11.13.dist-info/LICENSE.txt,sha256=n4DQ2311WpQdtFchcsJw7L2PCCuiFd3QlZhZQu2Uqes,588
3
+ aiohttp-3.11.13.dist-info/METADATA,sha256=V_vS5w25_e4iRV4NQ4mjjJeaSpzg0IaWqru7XzJqJLc,7712
4
+ aiohttp-3.11.13.dist-info/RECORD,,
5
+ aiohttp-3.11.13.dist-info/WHEEL,sha256=siqMuoWpRueIZ87ijidBxnOwHeSOOcxNwYCs-pC4Yv0,151
6
+ aiohttp-3.11.13.dist-info/top_level.txt,sha256=iv-JIaacmTl-hSho3QmphcKnbRRYx1st47yjz_178Ro,8
7
+ aiohttp/.hash/_cparser.pxd.hash,sha256=hYa9Vje-oMs2eh_7MfCPOh2QW_1x1yCjcZuc7AmwLd0,121
8
+ aiohttp/.hash/_find_header.pxd.hash,sha256=_mbpD6vM-CVCKq3ulUvsOAz5Wdo88wrDzfpOsMQaMNA,125
9
+ aiohttp/.hash/_http_parser.pyx.hash,sha256=GBgZjCNbtZApPhf9-gHpS5Z2WMIzM-vgp5VSZIEvZfk,125
10
+ aiohttp/.hash/_http_writer.pyx.hash,sha256=-UgSF82qclpxjP0og_gcFEsstXRKF9e3Ou4wziAyDvI,125
11
+ aiohttp/.hash/hdrs.py.hash,sha256=v6IaKbsxjsdQxBzhb5AjP0x_9G3rUe84D7avf7AI4cs,116
12
+ aiohttp/__init__.py,sha256=612FENJ9aLTJsbeZNR9LF_PAJ4nLUY4NgyqlkDnO70c,7840
13
+ aiohttp/__pycache__/__init__.cpython-312.pyc,,
14
+ aiohttp/__pycache__/abc.cpython-312.pyc,,
15
+ aiohttp/__pycache__/base_protocol.cpython-312.pyc,,
16
+ aiohttp/__pycache__/client.cpython-312.pyc,,
17
+ aiohttp/__pycache__/client_exceptions.cpython-312.pyc,,
18
+ aiohttp/__pycache__/client_proto.cpython-312.pyc,,
19
+ aiohttp/__pycache__/client_reqrep.cpython-312.pyc,,
20
+ aiohttp/__pycache__/client_ws.cpython-312.pyc,,
21
+ aiohttp/__pycache__/compression_utils.cpython-312.pyc,,
22
+ aiohttp/__pycache__/connector.cpython-312.pyc,,
23
+ aiohttp/__pycache__/cookiejar.cpython-312.pyc,,
24
+ aiohttp/__pycache__/formdata.cpython-312.pyc,,
25
+ aiohttp/__pycache__/hdrs.cpython-312.pyc,,
26
+ aiohttp/__pycache__/helpers.cpython-312.pyc,,
27
+ aiohttp/__pycache__/http.cpython-312.pyc,,
28
+ aiohttp/__pycache__/http_exceptions.cpython-312.pyc,,
29
+ aiohttp/__pycache__/http_parser.cpython-312.pyc,,
30
+ aiohttp/__pycache__/http_websocket.cpython-312.pyc,,
31
+ aiohttp/__pycache__/http_writer.cpython-312.pyc,,
32
+ aiohttp/__pycache__/log.cpython-312.pyc,,
33
+ aiohttp/__pycache__/multipart.cpython-312.pyc,,
34
+ aiohttp/__pycache__/payload.cpython-312.pyc,,
35
+ aiohttp/__pycache__/payload_streamer.cpython-312.pyc,,
36
+ aiohttp/__pycache__/pytest_plugin.cpython-312.pyc,,
37
+ aiohttp/__pycache__/resolver.cpython-312.pyc,,
38
+ aiohttp/__pycache__/streams.cpython-312.pyc,,
39
+ aiohttp/__pycache__/tcp_helpers.cpython-312.pyc,,
40
+ aiohttp/__pycache__/test_utils.cpython-312.pyc,,
41
+ aiohttp/__pycache__/tracing.cpython-312.pyc,,
42
+ aiohttp/__pycache__/typedefs.cpython-312.pyc,,
43
+ aiohttp/__pycache__/web.cpython-312.pyc,,
44
+ aiohttp/__pycache__/web_app.cpython-312.pyc,,
45
+ aiohttp/__pycache__/web_exceptions.cpython-312.pyc,,
46
+ aiohttp/__pycache__/web_fileresponse.cpython-312.pyc,,
47
+ aiohttp/__pycache__/web_log.cpython-312.pyc,,
48
+ aiohttp/__pycache__/web_middlewares.cpython-312.pyc,,
49
+ aiohttp/__pycache__/web_protocol.cpython-312.pyc,,
50
+ aiohttp/__pycache__/web_request.cpython-312.pyc,,
51
+ aiohttp/__pycache__/web_response.cpython-312.pyc,,
52
+ aiohttp/__pycache__/web_routedef.cpython-312.pyc,,
53
+ aiohttp/__pycache__/web_runner.cpython-312.pyc,,
54
+ aiohttp/__pycache__/web_server.cpython-312.pyc,,
55
+ aiohttp/__pycache__/web_urldispatcher.cpython-312.pyc,,
56
+ aiohttp/__pycache__/web_ws.cpython-312.pyc,,
57
+ aiohttp/__pycache__/worker.cpython-312.pyc,,
58
+ aiohttp/_cparser.pxd,sha256=8jGIg-VJ9p3llwCakUYDsPGxA4HiZe9dmK9Jmtlz-5g,4318
59
+ aiohttp/_find_header.pxd,sha256=0GfwFCPN2zxEKTO1_MA5sYq2UfzsG8kcV3aTqvwlz3g,68
60
+ aiohttp/_headers.pxi,sha256=n701k28dVPjwRnx5j6LpJhLTfj7dqu2vJt7f0O60Oyg,2007
61
+ aiohttp/_http_parser.cpython-312-x86_64-linux-gnu.so,sha256=kZJwKEDTHDTxMWYND9KqRALWr8BaLSyyTBmQ6aRYuRA,2813904
62
+ aiohttp/_http_parser.pyx,sha256=wQdADj5LizwC_7nFGr8nIlk6GpoaQeQ0359H0HMKGuM,28241
63
+ aiohttp/_http_writer.cpython-312-x86_64-linux-gnu.so,sha256=6h2_x6dsZeFsvy_Iurvq9c-RfNLg-heJJAablu4uSSk,492232
64
+ aiohttp/_http_writer.pyx,sha256=fiCck_EVgRiTX7VtAoV2AldjuesJMFPev4TWd9Fx8jo,4597
65
+ aiohttp/_websocket/.hash/mask.pxd.hash,sha256=Y0zBddk_ck3pi9-BFzMcpkcvCKvwvZ4GTtZFb9u1nxQ,128
66
+ aiohttp/_websocket/.hash/mask.pyx.hash,sha256=90owpXYM8_kIma4KUcOxhWSk-Uv4NVMBoCYeFM1B3d0,128
67
+ aiohttp/_websocket/.hash/reader_c.pxd.hash,sha256=EoZjkF_tAFEbGvV0oRY2GZOSuAfWFWFjMhXgq6mQExo,132
68
+ aiohttp/_websocket/__init__.py,sha256=Mar3R9_vBN_Ea4lsW7iTAVXD7OKswKPGqF5xgSyt77k,44
69
+ aiohttp/_websocket/__pycache__/__init__.cpython-312.pyc,,
70
+ aiohttp/_websocket/__pycache__/helpers.cpython-312.pyc,,
71
+ aiohttp/_websocket/__pycache__/models.cpython-312.pyc,,
72
+ aiohttp/_websocket/__pycache__/reader.cpython-312.pyc,,
73
+ aiohttp/_websocket/__pycache__/reader_c.cpython-312.pyc,,
74
+ aiohttp/_websocket/__pycache__/reader_py.cpython-312.pyc,,
75
+ aiohttp/_websocket/__pycache__/writer.cpython-312.pyc,,
76
+ aiohttp/_websocket/helpers.py,sha256=P-XLv8IUaihKzDenVUqfKU5DJbWE5HvG8uhvUZK8Ic4,5038
77
+ aiohttp/_websocket/mask.cpython-312-x86_64-linux-gnu.so,sha256=rdCiOTakoDIcEYBPFRf7ncQUgXmhosFnNMxUYqJn8uA,265432
78
+ aiohttp/_websocket/mask.pxd,sha256=sBmZ1Amym9kW4Ge8lj1fLZ7mPPya4LzLdpkQExQXv5M,112
79
+ aiohttp/_websocket/mask.pyx,sha256=BHjOtV0O0w7xp9p0LNADRJvGmgfPn9sGeJvSs0fL__4,1397
80
+ aiohttp/_websocket/models.py,sha256=XAzjs_8JYszWXIgZ6R3ZRrF-tX9Q_6LiD49WRYojopM,2121
81
+ aiohttp/_websocket/reader.py,sha256=eC4qS0c5sOeQ2ebAHLaBpIaTVFaSKX79pY2xvh3Pqyw,1030
82
+ aiohttp/_websocket/reader_c.cpython-312-x86_64-linux-gnu.so,sha256=wt49Wc5GzT9rRbxIN-8pgo3mQdTthCSVaoQR4NI6OTQ,1871856
83
+ aiohttp/_websocket/reader_c.pxd,sha256=9rMWCpAC1jng7_gtqLjRlqQv9q7UkOn63tIQfq2k8Gc,2444
84
+ aiohttp/_websocket/reader_c.py,sha256=anZsBKZWlL8SO8gArsZMDstH37qBuZOvJA7jtj0Z95M,17975
85
+ aiohttp/_websocket/reader_py.py,sha256=anZsBKZWlL8SO8gArsZMDstH37qBuZOvJA7jtj0Z95M,17975
86
+ aiohttp/_websocket/writer.py,sha256=T3P36iMrzVPPC2XeScserHMD5vd9an6yizWzqDUkRZ0,7077
87
+ aiohttp/abc.py,sha256=JLMOxrKLGTDaPRLfraY1pl-xka53YiHhAH9yaF9QRXQ,6512
88
+ aiohttp/base_protocol.py,sha256=Tp8cxUPQvv9kUPk3w6lAzk6d2MAzV3scwI_3Go3C47c,3025
89
+ aiohttp/client.py,sha256=isdfGlM4O5ILr4F4gBABlybxo4MQ1tNaMm7zjMcrfrM,54309
90
+ aiohttp/client_exceptions.py,sha256=uyKbxI2peZhKl7lELBMx3UeusNkfpemPWpGFq0r6JeM,11367
91
+ aiohttp/client_proto.py,sha256=dV7u9floGWG-_xtD2fLUYqiANG6VsJtq0HMlTjf1g-g,10015
92
+ aiohttp/client_reqrep.py,sha256=VAgh0NxP2HvYWx6nX1Pr8FINc1m-W8-5q2zKeZV68n8,43925
93
+ aiohttp/client_ws.py,sha256=1CIjIXwyzOMIYw6AjUES4-qUwbyVHW1seJKQfg_Rta8,15109
94
+ aiohttp/compression_utils.py,sha256=0J3EAOR-0HehlYIudJXRu_Kr6hrYCY0IfuJ1px9MhQs,5681
95
+ aiohttp/connector.py,sha256=ZAXixLOyIl6zQEnetPLYKkbjP2BoHFEzGeEyvRBoGqI,60734
96
+ aiohttp/cookiejar.py,sha256=PYR1K1mkLa24Hm6c9UEJnAitccNzz97CbsJyQ2ULAlU,17615
97
+ aiohttp/formdata.py,sha256=CUJnCWDNHFcXSYZ_TupaT6rHkY-Q7ghssvWzaYBPIo0,6552
98
+ aiohttp/hdrs.py,sha256=2rj5MyA-6yRdYPhW5UKkW4iNWhEAlGIOSBH5D4FmKNE,5111
99
+ aiohttp/helpers.py,sha256=KqPQECeiJ_EhA93k7-5ZoVdZH0sk_4n0tCoM_E-iMnE,29091
100
+ aiohttp/http.py,sha256=8o8j8xH70OWjnfTWA9V44NR785QPxEPrUtzMXiAVpwc,1842
101
+ aiohttp/http_exceptions.py,sha256=RYmBycJvvPerKkgXXm8v145I1N-fbsgSpcsbNIC-gdE,2961
102
+ aiohttp/http_parser.py,sha256=UqerYPJzA1MqLmeG1jURhTNO1YhwUASK3QVcNEz0me8,36851
103
+ aiohttp/http_websocket.py,sha256=8VXFKw6KQUEmPg48GtRMB37v0gTK7A0inoxXuDxMZEc,842
104
+ aiohttp/http_writer.py,sha256=pRIyfOmL3cZmdWDWBBJ2cZEwEJzLWzlPPAJInaPLThI,7595
105
+ aiohttp/log.py,sha256=BbNKx9e3VMIm0xYjZI0IcBBoS7wjdeIeSaiJE7-qK2g,325
106
+ aiohttp/multipart.py,sha256=SABIvo3vhXzG4bLDZ0C4V3yG_86vAb-3Zb9Li7BVmI8,36944
107
+ aiohttp/payload.py,sha256=rCA9JJI_RMCik_7qNIaC1Bh21aXhABGYK2tsYeaHRQ4,15793
108
+ aiohttp/payload_streamer.py,sha256=ZzEYyfzcjGWkVkK3XR2pBthSCSIykYvY3Wr5cGQ2eTc,2211
109
+ aiohttp/py.typed,sha256=sow9soTwP9T_gEAQSVh7Gb8855h04Nwmhs2We-JRgZM,7
110
+ aiohttp/pytest_plugin.py,sha256=AfJ6VIWzsp5KgpXRREsX3yqGUZrJyfb7zzcMqzWxz7I,12768
111
+ aiohttp/resolver.py,sha256=sJ8-LYCtl_g9f6gn_5X2NFQ9FQ72Q2Mr4_rLxo9NVeI,6375
112
+ aiohttp/streams.py,sha256=U-qTkuAqIfpJChuKEy-vYn8nQ_Z1MVcW0WO2DHiJz_o,22329
113
+ aiohttp/tcp_helpers.py,sha256=BSadqVWaBpMFDRWnhaaR941N9MiDZ7bdTrxgCb0CW-M,961
114
+ aiohttp/test_utils.py,sha256=r7kBasmZtC3tQY5OmyMaIl1B9P8Bnnq1oM3npVcAPKs,22811
115
+ aiohttp/tracing.py,sha256=66XQwtdR5DHv8p953eeNL0l8o6iHDaNwH9bBaybHXD4,15137
116
+ aiohttp/typedefs.py,sha256=wUlqwe9Mw9W8jT3HsYJcYk00qP3EMPz3nTkYXmeNN48,1657
117
+ aiohttp/web.py,sha256=As5nqGQy4QXWMXSaOsh0JudSVVJVIt_nr3n0b8CaMb0,18422
118
+ aiohttp/web_app.py,sha256=Zre0QHM9JAp4d7jrj5NRxlPnfTrKLNuA42Rdsh8Q2TI,19554
119
+ aiohttp/web_exceptions.py,sha256=7nIuiwhZ39vJJ9KrWqArA5QcWbUdqkz2CLwEpJapeN8,10360
120
+ aiohttp/web_fileresponse.py,sha256=FRsS0p9r1KU5y8ceG0QXBYnrL6xggjbxcXSmI6qIR4k,16504
121
+ aiohttp/web_log.py,sha256=rX5D7xLOX2B6BMdiZ-chme_KfJfW5IXEoFwLfkfkajs,7865
122
+ aiohttp/web_middlewares.py,sha256=sFI0AgeNjdyAjuz92QtMIpngmJSOxrqe2Jfbs4BNUu0,4165
123
+ aiohttp/web_protocol.py,sha256=0MYjcaQishUyJxJ4lsH4IfHef4nIvHDf-DSZwI1Con4,25539
124
+ aiohttp/web_request.py,sha256=j_SSX9s-d3ZeNyqUTpFIaPUaNdSqHwb7yfc0ufL8xFA,29750
125
+ aiohttp/web_response.py,sha256=65aliDETi7rZ8P76ksuHQI0ZTu1cKpclCSailNu105M,28696
126
+ aiohttp/web_routedef.py,sha256=VT1GAx6BrawoDh5RwBwBu5wSABSqgWwAe74AUCyZAEo,6110
127
+ aiohttp/web_runner.py,sha256=v1G1nKiOOQgFnTSR4IMc6I9ReEFDMaHtMLvO_roDM-A,11786
128
+ aiohttp/web_server.py,sha256=-9WDKUAiR9ll-rSdwXSqG6YjaoW79d1R4y0BGSqgUMA,2888
129
+ aiohttp/web_urldispatcher.py,sha256=TIMxFmhLjERseG0xcZv2Ef9Xuo_GTBRqBqeMkCgL0K8,43825
130
+ aiohttp/web_ws.py,sha256=Gr-UWgau41P-OoJUb3WJvaNYiDESXzrHmIw1Cqonupc,22612
131
+ aiohttp/worker.py,sha256=0lvxRNMjGM47ddlQWtci53ri9YN42Su1Vdd_Z7zMMH0,8040
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/WHEEL ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (75.8.0)
3
+ Root-Is-Purelib: false
4
+ Tag: cp312-cp312-manylinux_2_17_x86_64
5
+ Tag: cp312-cp312-manylinux2014_x86_64
6
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ aiohttp
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/__init__.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from ._core._eventloop import current_time as current_time
4
+ from ._core._eventloop import get_all_backends as get_all_backends
5
+ from ._core._eventloop import get_cancelled_exc_class as get_cancelled_exc_class
6
+ from ._core._eventloop import run as run
7
+ from ._core._eventloop import sleep as sleep
8
+ from ._core._eventloop import sleep_forever as sleep_forever
9
+ from ._core._eventloop import sleep_until as sleep_until
10
+ from ._core._exceptions import BrokenResourceError as BrokenResourceError
11
+ from ._core._exceptions import BrokenWorkerIntepreter as BrokenWorkerIntepreter
12
+ from ._core._exceptions import BrokenWorkerProcess as BrokenWorkerProcess
13
+ from ._core._exceptions import BusyResourceError as BusyResourceError
14
+ from ._core._exceptions import ClosedResourceError as ClosedResourceError
15
+ from ._core._exceptions import DelimiterNotFound as DelimiterNotFound
16
+ from ._core._exceptions import EndOfStream as EndOfStream
17
+ from ._core._exceptions import IncompleteRead as IncompleteRead
18
+ from ._core._exceptions import TypedAttributeLookupError as TypedAttributeLookupError
19
+ from ._core._exceptions import WouldBlock as WouldBlock
20
+ from ._core._fileio import AsyncFile as AsyncFile
21
+ from ._core._fileio import Path as Path
22
+ from ._core._fileio import open_file as open_file
23
+ from ._core._fileio import wrap_file as wrap_file
24
+ from ._core._resources import aclose_forcefully as aclose_forcefully
25
+ from ._core._signals import open_signal_receiver as open_signal_receiver
26
+ from ._core._sockets import connect_tcp as connect_tcp
27
+ from ._core._sockets import connect_unix as connect_unix
28
+ from ._core._sockets import create_connected_udp_socket as create_connected_udp_socket
29
+ from ._core._sockets import (
30
+ create_connected_unix_datagram_socket as create_connected_unix_datagram_socket,
31
+ )
32
+ from ._core._sockets import create_tcp_listener as create_tcp_listener
33
+ from ._core._sockets import create_udp_socket as create_udp_socket
34
+ from ._core._sockets import create_unix_datagram_socket as create_unix_datagram_socket
35
+ from ._core._sockets import create_unix_listener as create_unix_listener
36
+ from ._core._sockets import getaddrinfo as getaddrinfo
37
+ from ._core._sockets import getnameinfo as getnameinfo
38
+ from ._core._sockets import wait_readable as wait_readable
39
+ from ._core._sockets import wait_socket_readable as wait_socket_readable
40
+ from ._core._sockets import wait_socket_writable as wait_socket_writable
41
+ from ._core._sockets import wait_writable as wait_writable
42
+ from ._core._streams import create_memory_object_stream as create_memory_object_stream
43
+ from ._core._subprocesses import open_process as open_process
44
+ from ._core._subprocesses import run_process as run_process
45
+ from ._core._synchronization import CapacityLimiter as CapacityLimiter
46
+ from ._core._synchronization import (
47
+ CapacityLimiterStatistics as CapacityLimiterStatistics,
48
+ )
49
+ from ._core._synchronization import Condition as Condition
50
+ from ._core._synchronization import ConditionStatistics as ConditionStatistics
51
+ from ._core._synchronization import Event as Event
52
+ from ._core._synchronization import EventStatistics as EventStatistics
53
+ from ._core._synchronization import Lock as Lock
54
+ from ._core._synchronization import LockStatistics as LockStatistics
55
+ from ._core._synchronization import ResourceGuard as ResourceGuard
56
+ from ._core._synchronization import Semaphore as Semaphore
57
+ from ._core._synchronization import SemaphoreStatistics as SemaphoreStatistics
58
+ from ._core._tasks import TASK_STATUS_IGNORED as TASK_STATUS_IGNORED
59
+ from ._core._tasks import CancelScope as CancelScope
60
+ from ._core._tasks import create_task_group as create_task_group
61
+ from ._core._tasks import current_effective_deadline as current_effective_deadline
62
+ from ._core._tasks import fail_after as fail_after
63
+ from ._core._tasks import move_on_after as move_on_after
64
+ from ._core._testing import TaskInfo as TaskInfo
65
+ from ._core._testing import get_current_task as get_current_task
66
+ from ._core._testing import get_running_tasks as get_running_tasks
67
+ from ._core._testing import wait_all_tasks_blocked as wait_all_tasks_blocked
68
+ from ._core._typedattr import TypedAttributeProvider as TypedAttributeProvider
69
+ from ._core._typedattr import TypedAttributeSet as TypedAttributeSet
70
+ from ._core._typedattr import typed_attribute as typed_attribute
71
+
72
+ # Re-export imports so they look like they live directly in this package
73
+ for __value in list(locals().values()):
74
+ if getattr(__value, "__module__", "").startswith("anyio."):
75
+ __value.__module__ = __name__
76
+
77
+ del __value
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/from_thread.py ADDED
@@ -0,0 +1,527 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ from collections.abc import Awaitable, Callable, Generator
5
+ from concurrent.futures import Future
6
+ from contextlib import (
7
+ AbstractAsyncContextManager,
8
+ AbstractContextManager,
9
+ contextmanager,
10
+ )
11
+ from dataclasses import dataclass, field
12
+ from inspect import isawaitable
13
+ from threading import Lock, Thread, get_ident
14
+ from types import TracebackType
15
+ from typing import (
16
+ Any,
17
+ Generic,
18
+ TypeVar,
19
+ cast,
20
+ overload,
21
+ )
22
+
23
+ from ._core import _eventloop
24
+ from ._core._eventloop import get_async_backend, get_cancelled_exc_class, threadlocals
25
+ from ._core._synchronization import Event
26
+ from ._core._tasks import CancelScope, create_task_group
27
+ from .abc import AsyncBackend
28
+ from .abc._tasks import TaskStatus
29
+
30
+ if sys.version_info >= (3, 11):
31
+ from typing import TypeVarTuple, Unpack
32
+ else:
33
+ from typing_extensions import TypeVarTuple, Unpack
34
+
35
+ T_Retval = TypeVar("T_Retval")
36
+ T_co = TypeVar("T_co", covariant=True)
37
+ PosArgsT = TypeVarTuple("PosArgsT")
38
+
39
+
40
+ def run(
41
+ func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval]], *args: Unpack[PosArgsT]
42
+ ) -> T_Retval:
43
+ """
44
+ Call a coroutine function from a worker thread.
45
+
46
+ :param func: a coroutine function
47
+ :param args: positional arguments for the callable
48
+ :return: the return value of the coroutine function
49
+
50
+ """
51
+ try:
52
+ async_backend = threadlocals.current_async_backend
53
+ token = threadlocals.current_token
54
+ except AttributeError:
55
+ raise RuntimeError(
56
+ "This function can only be run from an AnyIO worker thread"
57
+ ) from None
58
+
59
+ return async_backend.run_async_from_thread(func, args, token=token)
60
+
61
+
62
+ def run_sync(
63
+ func: Callable[[Unpack[PosArgsT]], T_Retval], *args: Unpack[PosArgsT]
64
+ ) -> T_Retval:
65
+ """
66
+ Call a function in the event loop thread from a worker thread.
67
+
68
+ :param func: a callable
69
+ :param args: positional arguments for the callable
70
+ :return: the return value of the callable
71
+
72
+ """
73
+ try:
74
+ async_backend = threadlocals.current_async_backend
75
+ token = threadlocals.current_token
76
+ except AttributeError:
77
+ raise RuntimeError(
78
+ "This function can only be run from an AnyIO worker thread"
79
+ ) from None
80
+
81
+ return async_backend.run_sync_from_thread(func, args, token=token)
82
+
83
+
84
+ class _BlockingAsyncContextManager(Generic[T_co], AbstractContextManager):
85
+ _enter_future: Future[T_co]
86
+ _exit_future: Future[bool | None]
87
+ _exit_event: Event
88
+ _exit_exc_info: tuple[
89
+ type[BaseException] | None, BaseException | None, TracebackType | None
90
+ ] = (None, None, None)
91
+
92
+ def __init__(
93
+ self, async_cm: AbstractAsyncContextManager[T_co], portal: BlockingPortal
94
+ ):
95
+ self._async_cm = async_cm
96
+ self._portal = portal
97
+
98
+ async def run_async_cm(self) -> bool | None:
99
+ try:
100
+ self._exit_event = Event()
101
+ value = await self._async_cm.__aenter__()
102
+ except BaseException as exc:
103
+ self._enter_future.set_exception(exc)
104
+ raise
105
+ else:
106
+ self._enter_future.set_result(value)
107
+
108
+ try:
109
+ # Wait for the sync context manager to exit.
110
+ # This next statement can raise `get_cancelled_exc_class()` if
111
+ # something went wrong in a task group in this async context
112
+ # manager.
113
+ await self._exit_event.wait()
114
+ finally:
115
+ # In case of cancellation, it could be that we end up here before
116
+ # `_BlockingAsyncContextManager.__exit__` is called, and an
117
+ # `_exit_exc_info` has been set.
118
+ result = await self._async_cm.__aexit__(*self._exit_exc_info)
119
+ return result
120
+
121
+ def __enter__(self) -> T_co:
122
+ self._enter_future = Future()
123
+ self._exit_future = self._portal.start_task_soon(self.run_async_cm)
124
+ return self._enter_future.result()
125
+
126
+ def __exit__(
127
+ self,
128
+ __exc_type: type[BaseException] | None,
129
+ __exc_value: BaseException | None,
130
+ __traceback: TracebackType | None,
131
+ ) -> bool | None:
132
+ self._exit_exc_info = __exc_type, __exc_value, __traceback
133
+ self._portal.call(self._exit_event.set)
134
+ return self._exit_future.result()
135
+
136
+
137
+ class _BlockingPortalTaskStatus(TaskStatus):
138
+ def __init__(self, future: Future):
139
+ self._future = future
140
+
141
+ def started(self, value: object = None) -> None:
142
+ self._future.set_result(value)
143
+
144
+
145
+ class BlockingPortal:
146
+ """An object that lets external threads run code in an asynchronous event loop."""
147
+
148
+ def __new__(cls) -> BlockingPortal:
149
+ return get_async_backend().create_blocking_portal()
150
+
151
+ def __init__(self) -> None:
152
+ self._event_loop_thread_id: int | None = get_ident()
153
+ self._stop_event = Event()
154
+ self._task_group = create_task_group()
155
+ self._cancelled_exc_class = get_cancelled_exc_class()
156
+
157
+ async def __aenter__(self) -> BlockingPortal:
158
+ await self._task_group.__aenter__()
159
+ return self
160
+
161
+ async def __aexit__(
162
+ self,
163
+ exc_type: type[BaseException] | None,
164
+ exc_val: BaseException | None,
165
+ exc_tb: TracebackType | None,
166
+ ) -> bool | None:
167
+ await self.stop()
168
+ return await self._task_group.__aexit__(exc_type, exc_val, exc_tb)
169
+
170
+ def _check_running(self) -> None:
171
+ if self._event_loop_thread_id is None:
172
+ raise RuntimeError("This portal is not running")
173
+ if self._event_loop_thread_id == get_ident():
174
+ raise RuntimeError(
175
+ "This method cannot be called from the event loop thread"
176
+ )
177
+
178
+ async def sleep_until_stopped(self) -> None:
179
+ """Sleep until :meth:`stop` is called."""
180
+ await self._stop_event.wait()
181
+
182
+ async def stop(self, cancel_remaining: bool = False) -> None:
183
+ """
184
+ Signal the portal to shut down.
185
+
186
+ This marks the portal as no longer accepting new calls and exits from
187
+ :meth:`sleep_until_stopped`.
188
+
189
+ :param cancel_remaining: ``True`` to cancel all the remaining tasks, ``False``
190
+ to let them finish before returning
191
+
192
+ """
193
+ self._event_loop_thread_id = None
194
+ self._stop_event.set()
195
+ if cancel_remaining:
196
+ self._task_group.cancel_scope.cancel()
197
+
198
+ async def _call_func(
199
+ self,
200
+ func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval] | T_Retval],
201
+ args: tuple[Unpack[PosArgsT]],
202
+ kwargs: dict[str, Any],
203
+ future: Future[T_Retval],
204
+ ) -> None:
205
+ def callback(f: Future[T_Retval]) -> None:
206
+ if f.cancelled() and self._event_loop_thread_id not in (
207
+ None,
208
+ get_ident(),
209
+ ):
210
+ self.call(scope.cancel)
211
+
212
+ try:
213
+ retval_or_awaitable = func(*args, **kwargs)
214
+ if isawaitable(retval_or_awaitable):
215
+ with CancelScope() as scope:
216
+ if future.cancelled():
217
+ scope.cancel()
218
+ else:
219
+ future.add_done_callback(callback)
220
+
221
+ retval = await retval_or_awaitable
222
+ else:
223
+ retval = retval_or_awaitable
224
+ except self._cancelled_exc_class:
225
+ future.cancel()
226
+ future.set_running_or_notify_cancel()
227
+ except BaseException as exc:
228
+ if not future.cancelled():
229
+ future.set_exception(exc)
230
+
231
+ # Let base exceptions fall through
232
+ if not isinstance(exc, Exception):
233
+ raise
234
+ else:
235
+ if not future.cancelled():
236
+ future.set_result(retval)
237
+ finally:
238
+ scope = None # type: ignore[assignment]
239
+
240
+ def _spawn_task_from_thread(
241
+ self,
242
+ func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval] | T_Retval],
243
+ args: tuple[Unpack[PosArgsT]],
244
+ kwargs: dict[str, Any],
245
+ name: object,
246
+ future: Future[T_Retval],
247
+ ) -> None:
248
+ """
249
+ Spawn a new task using the given callable.
250
+
251
+ Implementors must ensure that the future is resolved when the task finishes.
252
+
253
+ :param func: a callable
254
+ :param args: positional arguments to be passed to the callable
255
+ :param kwargs: keyword arguments to be passed to the callable
256
+ :param name: name of the task (will be coerced to a string if not ``None``)
257
+ :param future: a future that will resolve to the return value of the callable,
258
+ or the exception raised during its execution
259
+
260
+ """
261
+ raise NotImplementedError
262
+
263
+ @overload
264
+ def call(
265
+ self,
266
+ func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval]],
267
+ *args: Unpack[PosArgsT],
268
+ ) -> T_Retval: ...
269
+
270
+ @overload
271
+ def call(
272
+ self, func: Callable[[Unpack[PosArgsT]], T_Retval], *args: Unpack[PosArgsT]
273
+ ) -> T_Retval: ...
274
+
275
+ def call(
276
+ self,
277
+ func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval] | T_Retval],
278
+ *args: Unpack[PosArgsT],
279
+ ) -> T_Retval:
280
+ """
281
+ Call the given function in the event loop thread.
282
+
283
+ If the callable returns a coroutine object, it is awaited on.
284
+
285
+ :param func: any callable
286
+ :raises RuntimeError: if the portal is not running or if this method is called
287
+ from within the event loop thread
288
+
289
+ """
290
+ return cast(T_Retval, self.start_task_soon(func, *args).result())
291
+
292
+ @overload
293
+ def start_task_soon(
294
+ self,
295
+ func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval]],
296
+ *args: Unpack[PosArgsT],
297
+ name: object = None,
298
+ ) -> Future[T_Retval]: ...
299
+
300
+ @overload
301
+ def start_task_soon(
302
+ self,
303
+ func: Callable[[Unpack[PosArgsT]], T_Retval],
304
+ *args: Unpack[PosArgsT],
305
+ name: object = None,
306
+ ) -> Future[T_Retval]: ...
307
+
308
+ def start_task_soon(
309
+ self,
310
+ func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval] | T_Retval],
311
+ *args: Unpack[PosArgsT],
312
+ name: object = None,
313
+ ) -> Future[T_Retval]:
314
+ """
315
+ Start a task in the portal's task group.
316
+
317
+ The task will be run inside a cancel scope which can be cancelled by cancelling
318
+ the returned future.
319
+
320
+ :param func: the target function
321
+ :param args: positional arguments passed to ``func``
322
+ :param name: name of the task (will be coerced to a string if not ``None``)
323
+ :return: a future that resolves with the return value of the callable if the
324
+ task completes successfully, or with the exception raised in the task
325
+ :raises RuntimeError: if the portal is not running or if this method is called
326
+ from within the event loop thread
327
+ :rtype: concurrent.futures.Future[T_Retval]
328
+
329
+ .. versionadded:: 3.0
330
+
331
+ """
332
+ self._check_running()
333
+ f: Future[T_Retval] = Future()
334
+ self._spawn_task_from_thread(func, args, {}, name, f)
335
+ return f
336
+
337
+ def start_task(
338
+ self,
339
+ func: Callable[..., Awaitable[T_Retval]],
340
+ *args: object,
341
+ name: object = None,
342
+ ) -> tuple[Future[T_Retval], Any]:
343
+ """
344
+ Start a task in the portal's task group and wait until it signals for readiness.
345
+
346
+ This method works the same way as :meth:`.abc.TaskGroup.start`.
347
+
348
+ :param func: the target function
349
+ :param args: positional arguments passed to ``func``
350
+ :param name: name of the task (will be coerced to a string if not ``None``)
351
+ :return: a tuple of (future, task_status_value) where the ``task_status_value``
352
+ is the value passed to ``task_status.started()`` from within the target
353
+ function
354
+ :rtype: tuple[concurrent.futures.Future[T_Retval], Any]
355
+
356
+ .. versionadded:: 3.0
357
+
358
+ """
359
+
360
+ def task_done(future: Future[T_Retval]) -> None:
361
+ if not task_status_future.done():
362
+ if future.cancelled():
363
+ task_status_future.cancel()
364
+ elif future.exception():
365
+ task_status_future.set_exception(future.exception())
366
+ else:
367
+ exc = RuntimeError(
368
+ "Task exited without calling task_status.started()"
369
+ )
370
+ task_status_future.set_exception(exc)
371
+
372
+ self._check_running()
373
+ task_status_future: Future = Future()
374
+ task_status = _BlockingPortalTaskStatus(task_status_future)
375
+ f: Future = Future()
376
+ f.add_done_callback(task_done)
377
+ self._spawn_task_from_thread(func, args, {"task_status": task_status}, name, f)
378
+ return f, task_status_future.result()
379
+
380
+ def wrap_async_context_manager(
381
+ self, cm: AbstractAsyncContextManager[T_co]
382
+ ) -> AbstractContextManager[T_co]:
383
+ """
384
+ Wrap an async context manager as a synchronous context manager via this portal.
385
+
386
+ Spawns a task that will call both ``__aenter__()`` and ``__aexit__()``, stopping
387
+ in the middle until the synchronous context manager exits.
388
+
389
+ :param cm: an asynchronous context manager
390
+ :return: a synchronous context manager
391
+
392
+ .. versionadded:: 2.1
393
+
394
+ """
395
+ return _BlockingAsyncContextManager(cm, self)
396
+
397
+
398
+ @dataclass
399
+ class BlockingPortalProvider:
400
+ """
401
+ A manager for a blocking portal. Used as a context manager. The first thread to
402
+ enter this context manager causes a blocking portal to be started with the specific
403
+ parameters, and the last thread to exit causes the portal to be shut down. Thus,
404
+ there will be exactly one blocking portal running in this context as long as at
405
+ least one thread has entered this context manager.
406
+
407
+ The parameters are the same as for :func:`~anyio.run`.
408
+
409
+ :param backend: name of the backend
410
+ :param backend_options: backend options
411
+
412
+ .. versionadded:: 4.4
413
+ """
414
+
415
+ backend: str = "asyncio"
416
+ backend_options: dict[str, Any] | None = None
417
+ _lock: Lock = field(init=False, default_factory=Lock)
418
+ _leases: int = field(init=False, default=0)
419
+ _portal: BlockingPortal = field(init=False)
420
+ _portal_cm: AbstractContextManager[BlockingPortal] | None = field(
421
+ init=False, default=None
422
+ )
423
+
424
+ def __enter__(self) -> BlockingPortal:
425
+ with self._lock:
426
+ if self._portal_cm is None:
427
+ self._portal_cm = start_blocking_portal(
428
+ self.backend, self.backend_options
429
+ )
430
+ self._portal = self._portal_cm.__enter__()
431
+
432
+ self._leases += 1
433
+ return self._portal
434
+
435
+ def __exit__(
436
+ self,
437
+ exc_type: type[BaseException] | None,
438
+ exc_val: BaseException | None,
439
+ exc_tb: TracebackType | None,
440
+ ) -> None:
441
+ portal_cm: AbstractContextManager[BlockingPortal] | None = None
442
+ with self._lock:
443
+ assert self._portal_cm
444
+ assert self._leases > 0
445
+ self._leases -= 1
446
+ if not self._leases:
447
+ portal_cm = self._portal_cm
448
+ self._portal_cm = None
449
+ del self._portal
450
+
451
+ if portal_cm:
452
+ portal_cm.__exit__(None, None, None)
453
+
454
+
455
+ @contextmanager
456
+ def start_blocking_portal(
457
+ backend: str = "asyncio", backend_options: dict[str, Any] | None = None
458
+ ) -> Generator[BlockingPortal, Any, None]:
459
+ """
460
+ Start a new event loop in a new thread and run a blocking portal in its main task.
461
+
462
+ The parameters are the same as for :func:`~anyio.run`.
463
+
464
+ :param backend: name of the backend
465
+ :param backend_options: backend options
466
+ :return: a context manager that yields a blocking portal
467
+
468
+ .. versionchanged:: 3.0
469
+ Usage as a context manager is now required.
470
+
471
+ """
472
+
473
+ async def run_portal() -> None:
474
+ async with BlockingPortal() as portal_:
475
+ future.set_result(portal_)
476
+ await portal_.sleep_until_stopped()
477
+
478
+ def run_blocking_portal() -> None:
479
+ if future.set_running_or_notify_cancel():
480
+ try:
481
+ _eventloop.run(
482
+ run_portal, backend=backend, backend_options=backend_options
483
+ )
484
+ except BaseException as exc:
485
+ if not future.done():
486
+ future.set_exception(exc)
487
+
488
+ future: Future[BlockingPortal] = Future()
489
+ thread = Thread(target=run_blocking_portal, daemon=True)
490
+ thread.start()
491
+ try:
492
+ cancel_remaining_tasks = False
493
+ portal = future.result()
494
+ try:
495
+ yield portal
496
+ except BaseException:
497
+ cancel_remaining_tasks = True
498
+ raise
499
+ finally:
500
+ try:
501
+ portal.call(portal.stop, cancel_remaining_tasks)
502
+ except RuntimeError:
503
+ pass
504
+ finally:
505
+ thread.join()
506
+
507
+
508
+ def check_cancelled() -> None:
509
+ """
510
+ Check if the cancel scope of the host task's running the current worker thread has
511
+ been cancelled.
512
+
513
+ If the host task's current cancel scope has indeed been cancelled, the
514
+ backend-specific cancellation exception will be raised.
515
+
516
+ :raises RuntimeError: if the current thread was not spawned by
517
+ :func:`.to_thread.run_sync`
518
+
519
+ """
520
+ try:
521
+ async_backend: AsyncBackend = threadlocals.current_async_backend
522
+ except AttributeError:
523
+ raise RuntimeError(
524
+ "This function can only be run from an AnyIO worker thread"
525
+ ) from None
526
+
527
+ async_backend.check_cancelled()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/lowlevel.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import enum
4
+ from dataclasses import dataclass
5
+ from typing import Any, Generic, Literal, TypeVar, overload
6
+ from weakref import WeakKeyDictionary
7
+
8
+ from ._core._eventloop import get_async_backend
9
+
10
+ T = TypeVar("T")
11
+ D = TypeVar("D")
12
+
13
+
14
+ async def checkpoint() -> None:
15
+ """
16
+ Check for cancellation and allow the scheduler to switch to another task.
17
+
18
+ Equivalent to (but more efficient than)::
19
+
20
+ await checkpoint_if_cancelled()
21
+ await cancel_shielded_checkpoint()
22
+
23
+
24
+ .. versionadded:: 3.0
25
+
26
+ """
27
+ await get_async_backend().checkpoint()
28
+
29
+
30
+ async def checkpoint_if_cancelled() -> None:
31
+ """
32
+ Enter a checkpoint if the enclosing cancel scope has been cancelled.
33
+
34
+ This does not allow the scheduler to switch to a different task.
35
+
36
+ .. versionadded:: 3.0
37
+
38
+ """
39
+ await get_async_backend().checkpoint_if_cancelled()
40
+
41
+
42
+ async def cancel_shielded_checkpoint() -> None:
43
+ """
44
+ Allow the scheduler to switch to another task but without checking for cancellation.
45
+
46
+ Equivalent to (but potentially more efficient than)::
47
+
48
+ with CancelScope(shield=True):
49
+ await checkpoint()
50
+
51
+
52
+ .. versionadded:: 3.0
53
+
54
+ """
55
+ await get_async_backend().cancel_shielded_checkpoint()
56
+
57
+
58
+ def current_token() -> object:
59
+ """
60
+ Return a backend specific token object that can be used to get back to the event
61
+ loop.
62
+
63
+ """
64
+ return get_async_backend().current_token()
65
+
66
+
67
+ _run_vars: WeakKeyDictionary[Any, dict[str, Any]] = WeakKeyDictionary()
68
+ _token_wrappers: dict[Any, _TokenWrapper] = {}
69
+
70
+
71
+ @dataclass(frozen=True)
72
+ class _TokenWrapper:
73
+ __slots__ = "_token", "__weakref__"
74
+ _token: object
75
+
76
+
77
+ class _NoValueSet(enum.Enum):
78
+ NO_VALUE_SET = enum.auto()
79
+
80
+
81
+ class RunvarToken(Generic[T]):
82
+ __slots__ = "_var", "_value", "_redeemed"
83
+
84
+ def __init__(self, var: RunVar[T], value: T | Literal[_NoValueSet.NO_VALUE_SET]):
85
+ self._var = var
86
+ self._value: T | Literal[_NoValueSet.NO_VALUE_SET] = value
87
+ self._redeemed = False
88
+
89
+
90
+ class RunVar(Generic[T]):
91
+ """
92
+ Like a :class:`~contextvars.ContextVar`, except scoped to the running event loop.
93
+ """
94
+
95
+ __slots__ = "_name", "_default"
96
+
97
+ NO_VALUE_SET: Literal[_NoValueSet.NO_VALUE_SET] = _NoValueSet.NO_VALUE_SET
98
+
99
+ _token_wrappers: set[_TokenWrapper] = set()
100
+
101
+ def __init__(
102
+ self, name: str, default: T | Literal[_NoValueSet.NO_VALUE_SET] = NO_VALUE_SET
103
+ ):
104
+ self._name = name
105
+ self._default = default
106
+
107
+ @property
108
+ def _current_vars(self) -> dict[str, T]:
109
+ token = current_token()
110
+ try:
111
+ return _run_vars[token]
112
+ except KeyError:
113
+ run_vars = _run_vars[token] = {}
114
+ return run_vars
115
+
116
+ @overload
117
+ def get(self, default: D) -> T | D: ...
118
+
119
+ @overload
120
+ def get(self) -> T: ...
121
+
122
+ def get(
123
+ self, default: D | Literal[_NoValueSet.NO_VALUE_SET] = NO_VALUE_SET
124
+ ) -> T | D:
125
+ try:
126
+ return self._current_vars[self._name]
127
+ except KeyError:
128
+ if default is not RunVar.NO_VALUE_SET:
129
+ return default
130
+ elif self._default is not RunVar.NO_VALUE_SET:
131
+ return self._default
132
+
133
+ raise LookupError(
134
+ f'Run variable "{self._name}" has no value and no default set'
135
+ )
136
+
137
+ def set(self, value: T) -> RunvarToken[T]:
138
+ current_vars = self._current_vars
139
+ token = RunvarToken(self, current_vars.get(self._name, RunVar.NO_VALUE_SET))
140
+ current_vars[self._name] = value
141
+ return token
142
+
143
+ def reset(self, token: RunvarToken[T]) -> None:
144
+ if token._var is not self:
145
+ raise ValueError("This token does not belong to this RunVar")
146
+
147
+ if token._redeemed:
148
+ raise ValueError("This token has already been used")
149
+
150
+ if token._value is _NoValueSet.NO_VALUE_SET:
151
+ try:
152
+ del self._current_vars[self._name]
153
+ except KeyError:
154
+ pass
155
+ else:
156
+ self._current_vars[self._name] = token._value
157
+
158
+ token._redeemed = True
159
+
160
+ def __repr__(self) -> str:
161
+ return f"<RunVar name={self._name!r}>"
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/py.typed ADDED
File without changes
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/pytest_plugin.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ from collections.abc import Generator, Iterator
5
+ from contextlib import ExitStack, contextmanager
6
+ from inspect import isasyncgenfunction, iscoroutinefunction, ismethod
7
+ from typing import Any, cast
8
+
9
+ import pytest
10
+ import sniffio
11
+ from _pytest.fixtures import SubRequest
12
+ from _pytest.outcomes import Exit
13
+
14
+ from ._core._eventloop import get_all_backends, get_async_backend
15
+ from ._core._exceptions import iterate_exceptions
16
+ from .abc import TestRunner
17
+
18
+ if sys.version_info < (3, 11):
19
+ from exceptiongroup import ExceptionGroup
20
+
21
+ _current_runner: TestRunner | None = None
22
+ _runner_stack: ExitStack | None = None
23
+ _runner_leases = 0
24
+
25
+
26
+ def extract_backend_and_options(backend: object) -> tuple[str, dict[str, Any]]:
27
+ if isinstance(backend, str):
28
+ return backend, {}
29
+ elif isinstance(backend, tuple) and len(backend) == 2:
30
+ if isinstance(backend[0], str) and isinstance(backend[1], dict):
31
+ return cast(tuple[str, dict[str, Any]], backend)
32
+
33
+ raise TypeError("anyio_backend must be either a string or tuple of (string, dict)")
34
+
35
+
36
+ @contextmanager
37
+ def get_runner(
38
+ backend_name: str, backend_options: dict[str, Any]
39
+ ) -> Iterator[TestRunner]:
40
+ global _current_runner, _runner_leases, _runner_stack
41
+ if _current_runner is None:
42
+ asynclib = get_async_backend(backend_name)
43
+ _runner_stack = ExitStack()
44
+ if sniffio.current_async_library_cvar.get(None) is None:
45
+ # Since we're in control of the event loop, we can cache the name of the
46
+ # async library
47
+ token = sniffio.current_async_library_cvar.set(backend_name)
48
+ _runner_stack.callback(sniffio.current_async_library_cvar.reset, token)
49
+
50
+ backend_options = backend_options or {}
51
+ _current_runner = _runner_stack.enter_context(
52
+ asynclib.create_test_runner(backend_options)
53
+ )
54
+
55
+ _runner_leases += 1
56
+ try:
57
+ yield _current_runner
58
+ finally:
59
+ _runner_leases -= 1
60
+ if not _runner_leases:
61
+ assert _runner_stack is not None
62
+ _runner_stack.close()
63
+ _runner_stack = _current_runner = None
64
+
65
+
66
+ def pytest_configure(config: Any) -> None:
67
+ config.addinivalue_line(
68
+ "markers",
69
+ "anyio: mark the (coroutine function) test to be run "
70
+ "asynchronously via anyio.",
71
+ )
72
+
73
+
74
+ @pytest.hookimpl(hookwrapper=True)
75
+ def pytest_fixture_setup(fixturedef: Any, request: Any) -> Generator[Any]:
76
+ def wrapper(
77
+ *args: Any, anyio_backend: Any, request: SubRequest, **kwargs: Any
78
+ ) -> Any:
79
+ # Rebind any fixture methods to the request instance
80
+ if (
81
+ request.instance
82
+ and ismethod(func)
83
+ and type(func.__self__) is type(request.instance)
84
+ ):
85
+ local_func = func.__func__.__get__(request.instance)
86
+ else:
87
+ local_func = func
88
+
89
+ backend_name, backend_options = extract_backend_and_options(anyio_backend)
90
+ if has_backend_arg:
91
+ kwargs["anyio_backend"] = anyio_backend
92
+
93
+ if has_request_arg:
94
+ kwargs["request"] = request
95
+
96
+ with get_runner(backend_name, backend_options) as runner:
97
+ if isasyncgenfunction(local_func):
98
+ yield from runner.run_asyncgen_fixture(local_func, kwargs)
99
+ else:
100
+ yield runner.run_fixture(local_func, kwargs)
101
+
102
+ # Only apply this to coroutine functions and async generator functions in requests
103
+ # that involve the anyio_backend fixture
104
+ func = fixturedef.func
105
+ if isasyncgenfunction(func) or iscoroutinefunction(func):
106
+ if "anyio_backend" in request.fixturenames:
107
+ fixturedef.func = wrapper
108
+ original_argname = fixturedef.argnames
109
+
110
+ if not (has_backend_arg := "anyio_backend" in fixturedef.argnames):
111
+ fixturedef.argnames += ("anyio_backend",)
112
+
113
+ if not (has_request_arg := "request" in fixturedef.argnames):
114
+ fixturedef.argnames += ("request",)
115
+
116
+ try:
117
+ return (yield)
118
+ finally:
119
+ fixturedef.func = func
120
+ fixturedef.argnames = original_argname
121
+
122
+ return (yield)
123
+
124
+
125
+ @pytest.hookimpl(tryfirst=True)
126
+ def pytest_pycollect_makeitem(collector: Any, name: Any, obj: Any) -> None:
127
+ if collector.istestfunction(obj, name):
128
+ inner_func = obj.hypothesis.inner_test if hasattr(obj, "hypothesis") else obj
129
+ if iscoroutinefunction(inner_func):
130
+ marker = collector.get_closest_marker("anyio")
131
+ own_markers = getattr(obj, "pytestmark", ())
132
+ if marker or any(marker.name == "anyio" for marker in own_markers):
133
+ pytest.mark.usefixtures("anyio_backend")(obj)
134
+
135
+
136
+ @pytest.hookimpl(tryfirst=True)
137
+ def pytest_pyfunc_call(pyfuncitem: Any) -> bool | None:
138
+ def run_with_hypothesis(**kwargs: Any) -> None:
139
+ with get_runner(backend_name, backend_options) as runner:
140
+ runner.run_test(original_func, kwargs)
141
+
142
+ backend = pyfuncitem.funcargs.get("anyio_backend")
143
+ if backend:
144
+ backend_name, backend_options = extract_backend_and_options(backend)
145
+
146
+ if hasattr(pyfuncitem.obj, "hypothesis"):
147
+ # Wrap the inner test function unless it's already wrapped
148
+ original_func = pyfuncitem.obj.hypothesis.inner_test
149
+ if original_func.__qualname__ != run_with_hypothesis.__qualname__:
150
+ if iscoroutinefunction(original_func):
151
+ pyfuncitem.obj.hypothesis.inner_test = run_with_hypothesis
152
+
153
+ return None
154
+
155
+ if iscoroutinefunction(pyfuncitem.obj):
156
+ funcargs = pyfuncitem.funcargs
157
+ testargs = {arg: funcargs[arg] for arg in pyfuncitem._fixtureinfo.argnames}
158
+ with get_runner(backend_name, backend_options) as runner:
159
+ try:
160
+ runner.run_test(pyfuncitem.obj, testargs)
161
+ except ExceptionGroup as excgrp:
162
+ for exc in iterate_exceptions(excgrp):
163
+ if isinstance(exc, (Exit, KeyboardInterrupt, SystemExit)):
164
+ raise exc from excgrp
165
+
166
+ raise
167
+
168
+ return True
169
+
170
+ return None
171
+
172
+
173
+ @pytest.fixture(scope="module", params=get_all_backends())
174
+ def anyio_backend(request: Any) -> Any:
175
+ return request.param
176
+
177
+
178
+ @pytest.fixture
179
+ def anyio_backend_name(anyio_backend: Any) -> str:
180
+ if isinstance(anyio_backend, str):
181
+ return anyio_backend
182
+ else:
183
+ return anyio_backend[0]
184
+
185
+
186
+ @pytest.fixture
187
+ def anyio_backend_options(anyio_backend: Any) -> dict[str, Any]:
188
+ if isinstance(anyio_backend, str):
189
+ return {}
190
+ else:
191
+ return anyio_backend[1]
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_interpreter.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import atexit
4
+ import os
5
+ import pickle
6
+ import sys
7
+ from collections import deque
8
+ from collections.abc import Callable
9
+ from textwrap import dedent
10
+ from typing import Any, Final, TypeVar
11
+
12
+ from . import current_time, to_thread
13
+ from ._core._exceptions import BrokenWorkerIntepreter
14
+ from ._core._synchronization import CapacityLimiter
15
+ from .lowlevel import RunVar
16
+
17
+ if sys.version_info >= (3, 11):
18
+ from typing import TypeVarTuple, Unpack
19
+ else:
20
+ from typing_extensions import TypeVarTuple, Unpack
21
+
22
+ UNBOUND: Final = 2 # I have no clue how this works, but it was used in the stdlib
23
+ FMT_UNPICKLED: Final = 0
24
+ FMT_PICKLED: Final = 1
25
+ DEFAULT_CPU_COUNT: Final = 8 # this is just an arbitrarily selected value
26
+ MAX_WORKER_IDLE_TIME = (
27
+ 30 # seconds a subinterpreter can be idle before becoming eligible for pruning
28
+ )
29
+
30
+ T_Retval = TypeVar("T_Retval")
31
+ PosArgsT = TypeVarTuple("PosArgsT")
32
+
33
+ _idle_workers = RunVar[deque["Worker"]]("_available_workers")
34
+ _default_interpreter_limiter = RunVar[CapacityLimiter]("_default_interpreter_limiter")
35
+
36
+
37
+ class Worker:
38
+ _run_func = compile(
39
+ dedent("""
40
+ import _interpqueues as queues
41
+ import _interpreters as interpreters
42
+ from pickle import loads, dumps, HIGHEST_PROTOCOL
43
+
44
+ item = queues.get(queue_id)[0]
45
+ try:
46
+ func, args = loads(item)
47
+ retval = func(*args)
48
+ except BaseException as exc:
49
+ is_exception = True
50
+ retval = exc
51
+ else:
52
+ is_exception = False
53
+
54
+ try:
55
+ queues.put(queue_id, (retval, is_exception), FMT_UNPICKLED, UNBOUND)
56
+ except interpreters.NotShareableError:
57
+ retval = dumps(retval, HIGHEST_PROTOCOL)
58
+ queues.put(queue_id, (retval, is_exception), FMT_PICKLED, UNBOUND)
59
+ """),
60
+ "<string>",
61
+ "exec",
62
+ )
63
+
64
+ last_used: float = 0
65
+
66
+ _initialized: bool = False
67
+ _interpreter_id: int
68
+ _queue_id: int
69
+
70
+ def initialize(self) -> None:
71
+ import _interpqueues as queues
72
+ import _interpreters as interpreters
73
+
74
+ self._interpreter_id = interpreters.create()
75
+ self._queue_id = queues.create(2, FMT_UNPICKLED, UNBOUND) # type: ignore[call-arg]
76
+ self._initialized = True
77
+ interpreters.set___main___attrs(
78
+ self._interpreter_id,
79
+ {
80
+ "queue_id": self._queue_id,
81
+ "FMT_PICKLED": FMT_PICKLED,
82
+ "FMT_UNPICKLED": FMT_UNPICKLED,
83
+ "UNBOUND": UNBOUND,
84
+ },
85
+ )
86
+
87
+ def destroy(self) -> None:
88
+ import _interpqueues as queues
89
+ import _interpreters as interpreters
90
+
91
+ if self._initialized:
92
+ interpreters.destroy(self._interpreter_id)
93
+ queues.destroy(self._queue_id)
94
+
95
+ def _call(
96
+ self,
97
+ func: Callable[..., T_Retval],
98
+ args: tuple[Any],
99
+ ) -> tuple[Any, bool]:
100
+ import _interpqueues as queues
101
+ import _interpreters as interpreters
102
+
103
+ if not self._initialized:
104
+ self.initialize()
105
+
106
+ payload = pickle.dumps((func, args), pickle.HIGHEST_PROTOCOL)
107
+ queues.put(self._queue_id, payload, FMT_PICKLED, UNBOUND) # type: ignore[call-arg]
108
+
109
+ res: Any
110
+ is_exception: bool
111
+ if exc_info := interpreters.exec(self._interpreter_id, self._run_func): # type: ignore[func-returns-value,arg-type]
112
+ raise BrokenWorkerIntepreter(exc_info)
113
+
114
+ (res, is_exception), fmt = queues.get(self._queue_id)[:2]
115
+ if fmt == FMT_PICKLED:
116
+ res = pickle.loads(res)
117
+
118
+ return res, is_exception
119
+
120
+ async def call(
121
+ self,
122
+ func: Callable[..., T_Retval],
123
+ args: tuple[Any],
124
+ limiter: CapacityLimiter,
125
+ ) -> T_Retval:
126
+ result, is_exception = await to_thread.run_sync(
127
+ self._call,
128
+ func,
129
+ args,
130
+ limiter=limiter,
131
+ )
132
+ if is_exception:
133
+ raise result
134
+
135
+ return result
136
+
137
+
138
+ def _stop_workers(workers: deque[Worker]) -> None:
139
+ for worker in workers:
140
+ worker.destroy()
141
+
142
+ workers.clear()
143
+
144
+
145
+ async def run_sync(
146
+ func: Callable[[Unpack[PosArgsT]], T_Retval],
147
+ *args: Unpack[PosArgsT],
148
+ limiter: CapacityLimiter | None = None,
149
+ ) -> T_Retval:
150
+ """
151
+ Call the given function with the given arguments in a subinterpreter.
152
+
153
+ This function has no ``cancellable`` option: once the call has started, a
154
+ cancellation of the task waiting for its completion is ignored until the call has
155
+ finished running in the subinterpreter.
156
+
157
+ .. warning:: This feature is **experimental**. The upstream interpreter API has not
158
+ yet been finalized or thoroughly tested, so don't rely on this for anything
159
+ mission critical.
160
+
161
+ :param func: a callable
162
+ :param args: positional arguments for the callable
163
+ :param limiter: capacity limiter to use to limit the total amount of subinterpreters
164
+ running (if omitted, the default limiter is used)
165
+ :return: the result of the call
166
+ :raises BrokenWorkerIntepreter: if there's an internal error in a subinterpreter
167
+
168
+ """
169
+ if sys.version_info <= (3, 13):
170
+ raise RuntimeError("subinterpreters require at least Python 3.13")
171
+
172
+ if limiter is None:
173
+ limiter = current_default_interpreter_limiter()
174
+
175
+ try:
176
+ idle_workers = _idle_workers.get()
177
+ except LookupError:
178
+ idle_workers = deque()
179
+ _idle_workers.set(idle_workers)
180
+ atexit.register(_stop_workers, idle_workers)
181
+
182
+ async with limiter:
183
+ try:
184
+ worker = idle_workers.pop()
185
+ except IndexError:
186
+ worker = Worker()
187
+
188
+ try:
189
+ return await worker.call(func, args, limiter)
190
+ finally:
191
+ # Prune workers that have been idle for too long
192
+ now = current_time()
193
+ while idle_workers:
194
+ if now - idle_workers[0].last_used <= MAX_WORKER_IDLE_TIME:
195
+ break
196
+
197
+ await to_thread.run_sync(idle_workers.popleft().destroy, limiter=limiter)
198
+
199
+ worker.last_used = current_time()
200
+ idle_workers.append(worker)
201
+
202
+
203
+ def current_default_interpreter_limiter() -> CapacityLimiter:
204
+ """
205
+ Return the capacity limiter that is used by default to limit the number of
206
+ concurrently running subinterpreters.
207
+
208
+ Defaults to the number of CPU cores.
209
+
210
+ :return: a capacity limiter object
211
+
212
+ """
213
+ try:
214
+ return _default_interpreter_limiter.get()
215
+ except LookupError:
216
+ limiter = CapacityLimiter(os.cpu_count() or DEFAULT_CPU_COUNT)
217
+ _default_interpreter_limiter.set(limiter)
218
+ return limiter
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_process.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import pickle
5
+ import subprocess
6
+ import sys
7
+ from collections import deque
8
+ from collections.abc import Callable
9
+ from importlib.util import module_from_spec, spec_from_file_location
10
+ from typing import TypeVar, cast
11
+
12
+ from ._core._eventloop import current_time, get_async_backend, get_cancelled_exc_class
13
+ from ._core._exceptions import BrokenWorkerProcess
14
+ from ._core._subprocesses import open_process
15
+ from ._core._synchronization import CapacityLimiter
16
+ from ._core._tasks import CancelScope, fail_after
17
+ from .abc import ByteReceiveStream, ByteSendStream, Process
18
+ from .lowlevel import RunVar, checkpoint_if_cancelled
19
+ from .streams.buffered import BufferedByteReceiveStream
20
+
21
+ if sys.version_info >= (3, 11):
22
+ from typing import TypeVarTuple, Unpack
23
+ else:
24
+ from typing_extensions import TypeVarTuple, Unpack
25
+
26
+ WORKER_MAX_IDLE_TIME = 300 # 5 minutes
27
+
28
+ T_Retval = TypeVar("T_Retval")
29
+ PosArgsT = TypeVarTuple("PosArgsT")
30
+
31
+ _process_pool_workers: RunVar[set[Process]] = RunVar("_process_pool_workers")
32
+ _process_pool_idle_workers: RunVar[deque[tuple[Process, float]]] = RunVar(
33
+ "_process_pool_idle_workers"
34
+ )
35
+ _default_process_limiter: RunVar[CapacityLimiter] = RunVar("_default_process_limiter")
36
+
37
+
38
+ async def run_sync( # type: ignore[return]
39
+ func: Callable[[Unpack[PosArgsT]], T_Retval],
40
+ *args: Unpack[PosArgsT],
41
+ cancellable: bool = False,
42
+ limiter: CapacityLimiter | None = None,
43
+ ) -> T_Retval:
44
+ """
45
+ Call the given function with the given arguments in a worker process.
46
+
47
+ If the ``cancellable`` option is enabled and the task waiting for its completion is
48
+ cancelled, the worker process running it will be abruptly terminated using SIGKILL
49
+ (or ``terminateProcess()`` on Windows).
50
+
51
+ :param func: a callable
52
+ :param args: positional arguments for the callable
53
+ :param cancellable: ``True`` to allow cancellation of the operation while it's
54
+ running
55
+ :param limiter: capacity limiter to use to limit the total amount of processes
56
+ running (if omitted, the default limiter is used)
57
+ :return: an awaitable that yields the return value of the function.
58
+
59
+ """
60
+
61
+ async def send_raw_command(pickled_cmd: bytes) -> object:
62
+ try:
63
+ await stdin.send(pickled_cmd)
64
+ response = await buffered.receive_until(b"\n", 50)
65
+ status, length = response.split(b" ")
66
+ if status not in (b"RETURN", b"EXCEPTION"):
67
+ raise RuntimeError(
68
+ f"Worker process returned unexpected response: {response!r}"
69
+ )
70
+
71
+ pickled_response = await buffered.receive_exactly(int(length))
72
+ except BaseException as exc:
73
+ workers.discard(process)
74
+ try:
75
+ process.kill()
76
+ with CancelScope(shield=True):
77
+ await process.aclose()
78
+ except ProcessLookupError:
79
+ pass
80
+
81
+ if isinstance(exc, get_cancelled_exc_class()):
82
+ raise
83
+ else:
84
+ raise BrokenWorkerProcess from exc
85
+
86
+ retval = pickle.loads(pickled_response)
87
+ if status == b"EXCEPTION":
88
+ assert isinstance(retval, BaseException)
89
+ raise retval
90
+ else:
91
+ return retval
92
+
93
+ # First pickle the request before trying to reserve a worker process
94
+ await checkpoint_if_cancelled()
95
+ request = pickle.dumps(("run", func, args), protocol=pickle.HIGHEST_PROTOCOL)
96
+
97
+ # If this is the first run in this event loop thread, set up the necessary variables
98
+ try:
99
+ workers = _process_pool_workers.get()
100
+ idle_workers = _process_pool_idle_workers.get()
101
+ except LookupError:
102
+ workers = set()
103
+ idle_workers = deque()
104
+ _process_pool_workers.set(workers)
105
+ _process_pool_idle_workers.set(idle_workers)
106
+ get_async_backend().setup_process_pool_exit_at_shutdown(workers)
107
+
108
+ async with limiter or current_default_process_limiter():
109
+ # Pop processes from the pool (starting from the most recently used) until we
110
+ # find one that hasn't exited yet
111
+ process: Process
112
+ while idle_workers:
113
+ process, idle_since = idle_workers.pop()
114
+ if process.returncode is None:
115
+ stdin = cast(ByteSendStream, process.stdin)
116
+ buffered = BufferedByteReceiveStream(
117
+ cast(ByteReceiveStream, process.stdout)
118
+ )
119
+
120
+ # Prune any other workers that have been idle for WORKER_MAX_IDLE_TIME
121
+ # seconds or longer
122
+ now = current_time()
123
+ killed_processes: list[Process] = []
124
+ while idle_workers:
125
+ if now - idle_workers[0][1] < WORKER_MAX_IDLE_TIME:
126
+ break
127
+
128
+ process_to_kill, idle_since = idle_workers.popleft()
129
+ process_to_kill.kill()
130
+ workers.remove(process_to_kill)
131
+ killed_processes.append(process_to_kill)
132
+
133
+ with CancelScope(shield=True):
134
+ for killed_process in killed_processes:
135
+ await killed_process.aclose()
136
+
137
+ break
138
+
139
+ workers.remove(process)
140
+ else:
141
+ command = [sys.executable, "-u", "-m", __name__]
142
+ process = await open_process(
143
+ command, stdin=subprocess.PIPE, stdout=subprocess.PIPE
144
+ )
145
+ try:
146
+ stdin = cast(ByteSendStream, process.stdin)
147
+ buffered = BufferedByteReceiveStream(
148
+ cast(ByteReceiveStream, process.stdout)
149
+ )
150
+ with fail_after(20):
151
+ message = await buffered.receive(6)
152
+
153
+ if message != b"READY\n":
154
+ raise BrokenWorkerProcess(
155
+ f"Worker process returned unexpected response: {message!r}"
156
+ )
157
+
158
+ main_module_path = getattr(sys.modules["__main__"], "__file__", None)
159
+ pickled = pickle.dumps(
160
+ ("init", sys.path, main_module_path),
161
+ protocol=pickle.HIGHEST_PROTOCOL,
162
+ )
163
+ await send_raw_command(pickled)
164
+ except (BrokenWorkerProcess, get_cancelled_exc_class()):
165
+ raise
166
+ except BaseException as exc:
167
+ process.kill()
168
+ raise BrokenWorkerProcess(
169
+ "Error during worker process initialization"
170
+ ) from exc
171
+
172
+ workers.add(process)
173
+
174
+ with CancelScope(shield=not cancellable):
175
+ try:
176
+ return cast(T_Retval, await send_raw_command(request))
177
+ finally:
178
+ if process in workers:
179
+ idle_workers.append((process, current_time()))
180
+
181
+
182
+ def current_default_process_limiter() -> CapacityLimiter:
183
+ """
184
+ Return the capacity limiter that is used by default to limit the number of worker
185
+ processes.
186
+
187
+ :return: a capacity limiter object
188
+
189
+ """
190
+ try:
191
+ return _default_process_limiter.get()
192
+ except LookupError:
193
+ limiter = CapacityLimiter(os.cpu_count() or 2)
194
+ _default_process_limiter.set(limiter)
195
+ return limiter
196
+
197
+
198
+ def process_worker() -> None:
199
+ # Redirect standard streams to os.devnull so that user code won't interfere with the
200
+ # parent-worker communication
201
+ stdin = sys.stdin
202
+ stdout = sys.stdout
203
+ sys.stdin = open(os.devnull)
204
+ sys.stdout = open(os.devnull, "w")
205
+
206
+ stdout.buffer.write(b"READY\n")
207
+ while True:
208
+ retval = exception = None
209
+ try:
210
+ command, *args = pickle.load(stdin.buffer)
211
+ except EOFError:
212
+ return
213
+ except BaseException as exc:
214
+ exception = exc
215
+ else:
216
+ if command == "run":
217
+ func, args = args
218
+ try:
219
+ retval = func(*args)
220
+ except BaseException as exc:
221
+ exception = exc
222
+ elif command == "init":
223
+ main_module_path: str | None
224
+ sys.path, main_module_path = args
225
+ del sys.modules["__main__"]
226
+ if main_module_path and os.path.isfile(main_module_path):
227
+ # Load the parent's main module but as __mp_main__ instead of
228
+ # __main__ (like multiprocessing does) to avoid infinite recursion
229
+ try:
230
+ spec = spec_from_file_location("__mp_main__", main_module_path)
231
+ if spec and spec.loader:
232
+ main = module_from_spec(spec)
233
+ spec.loader.exec_module(main)
234
+ sys.modules["__main__"] = main
235
+ except BaseException as exc:
236
+ exception = exc
237
+ try:
238
+ if exception is not None:
239
+ status = b"EXCEPTION"
240
+ pickled = pickle.dumps(exception, pickle.HIGHEST_PROTOCOL)
241
+ else:
242
+ status = b"RETURN"
243
+ pickled = pickle.dumps(retval, pickle.HIGHEST_PROTOCOL)
244
+ except BaseException as exc:
245
+ exception = exc
246
+ status = b"EXCEPTION"
247
+ pickled = pickle.dumps(exc, pickle.HIGHEST_PROTOCOL)
248
+
249
+ stdout.buffer.write(b"%s %d\n" % (status, len(pickled)))
250
+ stdout.buffer.write(pickled)
251
+
252
+ # Respect SIGTERM
253
+ if isinstance(exception, SystemExit):
254
+ raise exception
255
+
256
+
257
+ if __name__ == "__main__":
258
+ process_worker()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_thread.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ from collections.abc import Callable
5
+ from typing import TypeVar
6
+ from warnings import warn
7
+
8
+ from ._core._eventloop import get_async_backend
9
+ from .abc import CapacityLimiter
10
+
11
+ if sys.version_info >= (3, 11):
12
+ from typing import TypeVarTuple, Unpack
13
+ else:
14
+ from typing_extensions import TypeVarTuple, Unpack
15
+
16
+ T_Retval = TypeVar("T_Retval")
17
+ PosArgsT = TypeVarTuple("PosArgsT")
18
+
19
+
20
+ async def run_sync(
21
+ func: Callable[[Unpack[PosArgsT]], T_Retval],
22
+ *args: Unpack[PosArgsT],
23
+ abandon_on_cancel: bool = False,
24
+ cancellable: bool | None = None,
25
+ limiter: CapacityLimiter | None = None,
26
+ ) -> T_Retval:
27
+ """
28
+ Call the given function with the given arguments in a worker thread.
29
+
30
+ If the ``abandon_on_cancel`` option is enabled and the task waiting for its completion is
31
+ cancelled, the thread will still run its course but its return value (or any raised
32
+ exception) will be ignored.
33
+
34
+ :param func: a callable
35
+ :param args: positional arguments for the callable
36
+ :param abandon_on_cancel: ``True`` to abandon the thread (leaving it to run
37
+ unchecked on own) if the host task is cancelled, ``False`` to ignore
38
+ cancellations in the host task until the operation has completed in the worker
39
+ thread
40
+ :param cancellable: deprecated alias of ``abandon_on_cancel``; will override
41
+ ``abandon_on_cancel`` if both parameters are passed
42
+ :param limiter: capacity limiter to use to limit the total amount of threads running
43
+ (if omitted, the default limiter is used)
44
+ :return: an awaitable that yields the return value of the function.
45
+
46
+ """
47
+ if cancellable is not None:
48
+ abandon_on_cancel = cancellable
49
+ warn(
50
+ "The `cancellable=` keyword argument to `anyio.to_thread.run_sync` is "
51
+ "deprecated since AnyIO 4.1.0; use `abandon_on_cancel=` instead",
52
+ DeprecationWarning,
53
+ stacklevel=2,
54
+ )
55
+
56
+ return await get_async_backend().run_sync_in_worker_thread(
57
+ func, args, abandon_on_cancel=abandon_on_cancel, limiter=limiter
58
+ )
59
+
60
+
61
+ def current_default_thread_limiter() -> CapacityLimiter:
62
+ """
63
+ Return the capacity limiter that is used by default to limit the number of
64
+ concurrent threads.
65
+
66
+ :return: a capacity limiter object
67
+
68
+ """
69
+ return get_async_backend().current_default_thread_limiter()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/_compat.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Mapping
4
+ import sys
5
+ from typing import Any
6
+
7
+ DATACLASS_KWARGS: Mapping[str, Any]
8
+ if sys.version_info >= (3, 10):
9
+ DATACLASS_KWARGS = {"slots": True}
10
+ else:
11
+ DATACLASS_KWARGS = {}
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/parser_inline.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tokenizes paragraph content.
2
+ """
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING, Callable
6
+
7
+ from . import rules_inline
8
+ from .ruler import Ruler
9
+ from .rules_inline.state_inline import StateInline
10
+ from .token import Token
11
+ from .utils import EnvType
12
+
13
+ if TYPE_CHECKING:
14
+ from markdown_it import MarkdownIt
15
+
16
+
17
+ # Parser rules
18
+ RuleFuncInlineType = Callable[[StateInline, bool], bool]
19
+ """(state: StateInline, silent: bool) -> (matched: bool)
20
+
21
+ `silent` disables token generation, useful for lookahead.
22
+ """
23
+ _rules: list[tuple[str, RuleFuncInlineType]] = [
24
+ ("text", rules_inline.text),
25
+ ("linkify", rules_inline.linkify),
26
+ ("newline", rules_inline.newline),
27
+ ("escape", rules_inline.escape),
28
+ ("backticks", rules_inline.backtick),
29
+ ("strikethrough", rules_inline.strikethrough.tokenize),
30
+ ("emphasis", rules_inline.emphasis.tokenize),
31
+ ("link", rules_inline.link),
32
+ ("image", rules_inline.image),
33
+ ("autolink", rules_inline.autolink),
34
+ ("html_inline", rules_inline.html_inline),
35
+ ("entity", rules_inline.entity),
36
+ ]
37
+
38
+ # Note `rule2` ruleset was created specifically for emphasis/strikethrough
39
+ # post-processing and may be changed in the future.
40
+ #
41
+ # Don't use this for anything except pairs (plugins working with `balance_pairs`).
42
+ #
43
+ RuleFuncInline2Type = Callable[[StateInline], None]
44
+ _rules2: list[tuple[str, RuleFuncInline2Type]] = [
45
+ ("balance_pairs", rules_inline.link_pairs),
46
+ ("strikethrough", rules_inline.strikethrough.postProcess),
47
+ ("emphasis", rules_inline.emphasis.postProcess),
48
+ # rules for pairs separate '**' into its own text tokens, which may be left unused,
49
+ # rule below merges unused segments back with the rest of the text
50
+ ("fragments_join", rules_inline.fragments_join),
51
+ ]
52
+
53
+
54
+ class ParserInline:
55
+ def __init__(self) -> None:
56
+ self.ruler = Ruler[RuleFuncInlineType]()
57
+ for name, rule in _rules:
58
+ self.ruler.push(name, rule)
59
+ # Second ruler used for post-processing (e.g. in emphasis-like rules)
60
+ self.ruler2 = Ruler[RuleFuncInline2Type]()
61
+ for name, rule2 in _rules2:
62
+ self.ruler2.push(name, rule2)
63
+
64
+ def skipToken(self, state: StateInline) -> None:
65
+ """Skip single token by running all rules in validation mode;
66
+ advances `state.pos` past the token (by one character if no rule matched)
67
+ """
68
+ ok = False
69
+ pos = state.pos
70
+ rules = self.ruler.getRules("")
71
+ maxNesting = state.md.options["maxNesting"]
72
+ cache = state.cache
73
+
74
+ if pos in cache:
75
+ state.pos = cache[pos]
76
+ return
77
+
78
+ if state.level < maxNesting:
79
+ for rule in rules:
80
+ # Increment state.level and decrement it later to limit recursion.
81
+ # It's harmless to do here, because no tokens are created.
82
+ # But ideally, we'd need a separate private state variable for this purpose.
83
+ state.level += 1
84
+ ok = rule(state, True)
85
+ state.level -= 1
86
+ if ok:
87
+ break
88
+ else:
89
+ # Too much nesting, just skip until the end of the paragraph.
90
+ #
91
+ # NOTE: this will cause links to behave incorrectly in the following case,
92
+ # when an amount of `[` is exactly equal to `maxNesting + 1`:
93
+ #
94
+ # [[[[[[[[[[[[[[[[[[[[[foo]()
95
+ #
96
+ # TODO: remove this workaround when CM standard will allow nested links
97
+ # (we can replace it by preventing links from being parsed in
98
+ # validation mode)
99
+ #
100
+ state.pos = state.posMax
101
+
102
+ if not ok:
103
+ state.pos += 1
104
+ cache[pos] = state.pos
105
+
106
+ def tokenize(self, state: StateInline) -> None:
107
+ """Generate tokens for input range."""
108
+ ok = False
109
+ rules = self.ruler.getRules("")
110
+ end = state.posMax
111
+ maxNesting = state.md.options["maxNesting"]
112
+
113
+ while state.pos < end:
114
+ # Try all possible rules.
115
+ # On success, rule should:
116
+ #
117
+ # - update `state.pos`
118
+ # - update `state.tokens`
119
+ # - return true
120
+
121
+ if state.level < maxNesting:
122
+ for rule in rules:
123
+ ok = rule(state, False)
124
+ if ok:
125
+ break
126
+
127
+ if ok:
128
+ if state.pos >= end:
129
+ break
130
+ continue
131
+
132
+ state.pending += state.src[state.pos]
133
+ state.pos += 1
134
+
135
+ if state.pending:
136
+ state.pushPending()
137
+
138
+ def parse(
139
+ self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
140
+ ) -> list[Token]:
141
+ """Process input string and push inline tokens into `tokens`"""
142
+ state = StateInline(src, md, env, tokens)
143
+ self.tokenize(state)
144
+ rules2 = self.ruler2.getRules("")
145
+ for rule in rules2:
146
+ rule(state)
147
+ return state.tokens
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/port.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ - package: markdown-it/markdown-it
2
+ version: 13.0.1
3
+ commit: e843acc9edad115cbf8cf85e676443f01658be08
4
+ date: May 3, 2022
5
+ notes:
6
+ - Rename variables that use python built-in names, e.g.
7
+ - `max` -> `maximum`
8
+ - `len` -> `length`
9
+ - `str` -> `string`
10
+ - |
11
+ Convert JS `for` loops to `while` loops
12
+ this is generally the main difference between the codes,
13
+ because in python you can't do e.g. `for {i=1;i<x;i++} {}`
14
+ - |
15
+ `env` is a common Python dictionary, and so does not have attribute access to keys,
16
+ as with JavaScript dictionaries.
17
+ `options` have attribute access only to core markdownit configuration options
18
+ - |
19
+ `Token.attrs` is a dictionary, instead of a list of lists.
20
+ Upstream the list format is only used to guarantee order: https://github.com/markdown-it/markdown-it/issues/142,
21
+ but in Python 3.7+ order of dictionaries is guaranteed.
22
+ One should anyhow use the `attrGet`, `attrSet`, `attrPush` and `attrJoin` methods
23
+ to manipulate `Token.attrs`, which have an identical signature to those upstream.
24
+ - Use python version of `charCodeAt`
25
+ - |
26
+ Use `str` units instead of `int`s to represent Unicode codepoints.
27
+ This provides a significant performance boost
28
+ - |
29
+ In markdown_it/rules_block/reference.py,
30
+ record line range in state.env["references"] and add state.env["duplicate_refs"]
31
+ This is to allow renderers to report on issues regarding references
32
+ - |
33
+ The `MarkdownIt.__init__` signature is slightly different for updating options,
34
+ since you must always specify the config first, e.g.
35
+ use `MarkdownIt("commonmark", {"html": False})` instead of `MarkdownIt({"html": False})`
36
+ - The default configuration preset for `MarkdownIt` is "commonmark" not "default"
37
+ - Allow custom renderer to be passed to `MarkdownIt`
38
+ - |
39
+ change render method signatures
40
+ `func(tokens, idx, options, env, slf)` to
41
+ `func(self, tokens, idx, options, env)`
42
+ - |
43
+ Extensions add render methods by format
44
+ `MarkdownIt.add_render_rule(name, function, fmt="html")`,
45
+ rather than `MarkdownIt.renderer.rules[name] = function`
46
+ and renderers should declare a class property `__output__ = "html"`.
47
+ This allows for extensibility to more than just HTML renderers
48
+ - inline tokens in tables are assigned a map (this is helpful for propagation to children)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/token.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable, MutableMapping
4
+ import dataclasses as dc
5
+ from typing import Any, Literal
6
+ import warnings
7
+
8
+ from markdown_it._compat import DATACLASS_KWARGS
9
+
10
+
11
+ def convert_attrs(value: Any) -> Any:
12
+ """Convert Token.attrs set as ``None`` or ``[[key, value], ...]`` to a dict.
13
+
14
+ This improves compatibility with upstream markdown-it.
15
+ """
16
+ if not value:
17
+ return {}
18
+ if isinstance(value, list):
19
+ return dict(value)
20
+ return value
21
+
22
+
23
+ @dc.dataclass(**DATACLASS_KWARGS)
24
+ class Token:
25
+ type: str
26
+ """Type of the token (string, e.g. "paragraph_open")"""
27
+
28
+ tag: str
29
+ """HTML tag name, e.g. 'p'"""
30
+
31
+ nesting: Literal[-1, 0, 1]
32
+ """Level change (number in {-1, 0, 1} set), where:
33
+ - `1` means the tag is opening
34
+ - `0` means the tag is self-closing
35
+ - `-1` means the tag is closing
36
+ """
37
+
38
+ attrs: dict[str, str | int | float] = dc.field(default_factory=dict)
39
+ """HTML attributes.
40
+ Note this differs from the upstream "list of lists" format,
41
+ although an instance can still be initialised with this format.
42
+ """
43
+
44
+ map: list[int] | None = None
45
+ """Source map info. Format: `[ line_begin, line_end ]`"""
46
+
47
+ level: int = 0
48
+ """Nesting level, the same as `state.level`"""
49
+
50
+ children: list[Token] | None = None
51
+ """Array of child nodes (inline and img tokens)."""
52
+
53
+ content: str = ""
54
+ """Inner content, in the case of a self-closing tag (code, html, fence, etc.)."""
55
+
56
+ markup: str = ""
57
+ """'*' or '_' for emphasis, fence string for fence, etc."""
58
+
59
+ info: str = ""
60
+ """Additional information:
61
+ - Info string for "fence" tokens
62
+ - The value "auto" for autolink "link_open" and "link_close" tokens
63
+ - The string value of the item marker for ordered-list "list_item_open" tokens
64
+ """
65
+
66
+ meta: dict[Any, Any] = dc.field(default_factory=dict)
67
+ """A place for plugins to store any arbitrary data"""
68
+
69
+ block: bool = False
70
+ """True for block-level tokens, false for inline tokens.
71
+ Used in renderer to calculate line breaks
72
+ """
73
+
74
+ hidden: bool = False
75
+ """If true, ignore this element when rendering.
76
+ Used for tight lists to hide paragraphs.
77
+ """
78
+
79
+ def __post_init__(self) -> None:
80
+ self.attrs = convert_attrs(self.attrs)
81
+
82
+ def attrIndex(self, name: str) -> int:
83
+ warnings.warn( # noqa: B028
84
+ "Token.attrIndex should not be used, since Token.attrs is a dictionary",
85
+ UserWarning,
86
+ )
87
+ if name not in self.attrs:
88
+ return -1
89
+ return list(self.attrs.keys()).index(name)
90
+
91
+ def attrItems(self) -> list[tuple[str, str | int | float]]:
92
+ """Get (key, value) list of attrs."""
93
+ return list(self.attrs.items())
94
+
95
+ def attrPush(self, attrData: tuple[str, str | int | float]) -> None:
96
+ """Add `[ name, value ]` attribute to list. Init attrs if necessary."""
97
+ name, value = attrData
98
+ self.attrSet(name, value)
99
+
100
+ def attrSet(self, name: str, value: str | int | float) -> None:
101
+ """Set `name` attribute to `value`. Override old value if exists."""
102
+ self.attrs[name] = value
103
+
104
+ def attrGet(self, name: str) -> None | str | int | float:
105
+ """Get the value of attribute `name`, or null if it does not exist."""
106
+ return self.attrs.get(name, None)
107
+
108
+ def attrJoin(self, name: str, value: str) -> None:
109
+ """Join value to existing attribute via space.
110
+ Or create new attribute if not exists.
111
+ Useful to operate with token classes.
112
+ """
113
+ if name in self.attrs:
114
+ current = self.attrs[name]
115
+ if not isinstance(current, str):
116
+ raise TypeError(
117
+ f"existing attr 'name' is not a str: {self.attrs[name]}"
118
+ )
119
+ self.attrs[name] = f"{current} {value}"
120
+ else:
121
+ self.attrs[name] = value
122
+
123
+ def copy(self, **changes: Any) -> Token:
124
+ """Return a shallow copy of the instance."""
125
+ return dc.replace(self, **changes)
126
+
127
+ def as_dict(
128
+ self,
129
+ *,
130
+ children: bool = True,
131
+ as_upstream: bool = True,
132
+ meta_serializer: Callable[[dict[Any, Any]], Any] | None = None,
133
+ filter: Callable[[str, Any], bool] | None = None,
134
+ dict_factory: Callable[..., MutableMapping[str, Any]] = dict,
135
+ ) -> MutableMapping[str, Any]:
136
+ """Return the token as a dictionary.
137
+
138
+ :param children: Also convert children to dicts
139
+ :param as_upstream: Ensure the output dictionary is equal to that created by markdown-it
140
+ For example, attrs are converted to null or lists
141
+ :param meta_serializer: hook for serializing ``Token.meta``
142
+ :param filter: A callable whose return code determines whether an
143
+ attribute or element is included (``True``) or dropped (``False``).
144
+ Is called with the (key, value) pair.
145
+ :param dict_factory: A callable to produce dictionaries from.
146
+ For example, to produce ordered dictionaries instead of normal Python
147
+ dictionaries, pass in ``collections.OrderedDict``.
148
+
149
+ """
150
+ mapping = dict_factory((f.name, getattr(self, f.name)) for f in dc.fields(self))
151
+ if filter:
152
+ mapping = dict_factory((k, v) for k, v in mapping.items() if filter(k, v))
153
+ if as_upstream and "attrs" in mapping:
154
+ mapping["attrs"] = (
155
+ None
156
+ if not mapping["attrs"]
157
+ else [[k, v] for k, v in mapping["attrs"].items()]
158
+ )
159
+ if meta_serializer and "meta" in mapping:
160
+ mapping["meta"] = meta_serializer(mapping["meta"])
161
+ if children and mapping.get("children", None):
162
+ mapping["children"] = [
163
+ child.as_dict(
164
+ children=children,
165
+ filter=filter,
166
+ dict_factory=dict_factory,
167
+ as_upstream=as_upstream,
168
+ meta_serializer=meta_serializer,
169
+ )
170
+ for child in mapping["children"]
171
+ ]
172
+ return mapping
173
+
174
+ @classmethod
175
+ def from_dict(cls, dct: MutableMapping[str, Any]) -> Token:
176
+ """Convert a dict to a Token."""
177
+ token = cls(**dct)
178
+ if token.children:
179
+ token.children = [cls.from_dict(c) for c in token.children] # type: ignore[arg-type]
180
+ return token
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/LICENSE ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
5
+
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
+
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [yyyy] [name of copyright owner]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/METADATA ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.2
2
+ Name: propcache
3
+ Version: 0.3.0
4
+ Summary: Accelerated property cache
5
+ Home-page: https://github.com/aio-libs/propcache
6
+ Author: Andrew Svetlov
7
+ Author-email: andrew.svetlov@gmail.com
8
+ Maintainer: aiohttp team <team@aiohttp.org>
9
+ Maintainer-email: team@aiohttp.org
10
+ License: Apache-2.0
11
+ Project-URL: Chat: Matrix, https://matrix.to/#/#aio-libs:matrix.org
12
+ Project-URL: Chat: Matrix Space, https://matrix.to/#/#aio-libs-space:matrix.org
13
+ Project-URL: CI: GitHub Workflows, https://github.com/aio-libs/propcache/actions?query=branch:master
14
+ Project-URL: Code of Conduct, https://github.com/aio-libs/.github/blob/master/CODE_OF_CONDUCT.md
15
+ Project-URL: Coverage: codecov, https://codecov.io/github/aio-libs/propcache
16
+ Project-URL: Docs: Changelog, https://propcache.readthedocs.io/en/latest/changes/
17
+ Project-URL: Docs: RTD, https://propcache.readthedocs.io
18
+ Project-URL: GitHub: issues, https://github.com/aio-libs/propcache/issues
19
+ Project-URL: GitHub: repo, https://github.com/aio-libs/propcache
20
+ Keywords: cython,cext,propcache
21
+ Classifier: Development Status :: 5 - Production/Stable
22
+ Classifier: Intended Audience :: Developers
23
+ Classifier: License :: OSI Approved :: Apache Software License
24
+ Classifier: Programming Language :: Cython
25
+ Classifier: Programming Language :: Python
26
+ Classifier: Programming Language :: Python :: 3
27
+ Classifier: Programming Language :: Python :: 3.9
28
+ Classifier: Programming Language :: Python :: 3.10
29
+ Classifier: Programming Language :: Python :: 3.11
30
+ Classifier: Programming Language :: Python :: 3.12
31
+ Classifier: Programming Language :: Python :: 3.13
32
+ Classifier: Topic :: Internet :: WWW/HTTP
33
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
34
+ Requires-Python: >=3.9
35
+ Description-Content-Type: text/x-rst
36
+ License-File: LICENSE
37
+ License-File: NOTICE
38
+
39
+ propcache
40
+ =========
41
+
42
+ The module provides a fast implementation of cached properties for Python 3.9+.
43
+
44
+ .. image:: https://github.com/aio-libs/propcache/actions/workflows/ci-cd.yml/badge.svg
45
+ :target: https://github.com/aio-libs/propcache/actions?query=workflow%3ACI
46
+ :align: right
47
+
48
+ .. image:: https://codecov.io/gh/aio-libs/propcache/branch/master/graph/badge.svg
49
+ :target: https://codecov.io/gh/aio-libs/propcache
50
+
51
+ .. image:: https://badge.fury.io/py/propcache.svg
52
+ :target: https://badge.fury.io/py/propcache
53
+
54
+
55
+ .. image:: https://readthedocs.org/projects/propcache/badge/?version=latest
56
+ :target: https://propcache.readthedocs.io
57
+
58
+
59
+ .. image:: https://img.shields.io/pypi/pyversions/propcache.svg
60
+ :target: https://pypi.python.org/pypi/propcache
61
+
62
+ .. image:: https://img.shields.io/matrix/aio-libs:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat
63
+ :target: https://matrix.to/#/%23aio-libs:matrix.org
64
+ :alt: Matrix Room — #aio-libs:matrix.org
65
+
66
+ .. image:: https://img.shields.io/matrix/aio-libs-space:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs-space%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat
67
+ :target: https://matrix.to/#/%23aio-libs-space:matrix.org
68
+ :alt: Matrix Space — #aio-libs-space:matrix.org
69
+
70
+ Introduction
71
+ ------------
72
+
73
+ The API is designed to be nearly identical to the built-in ``functools.cached_property`` class,
74
+ except for the additional ``under_cached_property`` class which uses ``self._cache``
75
+ instead of ``self.__dict__`` to store the cached values and prevents ``__set__`` from being called.
76
+
77
+ For full documentation please read https://propcache.readthedocs.io.
78
+
79
+ Installation
80
+ ------------
81
+
82
+ ::
83
+
84
+ $ pip install propcache
85
+
86
+ The library is Python 3 only!
87
+
88
+ PyPI contains binary wheels for Linux, Windows and MacOS. If you want to install
89
+ ``propcache`` on another operating system where wheels are not provided,
90
+ the tarball will be used to compile the library from
91
+ the source code. It requires a C compiler and Python headers installed.
92
+
93
+ To skip the compilation you must explicitly opt-in by using a PEP 517
94
+ configuration setting ``pure-python``, or setting the ``PROPCACHE_NO_EXTENSIONS``
95
+ environment variable to a non-empty value, e.g.:
96
+
97
+ .. code-block:: console
98
+
99
+ $ pip install propcache --config-settings=pure-python=false
100
+
101
+ Please note that the pure-Python (uncompiled) version is much slower. However,
102
+ PyPy always uses a pure-Python implementation, and, as such, it is unaffected
103
+ by this variable.
104
+
105
+
106
+ API documentation
107
+ ------------------
108
+
109
+ The documentation is located at https://propcache.readthedocs.io.
110
+
111
+ Source code
112
+ -----------
113
+
114
+ The project is hosted on GitHub_
115
+
116
+ Please file an issue on the `bug tracker
117
+ <https://github.com/aio-libs/propcache/issues>`_ if you have found a bug
118
+ or have some suggestion in order to improve the library.
119
+
120
+ Discussion list
121
+ ---------------
122
+
123
+ *aio-libs* google group: https://groups.google.com/forum/#!forum/aio-libs
124
+
125
+ Feel free to post your questions and ideas here.
126
+
127
+
128
+ Authors and License
129
+ -------------------
130
+
131
+ The ``propcache`` package is derived from ``yarl`` which is written by Andrew Svetlov.
132
+
133
+ It's *Apache 2* licensed and freely available.
134
+
135
+
136
+ .. _GitHub: https://github.com/aio-libs/propcache
137
+
138
+ =========
139
+ Changelog
140
+ =========
141
+
142
+ ..
143
+ You should *NOT* be adding new change log entries to this file, this
144
+ file is managed by towncrier. You *may* edit previous change logs to
145
+ fix problems like typo corrections or such.
146
+ To add a new change log entry, please see
147
+ https://pip.pypa.io/en/latest/development/#adding-a-news-entry
148
+ we named the news folder "changes".
149
+
150
+ WARNING: Don't drop the next directive!
151
+
152
+ .. towncrier release notes start
153
+
154
+ 0.3.0
155
+ =====
156
+
157
+ *(2025-02-20)*
158
+
159
+
160
+ Features
161
+ --------
162
+
163
+ - Implemented support for the free-threaded build of CPython 3.13 -- by `@lysnikolaou <https://github.com/sponsors/lysnikolaou>`__.
164
+
165
+ *Related issues and pull requests on GitHub:*
166
+ `#84 <https://github.com/aio-libs/propcache/issues/84>`__.
167
+
168
+
169
+ Packaging updates and notes for downstreams
170
+ -------------------------------------------
171
+
172
+ - Started building wheels for the free-threaded build of CPython 3.13 -- by `@lysnikolaou <https://github.com/sponsors/lysnikolaou>`__.
173
+
174
+ *Related issues and pull requests on GitHub:*
175
+ `#84 <https://github.com/aio-libs/propcache/issues/84>`__.
176
+
177
+
178
+ Contributor-facing changes
179
+ --------------------------
180
+
181
+ - GitHub Actions CI/CD is now configured to manage caching pip-ecosystem
182
+ dependencies using `re-actors/cache-python-deps`_ -- an action by
183
+ `@webknjaz <https://github.com/sponsors/webknjaz>`__ that takes into account ABI stability and the exact
184
+ version of Python runtime.
185
+
186
+ .. _`re-actors/cache-python-deps`:
187
+ https://github.com/marketplace/actions/cache-python-deps
188
+
189
+ *Related issues and pull requests on GitHub:*
190
+ `#93 <https://github.com/aio-libs/propcache/issues/93>`__.
191
+
192
+
193
+ ----
194
+
195
+
196
+ 0.2.1
197
+ =====
198
+
199
+ *(2024-12-01)*
200
+
201
+
202
+ Bug fixes
203
+ ---------
204
+
205
+ - Stopped implicitly allowing the use of Cython pre-release versions when
206
+ building the distribution package -- by `@ajsanchezsanz <https://github.com/sponsors/ajsanchezsanz>`__ and
207
+ `@markgreene74 <https://github.com/sponsors/markgreene74>`__.
208
+
209
+ *Related commits on GitHub:*
210
+ `64df0a6 <https://github.com/aio-libs/propcache/commit/64df0a6>`__.
211
+
212
+ - Fixed ``wrapped`` and ``func`` not being accessible in the Cython versions of ``propcache.api.cached_property`` and ``propcache.api.under_cached_property`` decorators -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
213
+
214
+ *Related issues and pull requests on GitHub:*
215
+ `#72 <https://github.com/aio-libs/propcache/issues/72>`__.
216
+
217
+
218
+ Removals and backward incompatible breaking changes
219
+ ---------------------------------------------------
220
+
221
+ - Removed support for Python 3.8 as it has reached end of life -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
222
+
223
+ *Related issues and pull requests on GitHub:*
224
+ `#57 <https://github.com/aio-libs/propcache/issues/57>`__.
225
+
226
+
227
+ Packaging updates and notes for downstreams
228
+ -------------------------------------------
229
+
230
+ - Stopped implicitly allowing the use of Cython pre-release versions when
231
+ building the distribution package -- by `@ajsanchezsanz <https://github.com/sponsors/ajsanchezsanz>`__ and
232
+ `@markgreene74 <https://github.com/sponsors/markgreene74>`__.
233
+
234
+ *Related commits on GitHub:*
235
+ `64df0a6 <https://github.com/aio-libs/propcache/commit/64df0a6>`__.
236
+
237
+
238
+ ----
239
+
240
+
241
+ 0.2.0
242
+ =====
243
+
244
+ *(2024-10-07)*
245
+
246
+
247
+ Bug fixes
248
+ ---------
249
+
250
+ - Fixed loading the C-extensions on Python 3.8 -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
251
+
252
+ *Related issues and pull requests on GitHub:*
253
+ `#26 <https://github.com/aio-libs/propcache/issues/26>`__.
254
+
255
+
256
+ Features
257
+ --------
258
+
259
+ - Improved typing for the ``propcache.api.under_cached_property`` decorator -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
260
+
261
+ *Related issues and pull requests on GitHub:*
262
+ `#38 <https://github.com/aio-libs/propcache/issues/38>`__.
263
+
264
+
265
+ Improved documentation
266
+ ----------------------
267
+
268
+ - Added API documentation for the ``propcache.api.cached_property`` and ``propcache.api.under_cached_property`` decorators -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
269
+
270
+ *Related issues and pull requests on GitHub:*
271
+ `#16 <https://github.com/aio-libs/propcache/issues/16>`__.
272
+
273
+
274
+ Packaging updates and notes for downstreams
275
+ -------------------------------------------
276
+
277
+ - Moved ``propcache.api.under_cached_property`` and ``propcache.api.cached_property`` to `propcache.api` -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
278
+
279
+ Both decorators remain importable from the top-level package, however importing from `propcache.api` is now the recommended way to use them.
280
+
281
+ *Related issues and pull requests on GitHub:*
282
+ `#19 <https://github.com/aio-libs/propcache/issues/19>`__, `#24 <https://github.com/aio-libs/propcache/issues/24>`__, `#32 <https://github.com/aio-libs/propcache/issues/32>`__.
283
+
284
+ - Converted project to use a src layout -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
285
+
286
+ *Related issues and pull requests on GitHub:*
287
+ `#22 <https://github.com/aio-libs/propcache/issues/22>`__, `#29 <https://github.com/aio-libs/propcache/issues/29>`__, `#37 <https://github.com/aio-libs/propcache/issues/37>`__.
288
+
289
+
290
+ ----
291
+
292
+
293
+ 0.1.0
294
+ =====
295
+
296
+ *(2024-10-03)*
297
+
298
+
299
+ Features
300
+ --------
301
+
302
+ - Added ``armv7l`` wheels -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
303
+
304
+ *Related issues and pull requests on GitHub:*
305
+ `#5 <https://github.com/aio-libs/propcache/issues/5>`__.
306
+
307
+
308
+ ----
309
+
310
+
311
+ 0.0.0
312
+ =====
313
+
314
+ *(2024-10-02)*
315
+
316
+
317
+ - Initial release.
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/NOTICE ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright 2016-2021, Andrew Svetlov and aio-libs team
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/RECORD ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ propcache-0.3.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
2
+ propcache-0.3.0.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
3
+ propcache-0.3.0.dist-info/METADATA,sha256=EVZE30m1bHCrcoZ2RLtvfNkdXyHIInAIG39SNZEjLyE,10357
4
+ propcache-0.3.0.dist-info/NOTICE,sha256=VtasbIEFwKUTBMIdsGDjYa-ajqCvmnXCOcKLXRNpODg,609
5
+ propcache-0.3.0.dist-info/RECORD,,
6
+ propcache-0.3.0.dist-info/WHEEL,sha256=siqMuoWpRueIZ87ijidBxnOwHeSOOcxNwYCs-pC4Yv0,151
7
+ propcache-0.3.0.dist-info/top_level.txt,sha256=pVF_GbqSAITPMiX27kfU3QP9-ufhRvkADmudDxWdF3w,10
8
+ propcache/__init__.py,sha256=82yOKjJMHwsj2IpsIfiuDumvBEOckaz2HB823YDJH4Y,965
9
+ propcache/__pycache__/__init__.cpython-312.pyc,,
10
+ propcache/__pycache__/_helpers.cpython-312.pyc,,
11
+ propcache/__pycache__/_helpers_py.cpython-312.pyc,,
12
+ propcache/__pycache__/api.cpython-312.pyc,,
13
+ propcache/_helpers.py,sha256=8CnlWmfTM6RDbMvNDXwL-VMHWiwIUjG8nbeqmvRsbh8,1579
14
+ propcache/_helpers_c.cpython-312-x86_64-linux-gnu.so,sha256=mqnzbvcdqj2KOhx9xGMMjatXLlEuB69xfnsCF5O9G8I,844512
15
+ propcache/_helpers_c.pyx,sha256=9UqfhVrbbkiZDGtEPFEOfT7qghPjAkNtJpgI1JYUPao,2518
16
+ propcache/_helpers_py.py,sha256=jnK6W43iETLcW-A1WMroGUKnElzX8Drw2UQfbEqLlI8,1637
17
+ propcache/api.py,sha256=wvgB-ypkkI5uf72VVYl2NFGc_TnzUQA2CxC7dTlL5ak,179
18
+ propcache/py.typed,sha256=ay5OMO475PlcZ_Fbun9maHW7Y6MBTk0UXL4ztHx3Iug,14
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/WHEEL ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (75.8.0)
3
+ Root-Is-Purelib: false
4
+ Tag: cp312-cp312-manylinux_2_17_x86_64
5
+ Tag: cp312-cp312-manylinux2014_x86_64
6
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ propcache
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/__init__.py ADDED
@@ -0,0 +1,437 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # flake8: noqa
19
+
20
+ """
21
+ PyArrow is the python implementation of Apache Arrow.
22
+
23
+ Apache Arrow is a cross-language development platform for in-memory data.
24
+ It specifies a standardized language-independent columnar memory format for
25
+ flat and hierarchical data, organized for efficient analytic operations on
26
+ modern hardware. It also provides computational libraries and zero-copy
27
+ streaming messaging and interprocess communication.
28
+
29
+ For more information see the official page at https://arrow.apache.org
30
+ """
31
+
32
+ import gc as _gc
33
+ import importlib as _importlib
34
+ import os as _os
35
+ import platform as _platform
36
+ import sys as _sys
37
+ import warnings as _warnings
38
+
39
+ try:
40
+ from ._generated_version import version as __version__
41
+ except ImportError:
42
+ # Package is not installed, parse git tag at runtime
43
+ try:
44
+ import setuptools_scm
45
+ # Code duplicated from setup.py to avoid a dependency on each other
46
+
47
+ def parse_git(root, **kwargs):
48
+ """
49
+ Parse function for setuptools_scm that ignores tags for non-C++
50
+ subprojects, e.g. apache-arrow-js-XXX tags.
51
+ """
52
+ from setuptools_scm.git import parse
53
+ kwargs['describe_command'] = \
54
+ "git describe --dirty --tags --long --match 'apache-arrow-[0-9]*.*'"
55
+ return parse(root, **kwargs)
56
+ __version__ = setuptools_scm.get_version('../',
57
+ parse=parse_git)
58
+ except ImportError:
59
+ __version__ = None
60
+
61
+ # ARROW-8684: Disable GC while initializing Cython extension module,
62
+ # to workaround Cython bug in https://github.com/cython/cython/issues/3603
63
+ _gc_enabled = _gc.isenabled()
64
+ _gc.disable()
65
+ import pyarrow.lib as _lib
66
+ if _gc_enabled:
67
+ _gc.enable()
68
+
69
+ from pyarrow.lib import (BuildInfo, RuntimeInfo, set_timezone_db_path,
70
+ MonthDayNano, VersionInfo, cpp_build_info,
71
+ cpp_version, cpp_version_info, runtime_info,
72
+ cpu_count, set_cpu_count, enable_signal_handlers,
73
+ io_thread_count, set_io_thread_count)
74
+
75
+
76
+ def show_versions():
77
+ """
78
+ Print various version information, to help with error reporting.
79
+ """
80
+ def print_entry(label, value):
81
+ print(f"{label: <26}: {value: <8}")
82
+
83
+ print("pyarrow version info\n--------------------")
84
+ print_entry("Package kind", cpp_build_info.package_kind
85
+ if len(cpp_build_info.package_kind) > 0
86
+ else "not indicated")
87
+ print_entry("Arrow C++ library version", cpp_build_info.version)
88
+ print_entry("Arrow C++ compiler",
89
+ f"{cpp_build_info.compiler_id} {cpp_build_info.compiler_version}")
90
+ print_entry("Arrow C++ compiler flags", cpp_build_info.compiler_flags)
91
+ print_entry("Arrow C++ git revision", cpp_build_info.git_id)
92
+ print_entry("Arrow C++ git description", cpp_build_info.git_description)
93
+ print_entry("Arrow C++ build type", cpp_build_info.build_type)
94
+
95
+
96
def _module_is_available(module):
    """
    Tell whether the submodule ``pyarrow.<module>`` can be imported.

    Returns True when the import succeeds and False when it raises
    ImportError.
    """
    qualified_name = f'pyarrow.{module}'
    try:
        _importlib.import_module(qualified_name)
        return True
    except ImportError:
        return False
103
+
104
+
105
+ def _filesystem_is_available(fs):
106
+ try:
107
+ import pyarrow.fs
108
+ except ImportError:
109
+ return False
110
+
111
+ try:
112
+ getattr(pyarrow.fs, fs)
113
+ except (ImportError, AttributeError):
114
+ return False
115
+ else:
116
+ return True
117
+
118
+
119
def show_info():
    """
    Print a detailed report for error reporting: library versions, platform,
    memory pool, optional modules, filesystems and compression codecs.
    """
    show_versions()

    def print_entry(label, value):
        print(f" {label: <20}: {value: <8}")

    def print_status(name, enabled):
        # Available features are shown as "Enabled", missing ones as "-".
        print_entry(name, "Enabled" if enabled else "-")

    print("\nPlatform:")
    os_and_arch = f"{_platform.system()} {_platform.machine()}"
    print_entry("OS / Arch", os_and_arch)
    print_entry("SIMD Level", runtime_info().simd_level)
    print_entry("Detected SIMD Level", runtime_info().detected_simd_level)

    memory_pool = default_memory_pool()
    print("\nMemory:")
    print_entry("Default backend", memory_pool.backend_name)
    print_entry("Bytes allocated", f"{memory_pool.bytes_allocated()} bytes")
    print_entry("Max memory", f"{memory_pool.max_memory()} bytes")
    print_entry("Supported Backends", ', '.join(supported_memory_backends()))

    print("\nOptional modules:")
    for module_name in ("csv", "cuda", "dataset", "feather", "flight", "fs",
                        "gandiva", "json", "orc", "parquet"):
        print_status(module_name, _module_is_available(module_name))

    print("\nFilesystems:")
    for fs_name in ("AzureFileSystem", "GcsFileSystem",
                    "HadoopFileSystem", "S3FileSystem"):
        print_status(fs_name, _filesystem_is_available(fs_name))

    print("\nCompression Codecs:")
    for codec_name in ("brotli", "bz2", "gzip", "lz4_frame", "lz4", "snappy",
                       "zstd"):
        print_status(codec_name, Codec.is_available(codec_name))
159
+
160
+
161
+ from pyarrow.lib import (null, bool_,
162
+ int8, int16, int32, int64,
163
+ uint8, uint16, uint32, uint64,
164
+ time32, time64, timestamp, date32, date64, duration,
165
+ month_day_nano_interval,
166
+ float16, float32, float64,
167
+ binary, string, utf8, binary_view, string_view,
168
+ large_binary, large_string, large_utf8,
169
+ decimal32, decimal64, decimal128, decimal256,
170
+ list_, large_list, list_view, large_list_view,
171
+ map_, struct,
172
+ union, sparse_union, dense_union,
173
+ dictionary,
174
+ run_end_encoded,
175
+ bool8, fixed_shape_tensor, json_, opaque, uuid,
176
+ field,
177
+ type_for_alias,
178
+ DataType, DictionaryType, StructType,
179
+ ListType, LargeListType, FixedSizeListType,
180
+ ListViewType, LargeListViewType,
181
+ MapType, UnionType, SparseUnionType, DenseUnionType,
182
+ TimestampType, Time32Type, Time64Type, DurationType,
183
+ FixedSizeBinaryType,
184
+ Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type,
185
+ BaseExtensionType, ExtensionType,
186
+ RunEndEncodedType, Bool8Type, FixedShapeTensorType,
187
+ JsonType, OpaqueType, UuidType,
188
+ PyExtensionType, UnknownExtensionType,
189
+ register_extension_type, unregister_extension_type,
190
+ DictionaryMemo,
191
+ KeyValueMetadata,
192
+ Field,
193
+ Schema,
194
+ schema,
195
+ unify_schemas,
196
+ Array, Tensor,
197
+ array, chunked_array, record_batch, nulls, repeat,
198
+ SparseCOOTensor, SparseCSRMatrix, SparseCSCMatrix,
199
+ SparseCSFTensor,
200
+ infer_type, from_numpy_dtype,
201
+ NullArray,
202
+ NumericArray, IntegerArray, FloatingPointArray,
203
+ BooleanArray,
204
+ Int8Array, UInt8Array,
205
+ Int16Array, UInt16Array,
206
+ Int32Array, UInt32Array,
207
+ Int64Array, UInt64Array,
208
+ HalfFloatArray, FloatArray, DoubleArray,
209
+ ListArray, LargeListArray, FixedSizeListArray,
210
+ ListViewArray, LargeListViewArray,
211
+ MapArray, UnionArray,
212
+ BinaryArray, StringArray,
213
+ LargeBinaryArray, LargeStringArray,
214
+ BinaryViewArray, StringViewArray,
215
+ FixedSizeBinaryArray,
216
+ DictionaryArray,
217
+ Date32Array, Date64Array, TimestampArray,
218
+ Time32Array, Time64Array, DurationArray,
219
+ MonthDayNanoIntervalArray,
220
+ Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
221
+ StructArray, ExtensionArray,
222
+ RunEndEncodedArray, Bool8Array, FixedShapeTensorArray,
223
+ JsonArray, OpaqueArray, UuidArray,
224
+ scalar, NA, _NULL as NULL, Scalar,
225
+ NullScalar, BooleanScalar,
226
+ Int8Scalar, Int16Scalar, Int32Scalar, Int64Scalar,
227
+ UInt8Scalar, UInt16Scalar, UInt32Scalar, UInt64Scalar,
228
+ HalfFloatScalar, FloatScalar, DoubleScalar,
229
+ Decimal32Scalar, Decimal64Scalar, Decimal128Scalar, Decimal256Scalar,
230
+ ListScalar, LargeListScalar, FixedSizeListScalar,
231
+ ListViewScalar, LargeListViewScalar,
232
+ Date32Scalar, Date64Scalar,
233
+ Time32Scalar, Time64Scalar,
234
+ TimestampScalar, DurationScalar,
235
+ MonthDayNanoIntervalScalar,
236
+ BinaryScalar, LargeBinaryScalar, BinaryViewScalar,
237
+ StringScalar, LargeStringScalar, StringViewScalar,
238
+ FixedSizeBinaryScalar, DictionaryScalar,
239
+ MapScalar, StructScalar, UnionScalar,
240
+ RunEndEncodedScalar, Bool8Scalar, ExtensionScalar,
241
+ FixedShapeTensorScalar, JsonScalar, OpaqueScalar, UuidScalar)
242
+
243
+ # Buffers, allocation
244
+ from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager,
245
+ default_cpu_memory_manager)
246
+
247
+ from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
248
+ Codec, compress, decompress, allocate_buffer)
249
+
250
+ from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool,
251
+ total_allocated_bytes, set_memory_pool,
252
+ default_memory_pool, system_memory_pool,
253
+ jemalloc_memory_pool, mimalloc_memory_pool,
254
+ logging_memory_pool, proxy_memory_pool,
255
+ log_memory_allocations, jemalloc_set_decay_ms,
256
+ supported_memory_backends)
257
+
258
+ # I/O
259
+ from pyarrow.lib import (NativeFile, PythonFile,
260
+ BufferedInputStream, BufferedOutputStream, CacheOptions,
261
+ CompressedInputStream, CompressedOutputStream,
262
+ TransformInputStream, transcoding_input_stream,
263
+ FixedSizeBufferWriter,
264
+ BufferReader, BufferOutputStream,
265
+ OSFile, MemoryMappedFile, memory_map,
266
+ create_memory_map, MockOutputStream,
267
+ input_stream, output_stream,
268
+ have_libhdfs)
269
+
270
+ from pyarrow.lib import (ChunkedArray, RecordBatch, Table, table,
271
+ concat_arrays, concat_tables, TableGroupBy,
272
+ RecordBatchReader, concat_batches)
273
+
274
+ # Exceptions
275
+ from pyarrow.lib import (ArrowCancelled,
276
+ ArrowCapacityError,
277
+ ArrowException,
278
+ ArrowKeyError,
279
+ ArrowIndexError,
280
+ ArrowInvalid,
281
+ ArrowIOError,
282
+ ArrowMemoryError,
283
+ ArrowNotImplementedError,
284
+ ArrowTypeError,
285
+ ArrowSerializationError)
286
+
287
+ from pyarrow.ipc import serialize_pandas, deserialize_pandas
288
+ import pyarrow.ipc as ipc
289
+
290
+ import pyarrow.types as types
291
+
292
+
293
+ # ----------------------------------------------------------------------
294
+ # Deprecations
295
+
296
+ from pyarrow.util import _deprecate_api, _deprecate_class
297
+
298
+
299
+ # TODO: Deprecate these somehow in the pyarrow namespace
300
+ from pyarrow.ipc import (Message, MessageReader, MetadataVersion,
301
+ RecordBatchFileReader, RecordBatchFileWriter,
302
+ RecordBatchStreamReader, RecordBatchStreamWriter)
303
+
304
+ # ----------------------------------------------------------------------
305
+ # Returning absolute path to the pyarrow include directory (if bundled, e.g. in
306
+ # wheels)
307
+
308
+
309
def get_include():
    """
    Return the path of the ``include`` directory that sits next to this
    module, where the Arrow C++ headers are expected when they are bundled.
    Analogous to numpy.get_include.
    """
    package_dir = _os.path.dirname(__file__)
    return _os.path.join(package_dir, 'include')
315
+
316
+
317
def _get_pkg_config_executable():
    """
    Return the pkg-config command to run: the value of the ``PKG_CONFIG``
    environment variable when it is set, otherwise ``'pkg-config'``.
    """
    try:
        return _os.environ['PKG_CONFIG']
    except KeyError:
        return 'pkg-config'
319
+
320
+
321
def _has_pkg_config(pkgname):
    """
    Tell whether pkg-config knows the package `pkgname`.

    Returns True only when ``<pkg-config> --exists <pkgname>`` exits with
    status 0. Returns False when the pkg-config executable cannot be found.
    """
    import subprocess
    try:
        command = [_get_pkg_config_executable(), '--exists', pkgname]
        exit_status = subprocess.call(command)
    except FileNotFoundError:
        return False
    return exit_status == 0
328
+
329
+
330
def _read_pkg_config_variable(pkgname, cli_args):
    """
    Run pkg-config for `pkgname` with the extra arguments `cli_args` (a list)
    and return its standard output, decoded as UTF-8 with trailing
    whitespace removed.

    Raises RuntimeError carrying pkg-config's stderr when the command exits
    with a non-zero status.
    """
    import subprocess
    argv = [_get_pkg_config_executable(), pkgname] + cli_args
    pipe = subprocess.PIPE
    process = subprocess.Popen(argv, stdout=pipe, stderr=pipe)
    stdout_data, stderr_data = process.communicate()
    if process.returncode == 0:
        return stdout_data.rstrip().decode('utf8')
    raise RuntimeError("pkg-config failed: " + stderr_data.decode('utf8'))
339
+
340
+
341
def get_libraries():
    """
    Return the library names to pass as the `libraries` argument when
    building C or Cython extensions against pyarrow.
    """
    libraries = ['arrow_python', 'arrow']
    return libraries
347
+
348
+
349
def create_library_symlinks():
    """
    With Linux and macOS wheels, the bundled shared libraries have an embedded
    ABI version like libarrow.so.17 or libarrow.17.dylib and so linking to them
    with -larrow won't work unless we create symlinks at locations like
    site-packages/pyarrow/libarrow.so. This unfortunate workaround addresses
    prior problems we had with shipping two copies of the shared libraries to
    permit third party projects like turbodbc to build their C++ extensions
    against the pyarrow wheels.

    This function must only be invoked once and only when the shared libraries
    are bundled with the Python package, which should only apply to wheel-based
    installs. It requires write access to the site-packages/pyarrow directory
    and so depending on your system may need to be run with root.

    Does nothing on Windows. Symlinks that already exist are left alone. A
    PermissionError while creating a link is reported on stdout (including
    the path that could not be created) and the remaining libraries are still
    processed.
    """
    import glob
    if _sys.platform == 'win32':
        return
    package_cwd = _os.path.dirname(__file__)

    if _sys.platform == 'linux':
        bundled_libs = glob.glob(_os.path.join(package_cwd, '*.so.*'))

        def get_symlink_path(hard_path):
            # libarrow.so.17 -> libarrow.so
            return hard_path.rsplit('.', 1)[0]
    else:
        bundled_libs = glob.glob(_os.path.join(package_cwd, '*.*.dylib'))

        def get_symlink_path(hard_path):
            # libarrow.17.dylib -> libarrow.dylib
            return '.'.join((hard_path.rsplit('.', 2)[0], 'dylib'))

    for lib_hard_path in bundled_libs:
        symlink_path = get_symlink_path(lib_hard_path)
        if _os.path.exists(symlink_path):
            continue
        try:
            _os.symlink(lib_hard_path, symlink_path)
        except PermissionError:
            # The "{}" placeholder was previously printed verbatim; fill in
            # the path so the user can see which link could not be created.
            print("Tried creating symlink {}. If you need to link to "
                  "bundled shared libraries, run "
                  "pyarrow.create_library_symlinks() as root"
                  .format(symlink_path))
390
+
391
+
392
def get_library_dirs():
    """
    Return lists of directories likely to contain Arrow C++ libraries for
    linking C or Cython extensions using pyarrow

    The result always starts with the directory containing this module.
    Further candidates are appended without duplicates: directories reported
    by pkg-config for the "arrow" and "arrow_python" packages, the
    ``Library/lib`` directory next to the Python executable on Windows (only
    when it contains ``arrow.lib``), and ``$ARROW_HOME/lib`` when the
    ``ARROW_HOME`` environment variable is set.

    Raises ValueError when pkg-config returns a non-empty value that does not
    start with ``-L``.
    """
    package_cwd = _os.path.dirname(__file__)
    library_dirs = [package_cwd]

    def append_library_dir(library_dir):
        if library_dir not in library_dirs:
            library_dirs.append(library_dir)

    # Search library paths via pkg-config. This is necessary if the user
    # installed libarrow and the other shared libraries manually and they
    # are not shipped inside the pyarrow package (see also ARROW-2976).
    # The pkg-config executable is resolved by the helpers themselves.
    for pkgname in ["arrow", "arrow_python"]:
        if _has_pkg_config(pkgname):
            library_dir = _read_pkg_config_variable(pkgname,
                                                    ["--libs-only-L"])
            # pkg-config output could be empty if Arrow is installed
            # as a system package.
            if library_dir:
                if not library_dir.startswith("-L"):
                    raise ValueError(
                        "pkg-config --libs-only-L returned unexpected "
                        "value {!r}".format(library_dir))
                # Strip the leading "-L" to keep only the directory.
                append_library_dir(library_dir[2:])

    if _sys.platform == 'win32':
        # TODO(wesm): Is this necessary, or does setuptools within a conda
        # installation add Library\lib to the linker path for MSVC?
        python_base_install = _os.path.dirname(_sys.executable)
        library_dir = _os.path.join(python_base_install, 'Library', 'lib')

        if _os.path.exists(_os.path.join(library_dir, 'arrow.lib')):
            append_library_dir(library_dir)

    # ARROW-4074: Allow for ARROW_HOME to be set to some other directory
    if _os.environ.get('ARROW_HOME'):
        append_library_dir(_os.path.join(_os.environ['ARROW_HOME'], 'lib'))
    else:
        # Python wheels bundle the Arrow libraries in the pyarrow directory.
        append_library_dir(_os.path.dirname(_os.path.abspath(__file__)))

    return library_dirs
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_azurefs.pyx ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # cython: language_level = 3
19
+
20
+ from cython cimport binding
21
+
22
+
23
+ from pyarrow.lib import frombytes, tobytes
24
+ from pyarrow.includes.libarrow_fs cimport *
25
+ from pyarrow._fs cimport FileSystem
26
+
27
+
28
cdef class AzureFileSystem(FileSystem):
    """
    FileSystem implementation backed by Azure Blob Storage.

    Both flat namespace and hierarchical namespace (HNS) a.k.a. Data Lake Gen2
    storage accounts are supported. HNS is detected automatically and HNS specific
    features are used when they provide a performance advantage. The Azurite emulator
    is supported as well. Note: `/` is the only supported delimiter.

    The storage account is treated as the root of the filesystem. When enabled,
    containers are created or deleted during relevant directory operations, which
    requires authentication with the additional permissions.

    Authentication defaults to `DefaultAzureCredential <https://github.com/Azure/azure-sdk-for-cpp/blob/main/sdk/identity/azure-identity/README.md#defaultazurecredential>`__,
    which tries several types of authentication and goes with the first one that
    works. If any authentication parameters are provided when initialising the
    FileSystem, they are used instead of the default credential.

    Parameters
    ----------
    account_name : str
        Azure Blob Storage account name. This is the globally unique identifier for the
        storage account.
    account_key : str, default None
        Account key of the storage account. Pass None to use default credential.
    blob_storage_authority : str, default None
        hostname[:port] of the Blob Service. Defaults to `.blob.core.windows.net`. Useful
        for connecting to a local emulator, like Azurite.
    dfs_storage_authority : str, default None
        hostname[:port] of the Data Lake Gen 2 Service. Defaults to
        `.dfs.core.windows.net`. Useful for connecting to a local emulator, like Azurite.
    blob_storage_scheme : str, default None
        Either `http` or `https`. Defaults to `https`. Useful for connecting to a local
        emulator, like Azurite.
    dfs_storage_scheme : str, default None
        Either `http` or `https`. Defaults to `https`. Useful for connecting to a local
        emulator, like Azurite.

    Examples
    --------
    >>> from pyarrow import fs
    >>> azure_fs = fs.AzureFileSystem(account_name='myaccount')
    >>> azurite_fs = fs.AzureFileSystem(
    ...     account_name='devstoreaccount1',
    ...     account_key='Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==',
    ...     blob_storage_authority='127.0.0.1:10000',
    ...     dfs_storage_authority='127.0.0.1:10000',
    ...     blob_storage_scheme='http',
    ...     dfs_storage_scheme='http',
    ... )

    For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`.
    """
    cdef:
        CAzureFileSystem* azurefs
        c_string account_key

    def __init__(self, account_name, *, account_key=None, blob_storage_authority=None,
                 dfs_storage_authority=None, blob_storage_scheme=None,
                 dfs_storage_scheme=None):
        cdef:
            CAzureOptions c_options
            shared_ptr[CAzureFileSystem] c_filesystem

        c_options.account_name = tobytes(account_name)

        # Endpoint overrides are applied only when a non-empty value is given.
        if blob_storage_authority:
            c_options.blob_storage_authority = tobytes(blob_storage_authority)
        if dfs_storage_authority:
            c_options.dfs_storage_authority = tobytes(dfs_storage_authority)
        if blob_storage_scheme:
            c_options.blob_storage_scheme = tobytes(blob_storage_scheme)
        if dfs_storage_scheme:
            c_options.dfs_storage_scheme = tobytes(dfs_storage_scheme)

        if not account_key:
            c_options.ConfigureDefaultCredential()
        else:
            key_bytes = tobytes(account_key)
            c_options.ConfigureAccountKeyCredential(key_bytes)
            # Kept on the instance so that __reduce__ can hand it back.
            self.account_key = key_bytes

        with nogil:
            c_filesystem = GetResultValue(CAzureFileSystem.Make(c_options))

        self.init(<shared_ptr[CFileSystem]> c_filesystem)

    cdef init(self, const shared_ptr[CFileSystem]& wrapped):
        # Let the base class store the handle, then keep a typed raw pointer
        # to the same object for Azure-specific calls.
        FileSystem.init(self, wrapped)
        self.azurefs = <CAzureFileSystem*> wrapped.get()

    @staticmethod
    @binding(True)  # Required for cython < 3
    def _reconstruct(kwargs):
        # The reconstructor returned by __reduce__ only receives positional
        # arguments, so the keyword arguments travel as a single dict.
        return AzureFileSystem(**kwargs)

    def __reduce__(self):
        cdef CAzureOptions c_options = self.azurefs.options()
        state = {
            'account_name': frombytes(c_options.account_name),
            'account_key': frombytes(self.account_key),
            'blob_storage_authority': frombytes(c_options.blob_storage_authority),
            'dfs_storage_authority': frombytes(c_options.dfs_storage_authority),
            'blob_storage_scheme': frombytes(c_options.blob_storage_scheme),
            'dfs_storage_scheme': frombytes(c_options.dfs_storage_scheme),
        }
        return (AzureFileSystem._reconstruct, (state,))
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_compute.pyx ADDED
The diff for this file is too large to render. See raw diff
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_compute_docstrings.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ """
19
+ Custom documentation additions for compute functions.
20
+ """
21
+
22
# Extra documentation text for compute functions, keyed by function name.
+ function_doc_additions = {}
23
+
24
# Usage example for "filter", with and without null_selection_behavior.
+ function_doc_additions["filter"] = """
25
+ Examples
26
+ --------
27
+ >>> import pyarrow as pa
28
+ >>> arr = pa.array(["a", "b", "c", None, "e"])
29
+ >>> mask = pa.array([True, False, None, False, True])
30
+ >>> arr.filter(mask)
31
+ <pyarrow.lib.StringArray object at ...>
32
+ [
33
+ "a",
34
+ "e"
35
+ ]
36
+ >>> arr.filter(mask, null_selection_behavior='emit_null')
37
+ <pyarrow.lib.StringArray object at ...>
38
+ [
39
+ "a",
40
+ null,
41
+ "e"
42
+ ]
43
+ """
44
+
45
# Usage example for "mode", requesting the two most common values.
+ function_doc_additions["mode"] = """
46
+ Examples
47
+ --------
48
+ >>> import pyarrow as pa
49
+ >>> import pyarrow.compute as pc
50
+ >>> arr = pa.array([1, 1, 2, 2, 3, 2, 2, 2])
51
+ >>> modes = pc.mode(arr, 2)
52
+ >>> modes[0]
53
+ <pyarrow.StructScalar: [('mode', 2), ('count', 5)]>
54
+ >>> modes[1]
55
+ <pyarrow.StructScalar: [('mode', 1), ('count', 2)]>
56
+ """
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_cuda.pyx ADDED
@@ -0,0 +1,1080 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+
19
+ from pyarrow.lib cimport *
20
+ from pyarrow.includes.libarrow_cuda cimport *
21
+ from pyarrow.lib import allocate_buffer, as_buffer, ArrowTypeError
22
+ from pyarrow.util import get_contiguous_span
23
+ cimport cpython as cp
24
+
25
+
26
+ cdef class Context(_Weakrefable):
27
+ """
28
+ CUDA driver context.
29
+ """
30
+
31
+ def __init__(self, *args, **kwargs):
32
+ """
33
+ Create a CUDA driver context for a particular device.
34
+
35
+ If a CUDA context handle is passed, it is wrapped, otherwise
36
+ a default CUDA context for the given device is requested.
37
+
38
+ Parameters
39
+ ----------
40
+ device_number : int (default 0)
41
+ Specify the GPU device for which the CUDA driver context is
42
+ requested.
43
+ handle : int, optional
44
+ Specify CUDA handle for a shared context that has been created
45
+ by another library.
46
+ """
47
+ # This method exposed because autodoc doesn't pick __cinit__
48
+
49
+ def __cinit__(self, int device_number=0, uintptr_t handle=0):
50
+ cdef CCudaDeviceManager* manager
51
+ manager = GetResultValue(CCudaDeviceManager.Instance())
52
+ cdef int n = manager.num_devices()
53
+ if device_number >= n or device_number < 0:
54
+ self.context.reset()
55
+ raise ValueError('device_number argument must be '
56
+ 'non-negative less than %s' % (n))
57
+ if handle == 0:
58
+ self.context = GetResultValue(manager.GetContext(device_number))
59
+ else:
60
+ self.context = GetResultValue(manager.GetSharedContext(
61
+ device_number, <void*>handle))
62
+ self.device_number = device_number
63
+
64
+ @staticmethod
65
+ def from_numba(context=None):
66
+ """
67
+ Create a Context instance from a Numba CUDA context.
68
+
69
+ Parameters
70
+ ----------
71
+ context : {numba.cuda.cudadrv.driver.Context, None}
72
+ A Numba CUDA context instance.
73
+ If None, the current Numba context is used.
74
+
75
+ Returns
76
+ -------
77
+ shared_context : pyarrow.cuda.Context
78
+ Context instance.
79
+ """
80
+ if context is None:
81
+ import numba.cuda
82
+ context = numba.cuda.current_context()
83
+ return Context(device_number=context.device.id,
84
+ handle=context.handle.value)
85
+
86
+ def to_numba(self):
87
+ """
88
+ Convert Context to a Numba CUDA context.
89
+
90
+ Returns
91
+ -------
92
+ context : numba.cuda.cudadrv.driver.Context
93
+ Numba CUDA context instance.
94
+ """
95
+ import ctypes
96
+ import numba.cuda
97
+ device = numba.cuda.gpus[self.device_number]
98
+ handle = ctypes.c_void_p(self.handle)
99
+ context = numba.cuda.cudadrv.driver.Context(device, handle)
100
+
101
+ class DummyPendingDeallocs(object):
102
+ # Context is managed by pyarrow
103
+ def add_item(self, *args, **kwargs):
104
+ pass
105
+
106
+ context.deallocations = DummyPendingDeallocs()
107
+ return context
108
+
109
+ @staticmethod
110
+ def get_num_devices():
111
+ """ Return the number of GPU devices.
112
+ """
113
+ cdef CCudaDeviceManager* manager
114
+ manager = GetResultValue(CCudaDeviceManager.Instance())
115
+ return manager.num_devices()
116
+
117
+ @property
118
+ def device_number(self):
119
+ """ Return context device number.
120
+ """
121
+ return self.device_number
122
+
123
+ @property
124
+ def handle(self):
125
+ """ Return pointer to context handle.
126
+ """
127
+ return <uintptr_t>self.context.get().handle()
128
+
129
+ cdef void init(self, const shared_ptr[CCudaContext]& ctx):
130
+ self.context = ctx
131
+
132
+ def synchronize(self):
133
+ """Blocks until the device has completed all preceding requested
134
+ tasks.
135
+ """
136
+ check_status(self.context.get().Synchronize())
137
+
138
+ @property
139
+ def bytes_allocated(self):
140
+ """Return the number of allocated bytes.
141
+ """
142
+ return self.context.get().bytes_allocated()
143
+
144
+ def get_device_address(self, uintptr_t address):
145
+ """Return the device address that is reachable from kernels running in
146
+ the context
147
+
148
+ Parameters
149
+ ----------
150
+ address : int
151
+ Specify memory address value
152
+
153
+ Returns
154
+ -------
155
+ device_address : int
156
+ Device address accessible from device context
157
+
158
+ Notes
159
+ -----
160
+ The device address is defined as a memory address accessible
161
+ by device. While it is often a device memory address but it
162
+ can be also a host memory address, for instance, when the
163
+ memory is allocated as host memory (using cudaMallocHost or
164
+ cudaHostAlloc) or as managed memory (using cudaMallocManaged)
165
+ or the host memory is page-locked (using cudaHostRegister).
166
+ """
167
+ return GetResultValue(self.context.get().GetDeviceAddress(address))
168
+
169
+ def new_buffer(self, int64_t nbytes):
170
+ """Return new device buffer.
171
+
172
+ Parameters
173
+ ----------
174
+ nbytes : int
175
+ Specify the number of bytes to be allocated.
176
+
177
+ Returns
178
+ -------
179
+ buf : CudaBuffer
180
+ Allocated buffer.
181
+ """
182
+ cdef:
183
+ shared_ptr[CCudaBuffer] cudabuf
184
+ with nogil:
185
+ cudabuf = GetResultValue(self.context.get().Allocate(nbytes))
186
+ return pyarrow_wrap_cudabuffer(cudabuf)
187
+
188
+ @property
189
+ def memory_manager(self):
190
+ """
191
+ The default memory manager tied to this context's device.
192
+
193
+ Returns
194
+ -------
195
+ MemoryManager
196
+ """
197
+ return MemoryManager.wrap(self.context.get().memory_manager())
198
+
199
+ @property
200
+ def device(self):
201
+ """
202
+ The device instance associated with this context.
203
+
204
+ Returns
205
+ -------
206
+ Device
207
+ """
208
+ return Device.wrap(self.context.get().device())
209
+
210
+ def foreign_buffer(self, address, size, base=None):
211
+ """
212
+ Create device buffer from address and size as a view.
213
+
214
+ The caller is responsible for allocating and freeing the
215
+ memory. When `address==size==0` then a new zero-sized buffer
216
+ is returned.
217
+
218
+ Parameters
219
+ ----------
220
+ address : int
221
+ Specify the starting address of the buffer. The address can
222
+ refer to both device or host memory but it must be
223
+ accessible from device after mapping it with
224
+ `get_device_address` method.
225
+ size : int
226
+ Specify the size of device buffer in bytes.
227
+ base : {None, object}
228
+ Specify object that owns the referenced memory.
229
+
230
+ Returns
231
+ -------
232
+ cbuf : CudaBuffer
233
+ Device buffer as a view of device reachable memory.
234
+
235
+ """
236
+ if not address and size == 0:
237
+ return self.new_buffer(0)
238
+ cdef:
239
+ uintptr_t c_addr = self.get_device_address(address)
240
+ int64_t c_size = size
241
+ shared_ptr[CCudaBuffer] cudabuf
242
+
243
+ cudabuf = GetResultValue(self.context.get().View(
244
+ <uint8_t*>c_addr, c_size))
245
+ return pyarrow_wrap_cudabuffer_base(cudabuf, base)
246
+
247
+ def open_ipc_buffer(self, ipc_handle):
248
+ """ Open existing CUDA IPC memory handle
249
+
250
+ Parameters
251
+ ----------
252
+ ipc_handle : IpcMemHandle
253
+ Specify opaque pointer to CUipcMemHandle (driver API).
254
+
255
+ Returns
256
+ -------
257
+ buf : CudaBuffer
258
+ referencing device buffer
259
+ """
260
+ handle = pyarrow_unwrap_cudaipcmemhandle(ipc_handle)
261
+ cdef shared_ptr[CCudaBuffer] cudabuf
262
+ with nogil:
263
+ cudabuf = GetResultValue(
264
+ self.context.get().OpenIpcBuffer(handle.get()[0]))
265
+ return pyarrow_wrap_cudabuffer(cudabuf)
266
+
267
def buffer_from_data(self, object data, int64_t offset=0, int64_t size=-1):
    """Create device buffer and initialize with data.

    Parameters
    ----------
    data : {CudaBuffer, HostBuffer, Buffer, array-like}
        Specify data to be copied to device buffer.
    offset : int
        Specify the offset of input buffer for device data
        buffering. Default: 0.
    size : int
        Specify the size of device buffer in bytes. Default: all
        (starting from input offset)

    Returns
    -------
    cbuf : CudaBuffer
        Device buffer with copied data.

    Raises
    ------
    ValueError
        If `offset` is negative or outside the input, or if
        `offset + size` exceeds the input size.
    """
    # Anything that is not already a CudaBuffer is treated as host data
    # and converted with as_buffer.
    is_host_data = not pyarrow_is_cudabuffer(data)
    buf = as_buffer(data) if is_host_data else data

    bsize = buf.size
    # For an empty input only offset 0 is valid; without the last clause
    # a positive offset would yield a negative default size below.
    if offset < 0 or (bsize and offset >= bsize) \
            or (bsize == 0 and offset != 0):
        raise ValueError('offset argument is out-of-range')
    if size < 0:
        size = bsize - offset
    elif offset + size > bsize:
        raise ValueError(
            'requested larger slice than available in device buffer')

    # Only slice when the requested window differs from the whole input.
    if offset != 0 or size != bsize:
        buf = buf.slice(offset, size)

    result = self.new_buffer(size)
    if is_host_data:
        result.copy_from_host(buf, position=0, nbytes=size)
    else:
        result.copy_from_device(buf, position=0, nbytes=size)
    return result
307
+
308
def buffer_from_object(self, obj):
    """Create device buffer view of arbitrary object that references
    device accessible memory.

    When the object contains a non-contiguous view of device
    accessible memory then the returned device buffer will contain
    contiguous view of the memory, that is, including the
    intermediate data that is otherwise invisible to the input
    object.

    Parameters
    ----------
    obj : {object, Buffer, HostBuffer, CudaBuffer, ...}
        Specify an object that holds (device or host) address that
        can be accessed from device. This includes objects with
        types defined in pyarrow.cuda as well as arbitrary objects
        that implement the CUDA array interface as defined by numba.

    Returns
    -------
    cbuf : CudaBuffer
        Device buffer as a view of device accessible memory.

    Raises
    ------
    ArrowTypeError
        If `obj` is none of the supported kinds.
    """
    # CudaBuffer and HostBuffer both derive from Buffer, so they must be
    # tested before the generic Buffer case; otherwise the CudaBuffer
    # branch can never be reached.
    if isinstance(obj, CudaBuffer):
        return obj
    elif isinstance(obj, HostBuffer):
        return self.foreign_buffer(obj.address, obj.size, base=obj)
    elif isinstance(obj, Buffer):
        return CudaBuffer.from_buffer(obj)
    elif hasattr(obj, '__cuda_array_interface__'):
        desc = obj.__cuda_array_interface__
        addr = desc['data'][0]
        if addr is None:
            return self.new_buffer(0)
        import numpy as np
        # Span of bytes touched by the (possibly strided) array,
        # relative to its data pointer.
        start, end = get_contiguous_span(
            desc['shape'], desc.get('strides'),
            np.dtype(desc['typestr']).itemsize)
        return self.foreign_buffer(addr + start, end - start, base=obj)
    raise ArrowTypeError('cannot create device buffer view from'
                         ' `%s` object' % (type(obj)))
350
+
351
+
352
cdef class IpcMemHandle(_Weakrefable):
    """A serializable container for a CUDA IPC handle.
    """
    cdef void init(self, shared_ptr[CCudaIpcMemHandle]& h):
        # Store the shared C++ handle on this wrapper.
        self.handle = h

    @staticmethod
    def from_buffer(Buffer opaque_handle):
        """Create IpcMemHandle from opaque buffer (e.g. from another
        process)

        Parameters
        ----------
        opaque_handle :
            a CUipcMemHandle as a const void*

        Returns
        -------
        ipc_handle : IpcMemHandle
        """
        cdef:
            shared_ptr[CBuffer] c_opaque = pyarrow_unwrap_buffer(opaque_handle)
            shared_ptr[CCudaIpcMemHandle] c_handle

        c_handle = GetResultValue(
            CCudaIpcMemHandle.FromBuffer(c_opaque.get().data()))
        return pyarrow_wrap_cudaipcmemhandle(c_handle)

    def serialize(self, pool=None):
        """Write IpcMemHandle to a Buffer

        Parameters
        ----------
        pool : {MemoryPool, None}
            Specify a pool to allocate memory from

        Returns
        -------
        buf : Buffer
            The serialized buffer.
        """
        cdef:
            CMemoryPool* c_pool = maybe_unbox_memory_pool(pool)
            CCudaIpcMemHandle* c_handle = self.handle.get()
            shared_ptr[CBuffer] c_serialized

        with nogil:
            c_serialized = GetResultValue(c_handle.Serialize(c_pool))
        return pyarrow_wrap_buffer(c_serialized)
399
+
400
+
401
cdef class CudaBuffer(Buffer):
    """An Arrow buffer with data located in a GPU device.

    To create a CudaBuffer instance, use Context.device_buffer().

    The memory allocated in a CudaBuffer is freed when the buffer object
    is deleted.
    """

    def __init__(self):
        raise TypeError("Do not call CudaBuffer's constructor directly, use "
                        "`<pyarrow.Context instance>.device_buffer`"
                        " method instead.")

    cdef void init_cuda(self,
                        const shared_ptr[CCudaBuffer]& buffer,
                        object base):
        # Keep the typed CUDA pointer, initialise the generic Buffer part
        # with the same object, and remember `base` on the wrapper.
        self.cuda_buffer = buffer
        self.init(<shared_ptr[CBuffer]> buffer)
        self.base = base

    @staticmethod
    def from_buffer(buf):
        """ Convert back generic buffer into CudaBuffer

        Parameters
        ----------
        buf : Buffer
            Specify buffer containing CudaBuffer

        Returns
        -------
        dbuf : CudaBuffer
            Resulting device buffer.
        """
        c_buf = pyarrow_unwrap_buffer(buf)
        cuda_buffer = GetResultValue(CCudaBuffer.FromBuffer(c_buf))
        return pyarrow_wrap_cudabuffer(cuda_buffer)

    @staticmethod
    def from_numba(mem):
        """Create a CudaBuffer view from numba MemoryPointer instance.

        Parameters
        ----------
        mem : numba.cuda.cudadrv.driver.MemoryPointer

        Returns
        -------
        cbuf : CudaBuffer
            Device buffer as a view of numba MemoryPointer.
        """
        ctx = Context.from_numba(mem.context)
        # A pointer-less, zero-sized memory maps to a fresh empty buffer.
        if mem.device_pointer.value is None and mem.size == 0:
            return ctx.new_buffer(0)
        return ctx.foreign_buffer(mem.device_pointer.value, mem.size, base=mem)

    def to_numba(self):
        """Return numba memory pointer of CudaBuffer instance.
        """
        import ctypes
        from numba.cuda.cudadrv.driver import MemoryPointer
        return MemoryPointer(self.context.to_numba(),
                             pointer=ctypes.c_void_p(self.address),
                             size=self.size)

    cdef getitem(self, int64_t i):
        # Fetch a single byte by copying it to the host.
        return self.copy_to_host(position=i, nbytes=1)[0]

    def copy_to_host(self, int64_t position=0, int64_t nbytes=-1,
                     Buffer buf=None,
                     MemoryPool memory_pool=None, c_bool resizable=False):
        """Copy memory from GPU device to CPU host

        Caller is responsible for ensuring that all tasks affecting
        the memory are finished. Use

          `<CudaBuffer instance>.context.synchronize()`

        when needed.

        Parameters
        ----------
        position : int
            Specify the starting position of the source data in GPU
            device buffer. Default: 0.
        nbytes : int
            Specify the number of bytes to copy. Default: -1 (all from
            the position until host buffer is full).
        buf : Buffer
            Specify a pre-allocated output buffer in host. Default: None
            (allocate new output buffer).
        memory_pool : MemoryPool
        resizable : bool
            Specify extra arguments to allocate_buffer. Used only when
            buf is None.

        Returns
        -------
        buf : Buffer
            Output buffer in host.

        Raises
        ------
        ValueError
            If `position` is out of range, if `nbytes` exceeds the bytes
            available in the device buffer from `position`, or if
            `nbytes` does not fit into the supplied host buffer.

        """
        if position < 0 or (self.size and position > self.size) \
           or (self.size == 0 and position != 0):
            raise ValueError('position argument is out-of-range')
        cdef:
            int64_t c_nbytes
        if buf is None:
            if nbytes < 0:
                # copy all starting from position to new host buffer
                c_nbytes = self.size - position
            else:
                if nbytes > self.size - position:
                    raise ValueError(
                        'requested more to copy than available from '
                        'device buffer')
                # copy nbytes starting from position to new host buffer
                c_nbytes = nbytes
            buf = allocate_buffer(c_nbytes, memory_pool=memory_pool,
                                  resizable=resizable)
        else:
            if nbytes < 0:
                # copy all from position until given host buffer is full
                c_nbytes = min(self.size - position, buf.size)
            else:
                if nbytes > buf.size:
                    raise ValueError(
                        'requested copy does not fit into host buffer')
                # The source side must be bounded as well, exactly as in
                # the `buf is None` branch; otherwise the copy would read
                # past the end of the device buffer.
                if nbytes > self.size - position:
                    raise ValueError(
                        'requested more to copy than available from '
                        'device buffer')
                # copy nbytes from position to given host buffer
                c_nbytes = nbytes

        cdef:
            shared_ptr[CBuffer] c_buf = pyarrow_unwrap_buffer(buf)
            int64_t c_position = position
        with nogil:
            check_status(self.cuda_buffer.get()
                         .CopyToHost(c_position, c_nbytes,
                                     c_buf.get().mutable_data()))
        return buf

    def copy_from_host(self, data, int64_t position=0, int64_t nbytes=-1):
        """Copy data from host to device.

        The device buffer must be pre-allocated.

        Parameters
        ----------
        data : {Buffer, array-like}
            Specify data in host. It can be array-like that is valid
            argument to py_buffer
        position : int
            Specify the starting position of the copy in device buffer.
            Default: 0.
        nbytes : int
            Specify the number of bytes to copy. Default: -1 (all from
            source until device buffer, starting from position, is full)

        Returns
        -------
        nbytes : int
            Number of bytes copied.
        """
        if position < 0 or position > self.size:
            raise ValueError('position argument is out-of-range')
        cdef:
            int64_t c_nbytes
        buf = as_buffer(data)

        if nbytes < 0:
            # copy from host buffer to device buffer starting from
            # position until device buffer is full
            c_nbytes = min(self.size - position, buf.size)
        else:
            if nbytes > buf.size:
                raise ValueError(
                    'requested more to copy than available from host buffer')
            if nbytes > self.size - position:
                raise ValueError(
                    'requested more to copy than available in device buffer')
            # copy nbytes from host buffer to device buffer starting
            # from position
            c_nbytes = nbytes

        cdef:
            shared_ptr[CBuffer] c_buf = pyarrow_unwrap_buffer(buf)
            int64_t c_position = position
        with nogil:
            check_status(self.cuda_buffer.get().
                         CopyFromHost(c_position, c_buf.get().data(),
                                      c_nbytes))
        return c_nbytes

    def copy_from_device(self, buf, int64_t position=0, int64_t nbytes=-1):
        """Copy data from device to device.

        Parameters
        ----------
        buf : CudaBuffer
            Specify source device buffer.
        position : int
            Specify the starting position of the copy in device buffer.
            Default: 0.
        nbytes : int
            Specify the number of bytes to copy. Default: -1 (all from
            source until device buffer, starting from position, is full)

        Returns
        -------
        nbytes : int
            Number of bytes copied.

        """
        if position < 0 or position > self.size:
            raise ValueError('position argument is out-of-range')
        cdef:
            int64_t c_nbytes

        if nbytes < 0:
            # copy from source device buffer to device buffer starting
            # from position until device buffer is full
            c_nbytes = min(self.size - position, buf.size)
        else:
            if nbytes > buf.size:
                raise ValueError(
                    'requested more to copy than available from device buffer')
            if nbytes > self.size - position:
                raise ValueError(
                    'requested more to copy than available in device buffer')
            # copy nbytes from source device buffer to device buffer
            # starting from position
            c_nbytes = nbytes

        cdef:
            shared_ptr[CCudaBuffer] c_buf = pyarrow_unwrap_cudabuffer(buf)
            int64_t c_position = position
            shared_ptr[CCudaContext] c_src_ctx = pyarrow_unwrap_cudacontext(
                buf.context)
            void* c_source_data = <void*>(c_buf.get().address())

        # Buffers living in different contexts need the cross-device copy.
        if self.context.handle != buf.context.handle:
            with nogil:
                check_status(self.cuda_buffer.get().
                             CopyFromAnotherDevice(c_src_ctx, c_position,
                                                   c_source_data, c_nbytes))
        else:
            with nogil:
                check_status(self.cuda_buffer.get().
                             CopyFromDevice(c_position, c_source_data,
                                            c_nbytes))
        return c_nbytes

    def export_for_ipc(self):
        """
        Expose this device buffer as IPC memory which can be used in other
        processes.

        After calling this function, this device memory will not be
        freed when the CudaBuffer is destructed.

        Returns
        -------
        ipc_handle : IpcMemHandle
            The exported IPC handle

        """
        cdef shared_ptr[CCudaIpcMemHandle] handle
        with nogil:
            handle = GetResultValue(self.cuda_buffer.get().ExportForIpc())
        return pyarrow_wrap_cudaipcmemhandle(handle)

    @property
    def context(self):
        """Returns the CUDA driver context of this buffer.
        """
        return pyarrow_wrap_cudacontext(self.cuda_buffer.get().context())

    def slice(self, offset=0, length=None):
        """Return slice of device buffer

        Parameters
        ----------
        offset : int, default 0
            Specify offset from the start of device buffer to slice
        length : int, default None
            Specify the length of slice (default is until end of device
            buffer starting from offset). If the length is larger than
            the data available, the returned slice will have a size of
            the available data starting from the offset.

        Returns
        -------
        sliced : CudaBuffer
            Zero-copy slice of device buffer.

        """
        if offset < 0 or (self.size and offset >= self.size):
            raise ValueError('offset argument is out-of-range')
        cdef int64_t offset_ = offset
        cdef int64_t size
        if length is None:
            size = self.size - offset_
        elif offset + length <= self.size:
            size = length
        else:
            # Requested length overshoots the buffer: clamp to what is left.
            size = self.size - offset
        parent = pyarrow_unwrap_cudabuffer(self)
        return pyarrow_wrap_cudabuffer(make_shared[CCudaBuffer](parent,
                                                                offset_, size))

    def to_pybytes(self):
        """Return device buffer content as Python bytes.
        """
        return self.copy_to_host().to_pybytes()

    def __getbuffer__(self, cp.Py_buffer* buffer, int flags):
        # Device buffer contains data pointers on the device. Hence,
        # cannot support buffer protocol PEP-3118 for CudaBuffer.
        raise BufferError('buffer protocol for device buffer not supported')
720
+
721
+
722
cdef class HostBuffer(Buffer):
    """Device-accessible CPU memory created using cudaHostAlloc.

    To create a HostBuffer instance, use

      cuda.new_host_buffer(<nbytes>)
    """

    def __init__(self):
        # Instances are only produced through the wrapping helper.
        raise TypeError("Do not call HostBuffer's constructor directly,"
                        " use `cuda.new_host_buffer` function instead.")

    cdef void init_host(self, const shared_ptr[CCudaHostBuffer]& buffer):
        # Keep the typed host pointer and initialise the generic Buffer
        # part with the same object.
        self.host_buffer = buffer
        self.init(<shared_ptr[CBuffer]> buffer)

    @property
    def size(self):
        """Size reported by the underlying host buffer."""
        nbytes = self.host_buffer.get().size()
        return nbytes
741
+
742
+
743
cdef class BufferReader(NativeFile):
    """File interface for zero-copy read from CUDA buffers.

    Note: Read methods return pointers to device memory. This means
    you must be careful using this interface with any Arrow code which
    may expect to be able to do anything other than pointer arithmetic
    on the returned buffers.
    """

    def __cinit__(self, CudaBuffer obj):
        self.buffer = obj
        self.reader = new CCudaBufferReader(self.buffer.buffer)
        # The shared_ptr is constructed from the raw reader pointer and
        # installed as this file's random-access source.
        self.set_random_access_file(
            shared_ptr[CRandomAccessFile](self.reader))
        self.is_readable = True

    def read_buffer(self, nbytes=None):
        """Return a slice view of the underlying device buffer.

        The slice will start at the current reader position and will
        have specified size in bytes.

        Parameters
        ----------
        nbytes : int, default None
            Specify the number of bytes to read. Default: None (read all
            remaining bytes).

        Returns
        -------
        cbuf : CudaBuffer
            New device buffer.

        """
        cdef:
            int64_t c_count
            shared_ptr[CCudaBuffer] c_result

        if nbytes is not None:
            c_count = nbytes
        else:
            # Everything between the current position and the end.
            c_count = self.size() - self.tell()

        with nogil:
            c_result = static_pointer_cast[CCudaBuffer, CBuffer](
                GetResultValue(self.reader.Read(c_count)))

        return pyarrow_wrap_cudabuffer(c_result)
791
+
792
+
793
cdef class BufferWriter(NativeFile):
    """File interface for writing to CUDA buffers.

    By default writes are unbuffered. Use set_buffer_size to enable
    buffering.
    """

    def __cinit__(self, CudaBuffer buffer):
        self.buffer = buffer
        self.writer = new CCudaBufferWriter(self.buffer.cuda_buffer)
        # The shared_ptr is constructed from the raw writer pointer and
        # installed as this file's output stream.
        self.set_output_stream(shared_ptr[COutputStream](self.writer))
        self.is_writable = True

    def writeat(self, int64_t position, object data):
        """Write data to buffer starting from position.

        Parameters
        ----------
        position : int
            Specify device buffer position where the data will be
            written.
        data : array-like
            Specify data, the data instance must implement buffer
            protocol.
        """
        cdef:
            Buffer host_buf = as_buffer(data)
            const uint8_t* c_bytes = host_buf.buffer.get().data()
            int64_t c_length = host_buf.buffer.get().size()

        with nogil:
            check_status(self.writer.WriteAt(position, c_bytes, c_length))

    def flush(self):
        """ Flush the buffer stream """
        with nogil:
            check_status(self.writer.Flush())

    def seek(self, int64_t position, int whence=0):
        # TODO: remove this method after NativeFile.seek supports
        # writable files.
        cdef int64_t target

        # Only absolute (0) and relative-to-current (1) seeks are handled.
        if whence != 0 and whence != 1:
            raise ValueError("Invalid value of whence: {0}"
                             .format(whence))

        with nogil:
            if whence == 0:
                target = position
            else:
                target = GetResultValue(self.writer.Tell())
                target = target + position
            check_status(self.writer.Seek(target))
        return self.tell()

    @property
    def buffer_size(self):
        """Returns size of host (CPU) buffer, 0 for unbuffered
        """
        return self.writer.buffer_size()

    @buffer_size.setter
    def buffer_size(self, int64_t buffer_size):
        """Set CPU buffer size to limit calls to cudaMemcpy

        Parameters
        ----------
        buffer_size : int
            Specify the size of CPU buffer to allocate in bytes.
        """
        with nogil:
            check_status(self.writer.SetBufferSize(buffer_size))

    @property
    def num_bytes_buffered(self):
        """Returns number of bytes buffered on host
        """
        return self.writer.num_bytes_buffered()
872
+
873
+ # Functions
874
+
875
+
876
def new_host_buffer(const int64_t size, int device=0):
    """Return buffer with CUDA-accessible memory on CPU host

    Parameters
    ----------
    size : int
        Specify the number of bytes to be allocated.
    device : int
        Specify GPU device number.

    Returns
    -------
    dbuf : HostBuffer
        Allocated host buffer
    """
    cdef shared_ptr[CCudaHostBuffer] c_host_buf

    # The allocation does not need the interpreter, so release the GIL.
    with nogil:
        c_host_buf = GetResultValue(AllocateCudaHostBuffer(device, size))
    return pyarrow_wrap_cudahostbuffer(c_host_buf)
895
+
896
+
897
def serialize_record_batch(object batch, object ctx):
    """ Write record batch message to GPU device memory

    Parameters
    ----------
    batch : RecordBatch
        Record batch to write
    ctx : Context
        CUDA Context to allocate device memory from

    Returns
    -------
    dbuf : CudaBuffer
        device buffer which contains the record batch message
    """
    cdef:
        shared_ptr[CRecordBatch] c_batch = pyarrow_unwrap_batch(batch)
        shared_ptr[CCudaContext] c_ctx = pyarrow_unwrap_cudacontext(ctx)
        shared_ptr[CCudaBuffer] c_serialized

    with nogil:
        c_serialized = GetResultValue(
            CudaSerializeRecordBatch(c_batch.get()[0], c_ctx.get()))
    return pyarrow_wrap_cudabuffer(c_serialized)
918
+
919
+
920
def read_message(object source, pool=None):
    """ Read Arrow IPC message located on GPU device

    Parameters
    ----------
    source : {CudaBuffer, cuda.BufferReader}
        Device buffer or reader of device buffer.
    pool : MemoryPool (optional)
        Pool to allocate CPU memory for the metadata

    Returns
    -------
    message : Message
        The deserialized message, body still on device
    """
    cdef:
        Message result = Message.__new__(Message)
        CMemoryPool* pool_ = maybe_unbox_memory_pool(pool)
        BufferReader reader

    # Use a supplied reader as-is; wrap anything else. Previously `reader`
    # was only assigned in the wrapping case, so passing a BufferReader
    # left it unset before the read below.
    if isinstance(source, BufferReader):
        reader = source
    else:
        reader = BufferReader(source)

    with nogil:
        result.message = move(
            GetResultValue(ReadMessage(reader.reader, pool_)))
    return result
944
+
945
+
946
def read_record_batch(object buffer, object schema, *,
                      DictionaryMemo dictionary_memo=None, pool=None):
    """Construct RecordBatch referencing IPC message located on CUDA device.

    While the metadata is copied to host memory for deserialization,
    the record batch data remains on the device.

    Parameters
    ----------
    buffer :
        Device buffer containing the complete IPC message
    schema : Schema
        The schema for the record batch
    dictionary_memo : DictionaryMemo, optional
        If message contains dictionaries, must pass a populated
        DictionaryMemo
    pool : MemoryPool (optional)
        Pool to allocate metadata from

    Returns
    -------
    batch : RecordBatch
        Reconstructed record batch, with device pointers

    """
    cdef:
        shared_ptr[CSchema] c_schema = pyarrow_unwrap_schema(schema)
        shared_ptr[CCudaBuffer] c_buffer = pyarrow_unwrap_cudabuffer(buffer)
        CMemoryPool* c_pool = maybe_unbox_memory_pool(pool)
        CDictionaryMemo scratch_memo
        CDictionaryMemo* c_memo
        shared_ptr[CRecordBatch] c_batch

    # Fall back to a throwaway memo when the caller supplies none.
    c_memo = &scratch_memo
    if dictionary_memo is not None:
        c_memo = dictionary_memo.memo

    with nogil:
        c_batch = GetResultValue(CudaReadRecordBatch(
            c_schema, c_memo, c_buffer, c_pool))
    return pyarrow_wrap_batch(c_batch)
988
+
989
+
990
+ # Public API
991
+
992
+
993
cdef public api bint pyarrow_is_buffer(object buffer):
    # True for Buffer instances, subclasses included.
    return isinstance(buffer, Buffer)

# cudabuffer

cdef public api bint pyarrow_is_cudabuffer(object buffer):
    return isinstance(buffer, CudaBuffer)


cdef public api object \
        pyarrow_wrap_cudabuffer_base(const shared_ptr[CCudaBuffer]& buf, base):
    # Bypass __init__ (which always raises) and initialise directly.
    cdef CudaBuffer wrapped = CudaBuffer.__new__(CudaBuffer)
    wrapped.init_cuda(buf, base)
    return wrapped


cdef public api object \
        pyarrow_wrap_cudabuffer(const shared_ptr[CCudaBuffer]& buf):
    # Same as the *_base variant, with no base object attached.
    cdef CudaBuffer wrapped = CudaBuffer.__new__(CudaBuffer)
    wrapped.init_cuda(buf, None)
    return wrapped


cdef public api shared_ptr[CCudaBuffer] pyarrow_unwrap_cudabuffer(object obj):
    if not pyarrow_is_cudabuffer(obj):
        raise TypeError('expected CudaBuffer instance, got %s'
                        % (type(obj).__name__))
    return (<CudaBuffer>obj).cuda_buffer
1021
+
1022
+ # cudahostbuffer
1023
+
1024
cdef public api bint pyarrow_is_cudahostbuffer(object buffer):
    return isinstance(buffer, HostBuffer)


cdef public api object \
        pyarrow_wrap_cudahostbuffer(const shared_ptr[CCudaHostBuffer]& buf):
    # Bypass __init__ (which always raises) and initialise directly.
    cdef HostBuffer wrapped = HostBuffer.__new__(HostBuffer)
    wrapped.init_host(buf)
    return wrapped


cdef public api shared_ptr[CCudaHostBuffer] \
        pyarrow_unwrap_cudahostbuffer(object obj):
    if not pyarrow_is_cudahostbuffer(obj):
        raise TypeError('expected HostBuffer instance, got %s'
                        % (type(obj).__name__))
    return (<HostBuffer>obj).host_buffer
1041
+
1042
+ # cudacontext
1043
+
1044
cdef public api bint pyarrow_is_cudacontext(object ctx):
    return isinstance(ctx, Context)


cdef public api object \
        pyarrow_wrap_cudacontext(const shared_ptr[CCudaContext]& ctx):
    # Allocate without running __init__, then attach the C++ context.
    cdef Context wrapped = Context.__new__(Context)
    wrapped.init(ctx)
    return wrapped


cdef public api shared_ptr[CCudaContext] \
        pyarrow_unwrap_cudacontext(object obj):
    if not pyarrow_is_cudacontext(obj):
        raise TypeError('expected Context instance, got %s'
                        % (type(obj).__name__))
    return (<Context>obj).context
1061
+
1062
+ # cudaipcmemhandle
1063
+
1064
cdef public api bint pyarrow_is_cudaipcmemhandle(object handle):
    return isinstance(handle, IpcMemHandle)


cdef public api object \
        pyarrow_wrap_cudaipcmemhandle(shared_ptr[CCudaIpcMemHandle]& h):
    # Allocate without running __init__, then attach the C++ handle.
    cdef IpcMemHandle wrapped = IpcMemHandle.__new__(IpcMemHandle)
    wrapped.init(h)
    return wrapped


cdef public api shared_ptr[CCudaIpcMemHandle] \
        pyarrow_unwrap_cudaipcmemhandle(object obj):
    if not pyarrow_is_cudaipcmemhandle(obj):
        raise TypeError('expected IpcMemHandle instance, got %s'
                        % (type(obj).__name__))
    return (<IpcMemHandle>obj).handle
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_dataset_parquet.pyx ADDED
@@ -0,0 +1,1053 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # cython: language_level = 3
19
+
20
+ """Dataset support for Parquet file format."""
21
+
22
+ from cython cimport binding
23
+ from cython.operator cimport dereference as deref
24
+
25
+ import os
26
+ import warnings
27
+
28
+ import pyarrow as pa
29
+ from pyarrow.lib cimport *
30
+ from pyarrow.lib import frombytes, tobytes, is_threading_enabled
31
+ from pyarrow.includes.libarrow cimport *
32
+ from pyarrow.includes.libarrow_dataset cimport *
33
+ from pyarrow.includes.libarrow_dataset_parquet cimport *
34
+ from pyarrow._fs cimport FileSystem
35
+
36
+ from pyarrow._compute cimport Expression, _bind
37
+ from pyarrow._dataset cimport (
38
+ _make_file_source,
39
+ DatasetFactory,
40
+ FileFormat,
41
+ FileFragment,
42
+ FileWriteOptions,
43
+ Fragment,
44
+ FragmentScanOptions,
45
+ CacheOptions,
46
+ Partitioning,
47
+ PartitioningFactory,
48
+ WrittenFile
49
+ )
50
+
51
+ from pyarrow._parquet cimport (
52
+ _create_writer_properties, _create_arrow_writer_properties,
53
+ FileMetaData,
54
+ )
55
+
56
+
57
# Parquet encryption support is optional: record whether the helper module
# could be imported.
try:
    from pyarrow._dataset_parquet_encryption import (
        set_encryption_config, set_decryption_config, set_decryption_properties
    )
except ImportError:
    parquet_encryption_enabled = False
else:
    parquet_encryption_enabled = True
64
+
65
+
66
+ cdef Expression _true = Expression._scalar(True)
67
+
68
+ ctypedef CParquetFileWriter* _CParquetFileWriterPtr
69
+
70
+
71
+ cdef class ParquetFileFormat(FileFormat):
72
+ """
73
+ FileFormat for Parquet
74
+
75
+ Parameters
76
+ ----------
77
+ read_options : ParquetReadOptions
78
+ Read options for the file.
79
+ default_fragment_scan_options : ParquetFragmentScanOptions
80
+ Scan Options for the file.
81
+ **kwargs : dict
82
+ Additional options for read option or scan option
83
+ """
84
+
85
+ cdef:
86
+ CParquetFileFormat* parquet_format
87
+
88
def __init__(self, read_options=None,
             default_fragment_scan_options=None,
             **kwargs):
    cdef:
        shared_ptr[CParquetFileFormat] wrapped
        CParquetFileFormatReaderOptions* options

    # Split the loose keyword arguments into read options and scan options.
    read_options_args = {}
    scan_args = {}
    for option, value in kwargs.items():
        if option in _PARQUET_READ_OPTIONS:
            read_options_args[option] = value
        else:
            scan_args[option] = value

    # An explicit options object and loose keywords of the same family
    # are mutually exclusive.
    if read_options and read_options_args:
        duplicates = ', '.join(sorted(read_options_args))
        raise ValueError(f'If `read_options` is given, '
                         f'cannot specify {duplicates}')
    if default_fragment_scan_options and scan_args:
        duplicates = ', '.join(sorted(scan_args))
        raise ValueError(f'If `default_fragment_scan_options` is given, '
                         f'cannot specify {duplicates}')

    if read_options is None:
        read_options = ParquetReadOptions(**read_options_args)
    elif isinstance(read_options, dict):
        # For backwards compatibility
        misplaced = []
        for option, value in read_options.items():
            if option in _PARQUET_READ_OPTIONS:
                read_options_args[option] = value
            else:
                # Not a read option: treat it as a scan option and warn.
                misplaced.append(option)
                scan_args[option] = value
        if misplaced:
            duplicates = ", ".join(misplaced)
            warnings.warn(f'The scan options {duplicates} should be '
                          'specified directly as keyword arguments')
        read_options = ParquetReadOptions(**read_options_args)
    elif not isinstance(read_options, ParquetReadOptions):
        raise TypeError('`read_options` must be either a dictionary or an '
                        'instance of ParquetReadOptions')

    if default_fragment_scan_options is None:
        default_fragment_scan_options = ParquetFragmentScanOptions(**scan_args)
    elif isinstance(default_fragment_scan_options, dict):
        default_fragment_scan_options = ParquetFragmentScanOptions(
            **default_fragment_scan_options)
    elif not isinstance(default_fragment_scan_options,
                        ParquetFragmentScanOptions):
        raise TypeError('`default_fragment_scan_options` must be either a '
                        'dictionary or an instance of '
                        'ParquetFragmentScanOptions')

    wrapped = make_shared[CParquetFileFormat]()

    # Copy the Python-level read options into the C++ reader options.
    options = &(wrapped.get().reader_options)
    if read_options.dictionary_columns is not None:
        for column in read_options.dictionary_columns:
            options.dict_columns.insert(tobytes(column))
    options.coerce_int96_timestamp_unit = \
        read_options._coerce_int96_timestamp_unit

    self.init(<shared_ptr[CFileFormat]> wrapped)
    self.default_fragment_scan_options = default_fragment_scan_options
151
+
152
+ cdef void init(self, const shared_ptr[CFileFormat]& sp):
153
+ FileFormat.init(self, sp)
154
+ self.parquet_format = <CParquetFileFormat*> sp.get()
155
+
156
+ cdef WrittenFile _finish_write(self, path, base_dir,
157
+ CFileWriter* file_writer):
158
+ cdef:
159
+ FileMetaData parquet_metadata
160
+ CParquetFileWriter* parquet_file_writer
161
+
162
+ parquet_metadata = None
163
+ parquet_file_writer = dynamic_cast[_CParquetFileWriterPtr](file_writer)
164
+ with nogil:
165
+ metadata = deref(
166
+ deref(parquet_file_writer).parquet_writer()).metadata()
167
+ if metadata:
168
+ parquet_metadata = FileMetaData()
169
+ parquet_metadata.init(metadata)
170
+ parquet_metadata.set_file_path(os.path.relpath(path, base_dir))
171
+
172
+ size = GetResultValue(file_writer.GetBytesWritten())
173
+
174
+ return WrittenFile(path, parquet_metadata, size)
175
+
176
+ @property
177
+ def read_options(self):
178
+ cdef CParquetFileFormatReaderOptions* options
179
+ options = &self.parquet_format.reader_options
180
+ parquet_read_options = ParquetReadOptions(
181
+ dictionary_columns={frombytes(col)
182
+ for col in options.dict_columns},
183
+ )
184
+ # Read options getter/setter works with strings so setting
185
+ # the private property which uses the C Type
186
+ parquet_read_options._coerce_int96_timestamp_unit = \
187
+ options.coerce_int96_timestamp_unit
188
+ return parquet_read_options
189
+
190
+ def make_write_options(self, **kwargs):
191
+ """
192
+ Parameters
193
+ ----------
194
+ **kwargs : dict
195
+
196
+ Returns
197
+ -------
198
+ pyarrow.dataset.FileWriteOptions
199
+ """
200
+ # Safeguard from calling make_write_options as a static class method
201
+ if not isinstance(self, ParquetFileFormat):
202
+ raise TypeError("make_write_options() should be called on "
203
+ "an instance of ParquetFileFormat")
204
+ opts = FileFormat.make_write_options(self)
205
+ (<ParquetFileWriteOptions> opts).update(**kwargs)
206
+ return opts
207
+
208
+ cdef _set_default_fragment_scan_options(self, FragmentScanOptions options):
209
+ if options.type_name == 'parquet':
210
+ self.parquet_format.default_fragment_scan_options = options.wrapped
211
+ else:
212
+ super()._set_default_fragment_scan_options(options)
213
+
214
+ def equals(self, ParquetFileFormat other):
215
+ """
216
+ Parameters
217
+ ----------
218
+ other : pyarrow.dataset.ParquetFileFormat
219
+
220
+ Returns
221
+ -------
222
+ bool
223
+ """
224
+ return (
225
+ self.read_options.equals(other.read_options) and
226
+ self.default_fragment_scan_options ==
227
+ other.default_fragment_scan_options
228
+ )
229
+
230
+ @property
231
+ def default_extname(self):
232
+ return "parquet"
233
+
234
+ def __reduce__(self):
235
+ return ParquetFileFormat, (self.read_options,
236
+ self.default_fragment_scan_options)
237
+
238
+ def __repr__(self):
239
+ return f"<ParquetFileFormat read_options={self.read_options}>"
240
+
241
+ def make_fragment(self, file, filesystem=None,
242
+ Expression partition_expression=None, row_groups=None, *, file_size=None):
243
+ """
244
+ Make a FileFragment from a given file.
245
+
246
+ Parameters
247
+ ----------
248
+ file : file-like object, path-like or str
249
+ The file or file path to make a fragment from.
250
+ filesystem : Filesystem, optional
251
+ If `filesystem` is given, `file` must be a string and specifies
252
+ the path of the file to read from the filesystem.
253
+ partition_expression : Expression, optional
254
+ An expression that is guaranteed true for all rows in the fragment. Allows
255
+ fragment to be potentially skipped while scanning with a filter.
256
+ row_groups : Iterable, optional
257
+ The indices of the row groups to include
258
+ file_size : int, optional
259
+ The size of the file in bytes. Can improve performance with high-latency filesystems
260
+ when file size needs to be known before reading.
261
+
262
+ Returns
263
+ -------
264
+ fragment : Fragment
265
+ The file fragment
266
+ """
267
+ cdef:
268
+ vector[int] c_row_groups
269
+ if partition_expression is None:
270
+ partition_expression = _true
271
+ if row_groups is None:
272
+ return super().make_fragment(file, filesystem,
273
+ partition_expression, file_size=file_size)
274
+
275
+ c_source = _make_file_source(file, filesystem, file_size)
276
+ c_row_groups = [<int> row_group for row_group in set(row_groups)]
277
+
278
+ c_fragment = <shared_ptr[CFragment]> GetResultValue(
279
+ self.parquet_format.MakeFragment(move(c_source),
280
+ partition_expression.unwrap(),
281
+ <shared_ptr[CSchema]>nullptr,
282
+ move(c_row_groups)))
283
+ return Fragment.wrap(move(c_fragment))
284
+
285
+
286
+ class RowGroupInfo:
287
+ """
288
+ A wrapper class for RowGroup information
289
+
290
+ Parameters
291
+ ----------
292
+ id : integer
293
+ The group ID.
294
+ metadata : FileMetaData
295
+ The rowgroup metadata.
296
+ schema : Schema
297
+ Schema of the rows.
298
+ """
299
+
300
+ def __init__(self, id, metadata, schema):
301
+ self.id = id
302
+ self.metadata = metadata
303
+ self.schema = schema
304
+
305
+ @property
306
+ def num_rows(self):
307
+ return self.metadata.num_rows
308
+
309
+ @property
310
+ def total_byte_size(self):
311
+ return self.metadata.total_byte_size
312
+
313
+ @property
314
+ def statistics(self):
315
+ def name_stats(i):
316
+ col = self.metadata.column(i)
317
+
318
+ stats = col.statistics
319
+ if stats is None or not stats.has_min_max:
320
+ return None, None
321
+
322
+ name = col.path_in_schema
323
+ field_index = self.schema.get_field_index(name)
324
+ if field_index < 0:
325
+ return None, None
326
+
327
+ typ = self.schema.field(field_index).type
328
+ return col.path_in_schema, {
329
+ 'min': pa.scalar(stats.min, type=typ).as_py(),
330
+ 'max': pa.scalar(stats.max, type=typ).as_py()
331
+ }
332
+
333
+ return {
334
+ name: stats for name, stats
335
+ in map(name_stats, range(self.metadata.num_columns))
336
+ if stats is not None
337
+ }
338
+
339
+ def __repr__(self):
340
+ return "RowGroupInfo({})".format(self.id)
341
+
342
+ def __eq__(self, other):
343
+ if isinstance(other, int):
344
+ return self.id == other
345
+ if not isinstance(other, RowGroupInfo):
346
+ return False
347
+ return self.id == other.id
348
+
349
+
350
+ cdef class ParquetFileFragment(FileFragment):
351
+ """A Fragment representing a parquet file."""
352
+
353
+ cdef:
354
+ CParquetFileFragment* parquet_file_fragment
355
+
356
+ cdef void init(self, const shared_ptr[CFragment]& sp):
357
+ FileFragment.init(self, sp)
358
+ self.parquet_file_fragment = <CParquetFileFragment*> sp.get()
359
+
360
+ def __reduce__(self):
361
+ buffer = self.buffer
362
+ # parquet_file_fragment.row_groups() is empty if the metadata
363
+ # information of the file is not yet populated
364
+ if not bool(self.parquet_file_fragment.row_groups()):
365
+ row_groups = None
366
+ else:
367
+ row_groups = [row_group.id for row_group in self.row_groups]
368
+
369
+ return self.format.make_fragment, (
370
+ self.path if buffer is None else buffer,
371
+ self.filesystem,
372
+ self.partition_expression,
373
+ row_groups
374
+ )
375
+
376
+ def ensure_complete_metadata(self):
377
+ """
378
+ Ensure that all metadata (statistics, physical schema, ...) have
379
+ been read and cached in this fragment.
380
+ """
381
+ with nogil:
382
+ check_status(self.parquet_file_fragment.EnsureCompleteMetadata())
383
+
384
+ @property
385
+ def row_groups(self):
386
+ metadata = self.metadata
387
+ cdef vector[int] row_groups = self.parquet_file_fragment.row_groups()
388
+ return [RowGroupInfo(i, metadata.row_group(i), self.physical_schema)
389
+ for i in row_groups]
390
+
391
+ @property
392
+ def metadata(self):
393
+ self.ensure_complete_metadata()
394
+ cdef FileMetaData metadata = FileMetaData()
395
+ metadata.init(self.parquet_file_fragment.metadata())
396
+ return metadata
397
+
398
+ @property
399
+ def num_row_groups(self):
400
+ """
401
+ Return the number of row groups viewed by this fragment (not the
402
+ number of row groups in the origin file).
403
+ """
404
+ self.ensure_complete_metadata()
405
+ return self.parquet_file_fragment.row_groups().size()
406
+
407
+ def split_by_row_group(self, Expression filter=None,
408
+ Schema schema=None):
409
+ """
410
+ Split the fragment into multiple fragments.
411
+
412
+ Yield a Fragment wrapping each row group in this ParquetFileFragment.
413
+ Row groups will be excluded whose metadata contradicts the optional
414
+ filter.
415
+
416
+ Parameters
417
+ ----------
418
+ filter : Expression, default None
419
+ Only include the row groups which satisfy this predicate (using
420
+ the Parquet RowGroup statistics).
421
+ schema : Schema, default None
422
+ Schema to use when filtering row groups. Defaults to the
423
+ Fragment's physical schema
424
+
425
+ Returns
426
+ -------
427
+ A list of Fragments
428
+ """
429
+ cdef:
430
+ vector[shared_ptr[CFragment]] c_fragments
431
+ CExpression c_filter
432
+ shared_ptr[CFragment] c_fragment
433
+
434
+ schema = schema or self.physical_schema
435
+ c_filter = _bind(filter, schema)
436
+ with nogil:
437
+ c_fragments = move(GetResultValue(
438
+ self.parquet_file_fragment.SplitByRowGroup(move(c_filter))))
439
+
440
+ return [Fragment.wrap(c_fragment) for c_fragment in c_fragments]
441
+
442
+ def subset(self, Expression filter=None, Schema schema=None,
443
+ object row_group_ids=None):
444
+ """
445
+ Create a subset of the fragment (viewing a subset of the row groups).
446
+
447
+ Subset can be specified by either a filter predicate (with optional
448
+ schema) or by a list of row group IDs. Note that when using a filter,
449
+ the resulting fragment can be empty (viewing no row groups).
450
+
451
+ Parameters
452
+ ----------
453
+ filter : Expression, default None
454
+ Only include the row groups which satisfy this predicate (using
455
+ the Parquet RowGroup statistics).
456
+ schema : Schema, default None
457
+ Schema to use when filtering row groups. Defaults to the
458
+ Fragment's physical schema
459
+ row_group_ids : list of ints
460
+ The row group IDs to include in the subset. Can only be specified
461
+ if `filter` is None.
462
+
463
+ Returns
464
+ -------
465
+ ParquetFileFragment
466
+ """
467
+ cdef:
468
+ CExpression c_filter
469
+ vector[int] c_row_group_ids
470
+ shared_ptr[CFragment] c_fragment
471
+
472
+ if filter is not None and row_group_ids is not None:
473
+ raise ValueError(
474
+ "Cannot specify both 'filter' and 'row_group_ids'."
475
+ )
476
+
477
+ if filter is not None:
478
+ schema = schema or self.physical_schema
479
+ c_filter = _bind(filter, schema)
480
+ with nogil:
481
+ c_fragment = move(GetResultValue(
482
+ self.parquet_file_fragment.SubsetWithFilter(
483
+ move(c_filter))))
484
+ elif row_group_ids is not None:
485
+ c_row_group_ids = [
486
+ <int> row_group for row_group in sorted(set(row_group_ids))
487
+ ]
488
+ with nogil:
489
+ c_fragment = move(GetResultValue(
490
+ self.parquet_file_fragment.SubsetWithIds(
491
+ move(c_row_group_ids))))
492
+ else:
493
+ raise ValueError(
494
+ "Need to specify one of 'filter' or 'row_group_ids'"
495
+ )
496
+
497
+ return Fragment.wrap(c_fragment)
498
+
499
+
500
+ cdef class ParquetReadOptions(_Weakrefable):
501
+ """
502
+ Parquet format specific options for reading.
503
+
504
+ Parameters
505
+ ----------
506
+ dictionary_columns : list of string, default None
507
+ Names of columns which should be dictionary encoded as
508
+ they are read
509
+ coerce_int96_timestamp_unit : str, default None
510
+ Cast timestamps that are stored in INT96 format to a particular
511
+ resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
512
+ and therefore INT96 timestamps will be inferred as timestamps
513
+ in nanoseconds
514
+ """
515
+
516
+ cdef public:
517
+ set dictionary_columns
518
+ TimeUnit _coerce_int96_timestamp_unit
519
+
520
+ # Also see _PARQUET_READ_OPTIONS
521
+ def __init__(self, dictionary_columns=None,
522
+ coerce_int96_timestamp_unit=None):
523
+ self.dictionary_columns = set(dictionary_columns or set())
524
+ self.coerce_int96_timestamp_unit = coerce_int96_timestamp_unit
525
+
526
+ @property
527
+ def coerce_int96_timestamp_unit(self):
528
+ return timeunit_to_string(self._coerce_int96_timestamp_unit)
529
+
530
+ @coerce_int96_timestamp_unit.setter
531
+ def coerce_int96_timestamp_unit(self, unit):
532
+ if unit is not None:
533
+ self._coerce_int96_timestamp_unit = string_to_timeunit(unit)
534
+ else:
535
+ self._coerce_int96_timestamp_unit = TimeUnit_NANO
536
+
537
+ def equals(self, ParquetReadOptions other):
538
+ """
539
+ Parameters
540
+ ----------
541
+ other : pyarrow.dataset.ParquetReadOptions
542
+
543
+ Returns
544
+ -------
545
+ bool
546
+ """
547
+ return (self.dictionary_columns == other.dictionary_columns and
548
+ self.coerce_int96_timestamp_unit ==
549
+ other.coerce_int96_timestamp_unit)
550
+
551
+ def __eq__(self, other):
552
+ try:
553
+ return self.equals(other)
554
+ except TypeError:
555
+ return False
556
+
557
+ def __repr__(self):
558
+ return (
559
+ f"<ParquetReadOptions"
560
+ f" dictionary_columns={self.dictionary_columns}"
561
+ f" coerce_int96_timestamp_unit={self.coerce_int96_timestamp_unit}>"
562
+ )
563
+
564
+
565
+ cdef class ParquetFileWriteOptions(FileWriteOptions):
566
+
567
+ def update(self, **kwargs):
568
+ """
569
+ Parameters
570
+ ----------
571
+ **kwargs : dict
572
+ """
573
+ arrow_fields = {
574
+ "use_deprecated_int96_timestamps",
575
+ "coerce_timestamps",
576
+ "allow_truncated_timestamps",
577
+ "use_compliant_nested_type",
578
+ }
579
+
580
+ setters = set()
581
+ for name, value in kwargs.items():
582
+ if name not in self._properties:
583
+ raise TypeError("unexpected parquet write option: " + name)
584
+ self._properties[name] = value
585
+ if name in arrow_fields:
586
+ setters.add(self._set_arrow_properties)
587
+ elif name == "encryption_config" and value is not None:
588
+ setters.add(self._set_encryption_config)
589
+ else:
590
+ setters.add(self._set_properties)
591
+
592
+ for setter in setters:
593
+ setter()
594
+
595
+ def _set_properties(self):
596
+ cdef CParquetFileWriteOptions* opts = self.parquet_options
597
+
598
+ opts.writer_properties = _create_writer_properties(
599
+ use_dictionary=self._properties["use_dictionary"],
600
+ compression=self._properties["compression"],
601
+ version=self._properties["version"],
602
+ write_statistics=self._properties["write_statistics"],
603
+ data_page_size=self._properties["data_page_size"],
604
+ compression_level=self._properties["compression_level"],
605
+ use_byte_stream_split=(
606
+ self._properties["use_byte_stream_split"]
607
+ ),
608
+ column_encoding=self._properties["column_encoding"],
609
+ data_page_version=self._properties["data_page_version"],
610
+ encryption_properties=self._properties["encryption_properties"],
611
+ write_batch_size=self._properties["write_batch_size"],
612
+ dictionary_pagesize_limit=self._properties["dictionary_pagesize_limit"],
613
+ write_page_index=self._properties["write_page_index"],
614
+ write_page_checksum=self._properties["write_page_checksum"],
615
+ sorting_columns=self._properties["sorting_columns"],
616
+ store_decimal_as_integer=self._properties["store_decimal_as_integer"],
617
+ )
618
+
619
+ def _set_arrow_properties(self):
620
+ cdef CParquetFileWriteOptions* opts = self.parquet_options
621
+
622
+ opts.arrow_writer_properties = _create_arrow_writer_properties(
623
+ use_deprecated_int96_timestamps=(
624
+ self._properties["use_deprecated_int96_timestamps"]
625
+ ),
626
+ coerce_timestamps=self._properties["coerce_timestamps"],
627
+ allow_truncated_timestamps=(
628
+ self._properties["allow_truncated_timestamps"]
629
+ ),
630
+ writer_engine_version="V2",
631
+ use_compliant_nested_type=(
632
+ self._properties["use_compliant_nested_type"]
633
+ )
634
+ )
635
+
636
+ def _set_encryption_config(self):
637
+ if not parquet_encryption_enabled:
638
+ raise NotImplementedError(
639
+ "Encryption is not enabled in your installation of pyarrow, but an "
640
+ "encryption_config was provided."
641
+ )
642
+ set_encryption_config(self, self._properties["encryption_config"])
643
+
644
+ cdef void init(self, const shared_ptr[CFileWriteOptions]& sp):
645
+ FileWriteOptions.init(self, sp)
646
+ self.parquet_options = <CParquetFileWriteOptions*> sp.get()
647
+ self._properties = dict(
648
+ use_dictionary=True,
649
+ compression="snappy",
650
+ version="2.6",
651
+ write_statistics=None,
652
+ data_page_size=None,
653
+ compression_level=None,
654
+ use_byte_stream_split=False,
655
+ column_encoding=None,
656
+ data_page_version="1.0",
657
+ use_deprecated_int96_timestamps=False,
658
+ coerce_timestamps=None,
659
+ allow_truncated_timestamps=False,
660
+ use_compliant_nested_type=True,
661
+ encryption_properties=None,
662
+ write_batch_size=None,
663
+ dictionary_pagesize_limit=None,
664
+ write_page_index=False,
665
+ encryption_config=None,
666
+ write_page_checksum=False,
667
+ sorting_columns=None,
668
+ store_decimal_as_integer=False,
669
+ )
670
+
671
+ self._set_properties()
672
+ self._set_arrow_properties()
673
+
674
+ def __repr__(self):
675
+ return "<pyarrow.dataset.ParquetFileWriteOptions {0}>".format(
676
+ " ".join([f"{key}={value}" for key, value in self._properties.items()])
677
+ )
678
+
679
+
680
+ cdef set _PARQUET_READ_OPTIONS = {
681
+ 'dictionary_columns', 'coerce_int96_timestamp_unit'
682
+ }
683
+
684
+
685
+ cdef class ParquetFragmentScanOptions(FragmentScanOptions):
686
+ """
687
+ Scan-specific options for Parquet fragments.
688
+
689
+ Parameters
690
+ ----------
691
+ use_buffered_stream : bool, default False
692
+ Read files through buffered input streams rather than loading entire
693
+ row groups at once. This may be enabled to reduce memory overhead.
694
+ Disabled by default.
695
+ buffer_size : int, default 8192
696
+ Size of buffered stream, if enabled. Default is 8KB.
697
+ pre_buffer : bool, default True
698
+ If enabled, pre-buffer the raw Parquet data instead of issuing one
699
+ read per column chunk. This can improve performance on high-latency
700
+ filesystems (e.g. S3, GCS) by coalescing and issuing file reads in
701
+ parallel using a background I/O thread pool.
702
+ Set to False if you want to prioritize minimal memory usage
703
+ over maximum speed.
704
+ cache_options : pyarrow.CacheOptions, default None
705
+ Cache options used when pre_buffer is enabled. The default values should
706
+ be good for most use cases. You may want to adjust these for example if
707
+ you have exceptionally high latency to the file system.
708
+ thrift_string_size_limit : int, default None
709
+ If not None, override the maximum total string size allocated
710
+ when decoding Thrift structures. The default limit should be
711
+ sufficient for most Parquet files.
712
+ thrift_container_size_limit : int, default None
713
+ If not None, override the maximum total size of containers allocated
714
+ when decoding Thrift structures. The default limit should be
715
+ sufficient for most Parquet files.
716
+ decryption_config : pyarrow.dataset.ParquetDecryptionConfig, default None
717
+ If not None, use the provided ParquetDecryptionConfig to decrypt the
718
+ Parquet file.
719
+ decryption_properties : pyarrow.parquet.FileDecryptionProperties, default None
720
+ If not None, use the provided FileDecryptionProperties to decrypt an encrypted
721
+ Parquet file.
722
+ page_checksum_verification : bool, default False
723
+ If True, verify the page checksum for each page read from the file.
724
+ """
725
+
726
+ # Avoid mistakenly creating attributes
727
+ __slots__ = ()
728
+
729
+ def __init__(self, *, bint use_buffered_stream=False,
730
+ buffer_size=8192,
731
+ bint pre_buffer=True,
732
+ cache_options=None,
733
+ thrift_string_size_limit=None,
734
+ thrift_container_size_limit=None,
735
+ decryption_config=None,
736
+ decryption_properties=None,
737
+ bint page_checksum_verification=False):
738
+ self.init(shared_ptr[CFragmentScanOptions](
739
+ new CParquetFragmentScanOptions()))
740
+ self.use_buffered_stream = use_buffered_stream
741
+ self.buffer_size = buffer_size
742
+ if pre_buffer and not is_threading_enabled():
743
+ pre_buffer = False
744
+ self.pre_buffer = pre_buffer
745
+ if cache_options is not None:
746
+ self.cache_options = cache_options
747
+ if thrift_string_size_limit is not None:
748
+ self.thrift_string_size_limit = thrift_string_size_limit
749
+ if thrift_container_size_limit is not None:
750
+ self.thrift_container_size_limit = thrift_container_size_limit
751
+ if decryption_config is not None:
752
+ self.parquet_decryption_config = decryption_config
753
+ if decryption_properties is not None:
754
+ self.decryption_properties = decryption_properties
755
+ self.page_checksum_verification = page_checksum_verification
756
+
757
+ cdef void init(self, const shared_ptr[CFragmentScanOptions]& sp):
758
+ FragmentScanOptions.init(self, sp)
759
+ self.parquet_options = <CParquetFragmentScanOptions*> sp.get()
760
+
761
+ cdef CReaderProperties* reader_properties(self):
762
+ return self.parquet_options.reader_properties.get()
763
+
764
+ cdef ArrowReaderProperties* arrow_reader_properties(self):
765
+ return self.parquet_options.arrow_reader_properties.get()
766
+
767
+ @property
768
+ def use_buffered_stream(self):
769
+ return self.reader_properties().is_buffered_stream_enabled()
770
+
771
+ @use_buffered_stream.setter
772
+ def use_buffered_stream(self, bint use_buffered_stream):
773
+ if use_buffered_stream:
774
+ self.reader_properties().enable_buffered_stream()
775
+ else:
776
+ self.reader_properties().disable_buffered_stream()
777
+
778
+ @property
779
+ def buffer_size(self):
780
+ return self.reader_properties().buffer_size()
781
+
782
+ @buffer_size.setter
783
+ def buffer_size(self, buffer_size):
784
+ if buffer_size <= 0:
785
+ raise ValueError("Buffer size must be larger than zero")
786
+ self.reader_properties().set_buffer_size(buffer_size)
787
+
788
+ @property
789
+ def pre_buffer(self):
790
+ return self.arrow_reader_properties().pre_buffer()
791
+
792
+ @pre_buffer.setter
793
+ def pre_buffer(self, bint pre_buffer):
794
+ if pre_buffer and not is_threading_enabled():
795
+ return
796
+ self.arrow_reader_properties().set_pre_buffer(pre_buffer)
797
+
798
+ @property
799
+ def cache_options(self):
800
+ return CacheOptions.wrap(self.arrow_reader_properties().cache_options())
801
+
802
+ @cache_options.setter
803
+ def cache_options(self, CacheOptions options):
804
+ self.arrow_reader_properties().set_cache_options(options.unwrap())
805
+
806
+ @property
807
+ def thrift_string_size_limit(self):
808
+ return self.reader_properties().thrift_string_size_limit()
809
+
810
+ @thrift_string_size_limit.setter
811
+ def thrift_string_size_limit(self, size):
812
+ if size <= 0:
813
+ raise ValueError("size must be larger than zero")
814
+ self.reader_properties().set_thrift_string_size_limit(size)
815
+
816
+ @property
817
+ def thrift_container_size_limit(self):
818
+ return self.reader_properties().thrift_container_size_limit()
819
+
820
+ @thrift_container_size_limit.setter
821
+ def thrift_container_size_limit(self, size):
822
+ if size <= 0:
823
+ raise ValueError("size must be larger than zero")
824
+ self.reader_properties().set_thrift_container_size_limit(size)
825
+
826
+ @property
827
+ def decryption_properties(self):
828
+ if not parquet_encryption_enabled:
829
+ raise NotImplementedError(
830
+ "Unable to access encryption features. "
831
+ "Encryption is not enabled in your installation of pyarrow."
832
+ )
833
+ return self._decryption_properties
834
+
835
+ @decryption_properties.setter
836
+ def decryption_properties(self, config):
837
+ if not parquet_encryption_enabled:
838
+ raise NotImplementedError(
839
+ "Encryption is not enabled in your installation of pyarrow, but "
840
+ "decryption_properties were provided."
841
+ )
842
+ set_decryption_properties(self, config)
843
+ self._decryption_properties = config
844
+
845
+ @property
846
+ def parquet_decryption_config(self):
847
+ if not parquet_encryption_enabled:
848
+ raise NotImplementedError(
849
+ "Unable to access encryption features. "
850
+ "Encryption is not enabled in your installation of pyarrow."
851
+ )
852
+ return self._parquet_decryption_config
853
+
854
+ @parquet_decryption_config.setter
855
+ def parquet_decryption_config(self, config):
856
+ if not parquet_encryption_enabled:
857
+ raise NotImplementedError(
858
+ "Encryption is not enabled in your installation of pyarrow, but a "
859
+ "decryption_config was provided."
860
+ )
861
+ set_decryption_config(self, config)
862
+ self._parquet_decryption_config = config
863
+
864
+ @property
865
+ def page_checksum_verification(self):
866
+ return self.reader_properties().page_checksum_verification()
867
+
868
+ @page_checksum_verification.setter
869
+ def page_checksum_verification(self, bint page_checksum_verification):
870
+ self.reader_properties().set_page_checksum_verification(page_checksum_verification)
871
+
872
+ def equals(self, ParquetFragmentScanOptions other):
873
+ """
874
+ Parameters
875
+ ----------
876
+ other : pyarrow.dataset.ParquetFragmentScanOptions
877
+
878
+ Returns
879
+ -------
880
+ bool
881
+ """
882
+ attrs = (
883
+ self.use_buffered_stream, self.buffer_size, self.pre_buffer, self.cache_options,
884
+ self.thrift_string_size_limit, self.thrift_container_size_limit,
885
+ self.page_checksum_verification)
886
+ other_attrs = (
887
+ other.use_buffered_stream, other.buffer_size, other.pre_buffer, other.cache_options,
888
+ other.thrift_string_size_limit,
889
+ other.thrift_container_size_limit, other.page_checksum_verification)
890
+ return attrs == other_attrs
891
+
892
+ @staticmethod
893
+ @binding(True) # Required for Cython < 3
894
+ def _reconstruct(kwargs):
895
+ # __reduce__ doesn't allow passing named arguments directly to the
896
+ # reconstructor, hence this wrapper.
897
+ return ParquetFragmentScanOptions(**kwargs)
898
+
899
+ def __reduce__(self):
900
+ kwargs = dict(
901
+ use_buffered_stream=self.use_buffered_stream,
902
+ buffer_size=self.buffer_size,
903
+ pre_buffer=self.pre_buffer,
904
+ cache_options=self.cache_options,
905
+ thrift_string_size_limit=self.thrift_string_size_limit,
906
+ thrift_container_size_limit=self.thrift_container_size_limit,
907
+ page_checksum_verification=self.page_checksum_verification
908
+ )
909
+ return ParquetFragmentScanOptions._reconstruct, (kwargs,)
910
+
911
+
912
+ cdef class ParquetFactoryOptions(_Weakrefable):
913
+ """
914
+ Influences the discovery of a Parquet dataset.
915
+
916
+ Parameters
917
+ ----------
918
+ partition_base_dir : str, optional
919
+ For the purposes of applying the partitioning, paths will be
920
+ stripped of the partition_base_dir. Files not matching the
921
+ partition_base_dir prefix will be skipped for partitioning discovery.
922
+ The ignored files will still be part of the Dataset, but will not
923
+ have partition information.
924
+ partitioning : Partitioning, PartitioningFactory, optional
925
+ The partitioning scheme applied to fragments, see ``Partitioning``.
926
+ validate_column_chunk_paths : bool, default False
927
+ Assert that all ColumnChunk paths are consistent. The parquet spec
928
+ allows for ColumnChunk data to be stored in multiple files, but
929
+ ParquetDatasetFactory supports only a single file with all ColumnChunk
930
+ data. If this flag is set, construction of a ParquetDatasetFactory will
931
+ raise an error if ColumnChunk data is not resident in a single file.
932
+ """
933
+
934
+ cdef:
935
+ CParquetFactoryOptions options
936
+
937
+ __slots__ = () # avoid mistakingly creating attributes
938
+
939
+ def __init__(self, partition_base_dir=None, partitioning=None,
940
+ validate_column_chunk_paths=False):
941
+ if isinstance(partitioning, PartitioningFactory):
942
+ self.partitioning_factory = partitioning
943
+ elif isinstance(partitioning, Partitioning):
944
+ self.partitioning = partitioning
945
+
946
+ if partition_base_dir is not None:
947
+ self.partition_base_dir = partition_base_dir
948
+
949
+ self.options.validate_column_chunk_paths = validate_column_chunk_paths
950
+
951
+ cdef inline CParquetFactoryOptions unwrap(self):
952
+ return self.options
953
+
954
+ @property
955
+ def partitioning(self):
956
+ """Partitioning to apply to discovered files.
957
+
958
+ NOTE: setting this property will overwrite partitioning_factory.
959
+ """
960
+ c_partitioning = self.options.partitioning.partitioning()
961
+ if c_partitioning.get() == nullptr:
962
+ return None
963
+ return Partitioning.wrap(c_partitioning)
964
+
965
+ @partitioning.setter
966
+ def partitioning(self, Partitioning value):
967
+ self.options.partitioning = (<Partitioning> value).unwrap()
968
+
969
+ @property
970
+ def partitioning_factory(self):
971
+ """PartitioningFactory to apply to discovered files and
972
+ discover a Partitioning.
973
+
974
+ NOTE: setting this property will overwrite partitioning.
975
+ """
976
+ c_factory = self.options.partitioning.factory()
977
+ if c_factory.get() == nullptr:
978
+ return None
979
+ return PartitioningFactory.wrap(c_factory, None, None)
980
+
981
+ @partitioning_factory.setter
982
+ def partitioning_factory(self, PartitioningFactory value):
983
+ self.options.partitioning = (<PartitioningFactory> value).unwrap()
984
+
985
+ @property
986
+ def partition_base_dir(self):
987
+ """
988
+ Base directory to strip paths before applying the partitioning.
989
+ """
990
+ return frombytes(self.options.partition_base_dir)
991
+
992
+ @partition_base_dir.setter
993
+ def partition_base_dir(self, value):
994
+ self.options.partition_base_dir = tobytes(value)
995
+
996
+ @property
997
+ def validate_column_chunk_paths(self):
998
+ """
999
+ Whether to assert that all ColumnChunk paths are consistent (all ColumnChunk data resident in a single file).
1000
+ """
1001
+ return self.options.validate_column_chunk_paths
1002
+
1003
+ @validate_column_chunk_paths.setter
1004
+ def validate_column_chunk_paths(self, value):
1005
+ self.options.validate_column_chunk_paths = value
1006
+
1007
+
1008
+ cdef class ParquetDatasetFactory(DatasetFactory):
1009
+ """
1010
+ Create a ParquetDatasetFactory from a Parquet `_metadata` file.
1011
+
1012
+ Parameters
1013
+ ----------
1014
+ metadata_path : str
1015
+ Path to the `_metadata` parquet metadata-only file generated with
1016
+ `pyarrow.parquet.write_metadata`.
1017
+ filesystem : pyarrow.fs.FileSystem
1018
+ Filesystem to read the metadata_path from, and subsequent parquet
1019
+ files.
1020
+ format : ParquetFileFormat
1021
+ Parquet format options.
1022
+ options : ParquetFactoryOptions, optional
1023
+ Various flags influencing the discovery of filesystem paths.
1024
+ """
1025
+
1026
+ cdef:
1027
+ CParquetDatasetFactory* parquet_factory
1028
+
1029
+ def __init__(self, metadata_path, FileSystem filesystem not None,
1030
+ FileFormat format not None,
1031
+ ParquetFactoryOptions options=None):
1032
+ cdef:
1033
+ c_string c_path
1034
+ shared_ptr[CFileSystem] c_filesystem
1035
+ shared_ptr[CParquetFileFormat] c_format
1036
+ CResult[shared_ptr[CDatasetFactory]] result
1037
+ CParquetFactoryOptions c_options
1038
+
1039
+ c_path = tobytes(metadata_path)
1040
+ c_filesystem = filesystem.unwrap()
1041
+ c_format = static_pointer_cast[CParquetFileFormat, CFileFormat](
1042
+ format.unwrap())
1043
+ options = options or ParquetFactoryOptions()
1044
+ c_options = options.unwrap()
1045
+
1046
+ with nogil:
1047
+ result = CParquetDatasetFactory.MakeFromMetaDataPath(
1048
+ c_path, c_filesystem, c_format, c_options)
1049
+ self.init(GetResultValue(result))
1050
+
1051
+ cdef init(self, shared_ptr[CDatasetFactory]& sp):
1052
+ DatasetFactory.init(self, sp)
1053
+ self.parquet_factory = <CParquetDatasetFactory*> sp.get()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_fs.pxd ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # cython: language_level = 3
19
+
20
+ from pyarrow.includes.common cimport *
21
+ from pyarrow.includes.libarrow_fs cimport *
22
+ from pyarrow.lib import _detect_compression, frombytes, tobytes
23
+ from pyarrow.lib cimport *
24
+
25
+
26
# Enum exposed to both Cython and Python (cpdef). Each member takes the value
# of the matching C-level CFileType_* constant, cast to int8_t.
cpdef enum FileType:
    NotFound = <int8_t> CFileType_NotFound
    Unknown = <int8_t> CFileType_Unknown
    File = <int8_t> CFileType_File
    Directory = <int8_t> CFileType_Directory
31
+
32
+
33
# Declaration of the extension type holding a CFileInfo value.
cdef class FileInfo(_Weakrefable):
    cdef:
        # The C++ file info held by value inside the Python object.
        CFileInfo info

    # Build a Python-level object from a CFileInfo.
    @staticmethod
    cdef wrap(CFileInfo info)

    # Return the held CFileInfo; declared nogil so it can be called without
    # the GIL.
    cdef inline CFileInfo unwrap(self) nogil

    # Variant of unwrap that takes an untyped object.
    # NOTE(review): presumably type-checks `obj` before unwrapping -- confirm
    # against the implementation in the .pyx file.
    @staticmethod
    cdef CFileInfo unwrap_safe(obj)
44
+
45
+
46
# Declaration of the extension type holding a CFileSelector value.
cdef class FileSelector(_Weakrefable):
    cdef:
        # The C++ selector held by value inside the Python object.
        CFileSelector selector

    # Build a FileSelector from a CFileSelector.
    @staticmethod
    cdef FileSelector wrap(CFileSelector selector)

    # Return the held CFileSelector; declared nogil so it can be called
    # without the GIL.
    cdef inline CFileSelector unwrap(self) nogil
54
+
55
+
56
# Declaration of the base extension type for filesystem wrappers.
cdef class FileSystem(_Weakrefable):
    cdef:
        # Shared pointer to the C++ filesystem.
        shared_ptr[CFileSystem] wrapped
        # Raw pointer to a CFileSystem.
        # NOTE(review): presumably aliases wrapped.get() -- confirm in the
        # .pyx implementation of init().
        CFileSystem* fs

    # Initialise this object from a shared pointer to a C++ filesystem.
    cdef init(self, const shared_ptr[CFileSystem]& wrapped)

    # Build a Python-level object from a shared pointer to a C++ filesystem.
    @staticmethod
    cdef wrap(const shared_ptr[CFileSystem]& sp)

    # Return a shared pointer to the C++ filesystem; declared nogil so it can
    # be called without the GIL.
    cdef inline shared_ptr[CFileSystem] unwrap(self) nogil
67
+
68
+
69
# FileSystem subclass declaration; it redeclares init() and adds no fields.
cdef class LocalFileSystem(FileSystem):
    cdef init(self, const shared_ptr[CFileSystem]& wrapped)
71
+
72
+
73
# FileSystem subclass declaration that additionally keeps a raw pointer typed
# as CSubTreeFileSystem.
cdef class SubTreeFileSystem(FileSystem):
    cdef:
        # NOTE(review): presumably a downcast of the wrapped filesystem
        # pointer set in init() -- confirm in the .pyx implementation.
        CSubTreeFileSystem* subtreefs

    cdef init(self, const shared_ptr[CFileSystem]& wrapped)
78
+
79
+
80
# FileSystem subclass declaration that additionally keeps a raw pointer typed
# as CMockFileSystem.
cdef class _MockFileSystem(FileSystem):
    cdef:
        # NOTE(review): presumably a downcast of the wrapped filesystem
        # pointer set in init() -- confirm in the .pyx implementation.
        CMockFileSystem* mockfs

    cdef init(self, const shared_ptr[CFileSystem]& wrapped)
85
+
86
+
87
# FileSystem subclass declaration that additionally keeps a raw pointer typed
# as CPyFileSystem.
cdef class PyFileSystem(FileSystem):
    cdef:
        # NOTE(review): presumably a downcast of the wrapped filesystem
        # pointer set in init() -- confirm in the .pyx implementation.
        CPyFileSystem* pyfs

    cdef init(self, const shared_ptr[CFileSystem]& wrapped)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_hdfs.pyx ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # cython: language_level = 3
19
+
20
+ from cython cimport binding
21
+
22
+ from pyarrow.includes.common cimport *
23
+ from pyarrow.includes.libarrow cimport *
24
+ from pyarrow.includes.libarrow_fs cimport *
25
+ from pyarrow._fs cimport FileSystem
26
+
27
+ from pyarrow.lib import frombytes, tobytes
28
+ from pyarrow.util import _stringify_path
29
+
30
+
31
cdef class HadoopFileSystem(FileSystem):
    """
    FileSystem implementation backed by HDFS.

    Parameters
    ----------
    host : str
        HDFS host to connect to. Use "default" to pick up fs.defaultFS from
        core-site.xml.
    port : int, default 8020
        HDFS port to connect to. Use 0 for default or logical (HA) nodes.
    user : str, default None
        Username for the HDFS connection; None implies the login user.
    replication : int, default 3
        Number of copies each block will have.
    buffer_size : int, default 0
        Size of the temporary read and write buffer; 0 disables buffering.
    default_block_size : int, default None
        None means the default configuration for HDFS; a typical block size
        is 128 MB.
    kerb_ticket : string or path, default None
        If not None, the path to the Kerberos ticket cache.
    extra_conf : dict, default None
        Extra key/value pairs for configuration; these override any
        hdfs-site.xml properties.

    Examples
    --------
    >>> from pyarrow import fs
    >>> hdfs = fs.HadoopFileSystem(host, port, user=user, kerb_ticket=ticket_cache_path) # doctest: +SKIP

    For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`.
    """

    cdef CHadoopFileSystem* hdfs

    def __init__(self, str host, int port=8020, *, str user=None,
                 int replication=3, int buffer_size=0,
                 default_block_size=None, kerb_ticket=None,
                 extra_conf=None):
        cdef CHdfsOptions hdfs_options
        cdef shared_ptr[CHadoopFileSystem] c_fs

        # Bare host names get the hdfs:// scheme prepended; "default" and
        # hosts already carrying an hdfs:// or viewfs:// scheme pass through.
        if host != "default" and not host.startswith(('hdfs://', 'viewfs://')):
            # TODO(kszucs): do more sanitization
            host = 'hdfs://' + host

        # Settings that are always applied.
        hdfs_options.ConfigureEndPoint(tobytes(host), int(port))
        hdfs_options.ConfigureReplication(replication)
        hdfs_options.ConfigureBufferSize(buffer_size)

        # Settings that are applied only when the caller supplied a value.
        if user is not None:
            hdfs_options.ConfigureUser(tobytes(user))
        if default_block_size is not None:
            hdfs_options.ConfigureBlockSize(default_block_size)
        if kerb_ticket is not None:
            hdfs_options.ConfigureKerberosTicketCachePath(
                tobytes(_stringify_path(kerb_ticket)))
        if extra_conf is not None:
            for conf_key, conf_value in extra_conf.items():
                hdfs_options.ConfigureExtraConf(
                    tobytes(conf_key), tobytes(conf_value))

        with nogil:
            c_fs = GetResultValue(CHadoopFileSystem.Make(hdfs_options))
        self.init(<shared_ptr[CFileSystem]> c_fs)

    cdef init(self, const shared_ptr[CFileSystem]& wrapped):
        # Base-class initialisation first, then cache a raw pointer downcast
        # to the Hadoop-specific filesystem type.
        FileSystem.init(self, wrapped)
        self.hdfs = <CHadoopFileSystem*> wrapped.get()

    @staticmethod
    def from_uri(uri):
        """
        Instantiate HadoopFileSystem object from an URI string.

        The following two calls are equivalent

        * ``HadoopFileSystem.from_uri('hdfs://localhost:8020/?user=test&replication=1')``
        * ``HadoopFileSystem('localhost', port=8020, user='test', replication=1)``

        Parameters
        ----------
        uri : str
            A string URI describing the connection to HDFS.
            In order to change the user, replication, buffer_size or
            default_block_size pass the values as query parts.

        Returns
        -------
        HadoopFileSystem
        """
        cdef:
            HadoopFileSystem self = HadoopFileSystem.__new__(HadoopFileSystem)
            shared_ptr[CHadoopFileSystem] c_fs
            CHdfsOptions hdfs_options

        # The options are parsed from the URI with the GIL held; only the
        # Make() call runs without it.
        hdfs_options = GetResultValue(
            CHdfsOptions.FromUriString(tobytes(uri)))
        with nogil:
            c_fs = GetResultValue(CHadoopFileSystem.Make(hdfs_options))

        self.init(<shared_ptr[CFileSystem]> c_fs)
        return self

    @staticmethod
    @binding(True)  # Required for cython < 3
    def _reconstruct(kwargs):
        # __reduce__ cannot hand keyword arguments to the reconstructor
        # directly, so they travel as a single dict and are expanded here.
        return HadoopFileSystem(**kwargs)

    def __reduce__(self):
        cdef CHdfsOptions opts = self.hdfs.options()
        # Rebuild the constructor keyword arguments from the options reported
        # by the underlying filesystem.
        ctor_kwargs = {
            'host': frombytes(opts.connection_config.host),
            'port': opts.connection_config.port,
            'user': frombytes(opts.connection_config.user),
            'replication': opts.replication,
            'buffer_size': opts.buffer_size,
            'default_block_size': opts.default_block_size,
            'kerb_ticket': frombytes(opts.connection_config.kerb_ticket),
            'extra_conf': {frombytes(k): frombytes(v)
                           for k, v in opts.connection_config.extra_conf},
        }
        return (HadoopFileSystem._reconstruct, (ctor_kwargs,))
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_json.pxd ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # cython: language_level = 3
19
+
20
+ from pyarrow.includes.libarrow cimport *
21
+ from pyarrow.lib cimport _Weakrefable
22
+
23
+
24
# Declaration of the extension type holding a CJSONParseOptions value.
cdef class ParseOptions(_Weakrefable):
    cdef:
        # The C++ parse options held by value inside the Python object.
        CJSONParseOptions options

    # Build a ParseOptions object from a CJSONParseOptions value.
    @staticmethod
    cdef ParseOptions wrap(CJSONParseOptions options)
30
+
31
# Declaration of the extension type holding a CJSONReadOptions value.
cdef class ReadOptions(_Weakrefable):
    cdef:
        # The C++ read options held by value inside the Python object.
        CJSONReadOptions options

    # Build a ReadOptions object from a CJSONReadOptions value.
    @staticmethod
    cdef ReadOptions wrap(CJSONReadOptions options)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_json.pyx ADDED
@@ -0,0 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # cython: profile=False
19
+ # distutils: language = c++
20
+ # cython: language_level = 3
21
+
22
+ from pyarrow.includes.common cimport *
23
+ from pyarrow.includes.libarrow cimport *
24
+ from pyarrow.lib cimport (_Weakrefable, MemoryPool,
25
+ maybe_unbox_memory_pool,
26
+ get_input_stream, pyarrow_wrap_table,
27
+ pyarrow_wrap_schema, pyarrow_unwrap_schema)
28
+
29
+
30
cdef class ReadOptions(_Weakrefable):
    """
    Options for reading JSON files.

    Parameters
    ----------
    use_threads : bool, optional (default True)
        Whether to use multiple threads to accelerate reading
    block_size : int, optional
        How many bytes to process at a time from the input stream.
        This will determine multi-threading granularity as well as
        the size of individual chunks in the Table.
    """

    # Prevent attributes from being created by accident
    __slots__ = ()

    def __init__(self, use_threads=None, block_size=None):
        # Start from the library defaults; an argument left as None keeps
        # the default for that field.
        self.options = CJSONReadOptions.Defaults()
        if use_threads is not None:
            self.use_threads = use_threads
        if block_size is not None:
            self.block_size = block_size

    @property
    def use_threads(self):
        """
        Whether to use multiple threads to accelerate reading.
        """
        return self.options.use_threads

    @use_threads.setter
    def use_threads(self, value):
        self.options.use_threads = value

    @property
    def block_size(self):
        """
        How many bytes to process at a time from the input stream.

        This will determine multi-threading granularity as well as the size of
        individual chunks in the Table.
        """
        return self.options.block_size

    @block_size.setter
    def block_size(self, value):
        self.options.block_size = value

    def __reduce__(self):
        # Pickle as a constructor call with the two option values, passed
        # positionally in the same order as the __init__ parameters.
        ctor_args = (self.use_threads, self.block_size)
        return ReadOptions, ctor_args

    def equals(self, ReadOptions other):
        """
        Parameters
        ----------
        other : pyarrow.json.ReadOptions

        Returns
        -------
        bool
        """
        return (self.use_threads == other.use_threads
                and self.block_size == other.block_size)

    def __eq__(self, other):
        # equals() only accepts ReadOptions; a TypeError from calling it is
        # reported as "not equal".
        try:
            return self.equals(other)
        except TypeError:
            return False

    @staticmethod
    cdef ReadOptions wrap(CJSONReadOptions options):
        wrapped = ReadOptions()
        wrapped.options = options  # shallow copy
        return wrapped
111
+
112
+
113
cdef class ParseOptions(_Weakrefable):
    """
    Options for parsing JSON files.

    Parameters
    ----------
    explicit_schema : Schema, optional (default None)
        Optional explicit schema (no type inference, ignores other fields).
    newlines_in_values : bool, optional (default False)
        Whether objects may be printed across multiple lines (for example
        pretty printed). If false, input must end with an empty line.
    unexpected_field_behavior : str, default "infer"
        How JSON fields outside of explicit_schema (if given) are treated.

        Possible behaviors:

        - "ignore": unexpected JSON fields are ignored
        - "error": error out on unexpected JSON fields
        - "infer": unexpected JSON fields are type-inferred and included in
          the output
    """

    __slots__ = ()

    def __init__(self, explicit_schema=None, newlines_in_values=None,
                 unexpected_field_behavior=None):
        # Start from the library defaults; an argument left as None keeps
        # the default for that field.
        self.options = CJSONParseOptions.Defaults()
        if explicit_schema is not None:
            self.explicit_schema = explicit_schema
        if newlines_in_values is not None:
            self.newlines_in_values = newlines_in_values
        if unexpected_field_behavior is not None:
            self.unexpected_field_behavior = unexpected_field_behavior

    def __reduce__(self):
        # Pickle as a constructor call with the three option values, passed
        # positionally in the same order as the __init__ parameters.
        ctor_args = (
            self.explicit_schema,
            self.newlines_in_values,
            self.unexpected_field_behavior,
        )
        return ParseOptions, ctor_args

    @property
    def explicit_schema(self):
        """
        Optional explicit schema (no type inference, ignores other fields)
        """
        # A null shared pointer means that no explicit schema is set.
        if self.options.explicit_schema.get() != NULL:
            return pyarrow_wrap_schema(self.options.explicit_schema)
        return None

    @explicit_schema.setter
    def explicit_schema(self, value):
        self.options.explicit_schema = pyarrow_unwrap_schema(value)

    @property
    def newlines_in_values(self):
        """
        Whether newline characters are allowed in JSON values.
        Setting this to True reduces the performance of multi-threaded
        JSON reading.
        """
        return self.options.newlines_in_values

    @newlines_in_values.setter
    def newlines_in_values(self, value):
        self.options.newlines_in_values = value

    @property
    def unexpected_field_behavior(self):
        """
        How JSON fields outside of explicit_schema (if given) are treated.

        Possible behaviors:

        - "ignore": unexpected JSON fields are ignored
        - "error": error out on unexpected JSON fields
        - "infer": unexpected JSON fields are type-inferred and included in
          the output

        Set to "infer" by default.
        """
        behavior = self.options.unexpected_field_behavior
        if behavior == CUnexpectedFieldBehavior_Ignore:
            return "ignore"
        if behavior == CUnexpectedFieldBehavior_Error:
            return "error"
        if behavior == CUnexpectedFieldBehavior_InferType:
            return "infer"
        raise ValueError('Unexpected value for unexpected_field_behavior')

    @unexpected_field_behavior.setter
    def unexpected_field_behavior(self, value):
        cdef CUnexpectedFieldBehavior behavior

        # Translate the user-facing string into the C++ enum; anything else
        # is rejected before the options are touched.
        if value == "ignore":
            behavior = CUnexpectedFieldBehavior_Ignore
        elif value == "error":
            behavior = CUnexpectedFieldBehavior_Error
        elif value == "infer":
            behavior = CUnexpectedFieldBehavior_InferType
        else:
            raise ValueError(
                "Unexpected value `{}` for `unexpected_field_behavior`, pass "
                "either `ignore`, `error` or `infer`.".format(value)
            )

        self.options.unexpected_field_behavior = behavior

    def equals(self, ParseOptions other):
        """
        Parameters
        ----------
        other : pyarrow.json.ParseOptions

        Returns
        -------
        bool
        """
        return (self.explicit_schema == other.explicit_schema
                and self.newlines_in_values == other.newlines_in_values
                and (self.unexpected_field_behavior ==
                     other.unexpected_field_behavior))

    def __eq__(self, other):
        # equals() only accepts ParseOptions; a TypeError from calling it is
        # reported as "not equal".
        try:
            return self.equals(other)
        except TypeError:
            return False

    @staticmethod
    cdef ParseOptions wrap(CJSONParseOptions options):
        wrapped = ParseOptions()
        wrapped.options = options  # shallow copy
        return wrapped
250
+
251
+
252
cdef _get_reader(input_file, shared_ptr[CInputStream]* out):
    # Open `input_file` as an input stream into `out`, with memory mapping
    # disabled.
    get_input_stream(input_file, False, out)
255
+
256
cdef _get_read_options(ReadOptions read_options, CJSONReadOptions* out):
    # Copy the C++ read options into `out`, using the library defaults when
    # the caller passed None.
    if read_options is not None:
        out[0] = read_options.options
    else:
        out[0] = CJSONReadOptions.Defaults()
261
+
262
cdef _get_parse_options(ParseOptions parse_options, CJSONParseOptions* out):
    # Copy the C++ parse options into `out`, using the library defaults when
    # the caller passed None.
    if parse_options is not None:
        out[0] = parse_options.options
    else:
        out[0] = CJSONParseOptions.Defaults()
267
+
268
+
269
def read_json(input_file, read_options=None, parse_options=None,
              MemoryPool memory_pool=None):
    """
    Read a Table from a stream of JSON data.

    Parameters
    ----------
    input_file : str, path or file-like object
        The location of JSON data. Currently only the line-delimited JSON
        format is supported.
    read_options : pyarrow.json.ReadOptions, optional
        Options for the JSON reader (see ReadOptions constructor for defaults).
    parse_options : pyarrow.json.ParseOptions, optional
        Options for the JSON parser
        (see ParseOptions constructor for defaults).
    memory_pool : MemoryPool, optional
        Pool to allocate Table memory from.

    Returns
    -------
    :class:`pyarrow.Table`
        Contents of the JSON file as an in-memory table.
    """
    cdef shared_ptr[CInputStream] c_stream
    cdef CJSONReadOptions c_read_options
    cdef CJSONParseOptions c_parse_options
    cdef shared_ptr[CJSONReader] c_reader
    cdef shared_ptr[CTable] c_table

    # Resolve the Python-level arguments into their C++ counterparts.
    _get_reader(input_file, &c_stream)
    _get_read_options(read_options, &c_read_options)
    _get_parse_options(parse_options, &c_parse_options)

    c_reader = GetResultValue(
        CJSONReader.Make(maybe_unbox_memory_pool(memory_pool),
                         c_stream, c_read_options, c_parse_options))

    # Only the Read() call itself runs without the GIL.
    with nogil:
        c_table = GetResultValue(c_reader.get().Read())

    return pyarrow_wrap_table(c_table)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_orc.pyx ADDED
@@ -0,0 +1,445 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # cython: profile=False
19
+ # distutils: language = c++
20
+
21
+ from cython.operator cimport dereference as deref
22
+ from libcpp.vector cimport vector as std_vector
23
+ from libcpp.utility cimport move
24
+ from pyarrow.includes.common cimport *
25
+ from pyarrow.includes.libarrow cimport *
26
+ from pyarrow.lib cimport (check_status, _Weakrefable,
27
+ MemoryPool, maybe_unbox_memory_pool,
28
+ pyarrow_wrap_schema,
29
+ pyarrow_wrap_batch,
30
+ Table,
31
+ pyarrow_wrap_table,
32
+ pyarrow_wrap_metadata,
33
+ pyarrow_unwrap_table,
34
+ get_reader,
35
+ get_writer)
36
+ from pyarrow.lib import frombytes, tobytes
37
+ from pyarrow.util import _stringify_path
38
+
39
+
40
cdef compression_type_from_enum(CCompressionType compression_type):
    # Map a compression enum value to its ORC compression name. Note that
    # the GZIP enum value is reported as 'ZLIB'.
    names_by_enum = {
        CCompressionType_UNCOMPRESSED: 'UNCOMPRESSED',
        CCompressionType_GZIP: 'ZLIB',
        CCompressionType_SNAPPY: 'SNAPPY',
        CCompressionType_LZ4: 'LZ4',
        CCompressionType_ZSTD: 'ZSTD',
    }
    found = names_by_enum.get(compression_type)
    if found is None:
        raise ValueError('Unsupported compression')
    return found
51
+
52
+
53
cdef CCompressionType compression_type_from_name(name) except *:
    # Map an ORC compression name (matched case-insensitively) to the
    # compression enum. Raises TypeError for non-string input and ValueError
    # for an unrecognised name.
    if not isinstance(name, str):
        raise TypeError('compression must be a string')
    normalized = name.upper()
    if normalized == 'UNCOMPRESSED':
        return CCompressionType_UNCOMPRESSED
    if normalized == 'ZLIB':
        return CCompressionType_GZIP
    if normalized == 'SNAPPY':
        return CCompressionType_SNAPPY
    if normalized == 'LZ4':
        return CCompressionType_LZ4
    if normalized == 'ZSTD':
        return CCompressionType_ZSTD
    raise ValueError(f'Unknown CompressionKind: {normalized}')
68
+
69
+
70
cdef compression_strategy_from_enum(
    CompressionStrategy compression_strategy
):
    # Map a compression strategy enum value to its name.
    names_by_enum = {
        _CompressionStrategy_SPEED: 'SPEED',
        _CompressionStrategy_COMPRESSION: 'COMPRESSION',
    }
    found = names_by_enum.get(compression_strategy)
    if found is None:
        raise ValueError('Unsupported compression strategy')
    return found
80
+
81
+
82
cdef CompressionStrategy compression_strategy_from_name(name) except *:
    # Map a compression strategy name (matched case-insensitively) to the
    # enum. Raises TypeError for non-string input and ValueError for an
    # unrecognised name.
    if not isinstance(name, str):
        raise TypeError('compression strategy must be a string')
    normalized = name.upper()
    if normalized == 'SPEED':
        return _CompressionStrategy_SPEED
    if normalized == 'COMPRESSION':
        return _CompressionStrategy_COMPRESSION
    raise ValueError(f'Unknown CompressionStrategy: {normalized}')
91
+
92
+
93
cdef file_version_from_class(FileVersion file_version):
    # Return the file version's ToString() result as a Python str.
    version_bytes = file_version.ToString()
    return frombytes(version_bytes)
95
+
96
+
97
cdef writer_id_from_enum(WriterId writer_id):
    # Map a writer id enum value to its name; ids not listed here raise
    # ValueError.
    names_by_id = {
        _WriterId_ORC_JAVA_WRITER: 'ORC_JAVA',
        _WriterId_ORC_CPP_WRITER: 'ORC_CPP',
        _WriterId_PRESTO_WRITER: 'PRESTO',
        _WriterId_SCRITCHLEY_GO: 'SCRITCHLEY_GO',
        _WriterId_TRINO_WRITER: 'TRINO',
    }
    found = names_by_id.get(writer_id)
    if found is None:
        raise ValueError('Unsupported writer ID')
    return found
108
+
109
+
110
cdef writer_version_from_enum(WriterVersion writer_version):
    # Map a writer version enum value to its name; versions not listed here
    # raise ValueError.
    names_by_version = {
        _WriterVersion_ORIGINAL: 'ORIGINAL',
        _WriterVersion_HIVE_8732: 'HIVE_8732',
        _WriterVersion_HIVE_4243: 'HIVE_4243',
        _WriterVersion_HIVE_12055: 'HIVE_12055',
        _WriterVersion_HIVE_13083: 'HIVE_13083',
        _WriterVersion_ORC_101: 'ORC_101',
        _WriterVersion_ORC_135: 'ORC_135',
        _WriterVersion_ORC_517: 'ORC_517',
        _WriterVersion_ORC_203: 'ORC_203',
        _WriterVersion_ORC_14: 'ORC_14',
    }
    found = names_by_version.get(writer_version)
    if found is None:
        raise ValueError('Unsupported writer version')
    return found
126
+
127
+
128
cdef shared_ptr[WriteOptions] _create_write_options(
    file_version=None,
    batch_size=None,
    stripe_size=None,
    compression=None,
    compression_block_size=None,
    compression_strategy=None,
    row_index_stride=None,
    padding_tolerance=None,
    dictionary_key_size_threshold=None,
    bloom_filter_columns=None,
    bloom_filter_fpp=None
) except *:
    """
    General writer options.

    Builds a default-constructed WriteOptions and overrides each field for
    which a non-None argument was given. Every argument is validated before
    it is stored.

    Raises
    ------
    ValueError
        If a value is out of range or otherwise invalid for its option.
    TypeError
        If `compression` or `compression_strategy` is not a string.
    """
    cdef:
        shared_ptr[WriteOptions] options
    options = make_shared[WriteOptions]()
    # batch_size
    if batch_size is not None:
        if isinstance(batch_size, int) and batch_size > 0:
            deref(options).batch_size = batch_size
        else:
            raise ValueError(f"Invalid ORC writer batch size: {batch_size}")
    # file_version
    if file_version is not None:
        if file_version == "0.12":
            deref(options).file_version = FileVersion(0, 12)
        elif file_version == "0.11":
            deref(options).file_version = FileVersion(0, 11)
        else:
            raise ValueError(f"Unsupported ORC file version: {file_version}")
    # stripe_size
    if stripe_size is not None:
        if isinstance(stripe_size, int) and stripe_size > 0:
            deref(options).stripe_size = stripe_size
        else:
            raise ValueError(f"Invalid ORC stripe size: {stripe_size}")
    # compression
    if compression is not None:
        if isinstance(compression, str):
            deref(options).compression = compression_type_from_name(
                compression)
        else:
            raise TypeError("Unsupported ORC compression type: "
                            f"{compression}")
    # compression_block_size
    if compression_block_size is not None:
        if (isinstance(compression_block_size, int) and
                compression_block_size > 0):
            deref(options).compression_block_size = compression_block_size
        else:
            raise ValueError("Invalid ORC compression block size: "
                             f"{compression_block_size}")
    # compression_strategy
    if compression_strategy is not None:
        # The type check must look at the strategy itself. Checking
        # `compression` here rejected a valid strategy whenever no
        # compression was passed.
        if isinstance(compression_strategy, str):
            deref(options).compression_strategy = \
                compression_strategy_from_name(compression_strategy)
        else:
            raise TypeError("Unsupported ORC compression strategy: "
                            f"{compression_strategy}")
    # row_index_stride
    if row_index_stride is not None:
        if isinstance(row_index_stride, int) and row_index_stride > 0:
            deref(options).row_index_stride = row_index_stride
        else:
            raise ValueError("Invalid ORC row index stride: "
                             f"{row_index_stride}")
    # padding_tolerance
    if padding_tolerance is not None:
        try:
            padding_tolerance = float(padding_tolerance)
            deref(options).padding_tolerance = padding_tolerance
        except Exception:
            raise ValueError("Invalid ORC padding tolerance: "
                             f"{padding_tolerance}")
    # dictionary_key_size_threshold
    if dictionary_key_size_threshold is not None:
        try:
            dictionary_key_size_threshold = float(
                dictionary_key_size_threshold)
            # Explicit check rather than `assert`, so the validation still
            # runs when assertions are disabled (python -O).
            if not 0 <= dictionary_key_size_threshold <= 1:
                raise ValueError("threshold must be within [0, 1]")
            deref(options).dictionary_key_size_threshold = \
                dictionary_key_size_threshold
        except Exception:
            raise ValueError("Invalid ORC dictionary key size threshold: "
                             f"{dictionary_key_size_threshold}")
    # bloom_filter_columns
    if bloom_filter_columns is not None:
        try:
            bloom_filter_columns = list(bloom_filter_columns)
            for col in bloom_filter_columns:
                # Explicit check rather than `assert` (see above).
                if not (isinstance(col, int) and col >= 0):
                    raise ValueError(
                        "column indices must be non-negative integers")
            deref(options).bloom_filter_columns = bloom_filter_columns
        except Exception:
            raise ValueError("Invalid ORC BloomFilter columns: "
                             f"{bloom_filter_columns}")
    # Max false positive rate of the Bloom Filter
    if bloom_filter_fpp is not None:
        try:
            bloom_filter_fpp = float(bloom_filter_fpp)
            # Explicit check rather than `assert` (see above).
            if not 0 <= bloom_filter_fpp <= 1:
                raise ValueError("rate must be within [0, 1]")
            deref(options).bloom_filter_fpp = bloom_filter_fpp
        except Exception:
            raise ValueError("Invalid ORC BloomFilter false positive rate: "
                             f"{bloom_filter_fpp}")
    return options
235
+
236
+
237
cdef class ORCReader(_Weakrefable):
    """
    Thin wrapper around the C++ ``ORCFileReader``.

    ``open`` has to be called first: it creates the C++ reader that every
    other method dereferences. Calling any accessor before that raises
    ``ValueError`` instead of dereferencing a null pointer.
    """
    cdef:
        object source
        CMemoryPool* allocator
        unique_ptr[ORCFileReader] reader

    def __cinit__(self, MemoryPool memory_pool=None):
        self.allocator = maybe_unbox_memory_pool(memory_pool)

    cdef int _ensure_open(self) except -1:
        # The unique_ptr stays null until open() has succeeded.
        if self.reader.get() == NULL:
            raise ValueError("ORC reader is not open; call open() first")
        return 0

    def open(self, object source, c_bool use_memory_map=True):
        """
        Open ``source`` for reading and create the underlying C++ reader.

        Parameters
        ----------
        source : object
            Passed to ``get_reader`` to obtain a random access file handle.
        use_memory_map : bool, default True
            Forwarded to ``get_reader``.
        """
        cdef:
            shared_ptr[CRandomAccessFile] rd_handle

        # Keep a reference to the source object on the reader.
        self.source = source

        get_reader(source, use_memory_map, &rd_handle)
        with nogil:
            self.reader = move(GetResultValue(
                ORCFileReader.Open(rd_handle, self.allocator)
            ))

    def metadata(self):
        """
        The arrow metadata for this file.

        Returns
        -------
        metadata : pyarrow.KeyValueMetadata
        """
        cdef:
            shared_ptr[const CKeyValueMetadata] sp_arrow_metadata

        self._ensure_open()
        with nogil:
            sp_arrow_metadata = GetResultValue(
                deref(self.reader).ReadMetadata()
            )

        return pyarrow_wrap_metadata(sp_arrow_metadata)

    def schema(self):
        """
        The arrow schema for this file.

        Returns
        -------
        schema : pyarrow.Schema
        """
        cdef:
            shared_ptr[CSchema] sp_arrow_schema

        self._ensure_open()
        with nogil:
            sp_arrow_schema = GetResultValue(deref(self.reader).ReadSchema())

        return pyarrow_wrap_schema(sp_arrow_schema)

    def nrows(self):
        """Return the reader's ``NumberOfRows()``."""
        self._ensure_open()
        return deref(self.reader).NumberOfRows()

    def nstripes(self):
        """Return the reader's ``NumberOfStripes()``."""
        self._ensure_open()
        return deref(self.reader).NumberOfStripes()

    def file_version(self):
        """Return the file version, converted by ``file_version_from_class``."""
        self._ensure_open()
        return file_version_from_class(deref(self.reader).GetFileVersion())

    def software_version(self):
        """Return the reader's ``GetSoftwareVersion()`` decoded with ``frombytes``."""
        self._ensure_open()
        return frombytes(deref(self.reader).GetSoftwareVersion())

    def compression(self):
        """Return the compression, converted by ``compression_type_from_enum``."""
        self._ensure_open()
        return compression_type_from_enum(
            GetResultValue(deref(self.reader).GetCompression()))

    def compression_size(self):
        """Return the reader's ``GetCompressionSize()``."""
        self._ensure_open()
        return deref(self.reader).GetCompressionSize()

    def row_index_stride(self):
        """Return the reader's ``GetRowIndexStride()``."""
        self._ensure_open()
        return deref(self.reader).GetRowIndexStride()

    def writer(self):
        """
        Return the writer name, or the raw writer id value when the name
        resolves to ``'UNKNOWN'``.
        """
        self._ensure_open()
        writer_name = writer_id_from_enum(deref(self.reader).GetWriterId())
        if writer_name == 'UNKNOWN':
            return deref(self.reader).GetWriterIdValue()
        else:
            return writer_name

    def writer_version(self):
        """Return the writer version, converted by ``writer_version_from_enum``."""
        self._ensure_open()
        return writer_version_from_enum(deref(self.reader).GetWriterVersion())

    def nstripe_statistics(self):
        """Return the reader's ``GetNumberOfStripeStatistics()``."""
        self._ensure_open()
        return deref(self.reader).GetNumberOfStripeStatistics()

    def content_length(self):
        """Return the reader's ``GetContentLength()``."""
        self._ensure_open()
        return deref(self.reader).GetContentLength()

    def stripe_statistics_length(self):
        """Return the reader's ``GetStripeStatisticsLength()``."""
        self._ensure_open()
        return deref(self.reader).GetStripeStatisticsLength()

    def file_footer_length(self):
        """Return the reader's ``GetFileFooterLength()``."""
        self._ensure_open()
        return deref(self.reader).GetFileFooterLength()

    def file_postscript_length(self):
        """Return the reader's ``GetFilePostscriptLength()``."""
        self._ensure_open()
        return deref(self.reader).GetFilePostscriptLength()

    def file_length(self):
        """Return the reader's ``GetFileLength()``."""
        self._ensure_open()
        return deref(self.reader).GetFileLength()

    def serialized_file_tail(self):
        """Return the reader's ``GetSerializedFileTail()``."""
        self._ensure_open()
        return deref(self.reader).GetSerializedFileTail()

    def read_stripe(self, n, columns=None):
        """
        Read stripe ``n`` as a record batch.

        Parameters
        ----------
        n : int
            Stripe index, converted to ``int64_t``.
        columns : iterable of str, optional
            Column names to read; when None the whole stripe is read.

        Returns
        -------
        The result of ``pyarrow_wrap_batch`` on the stripe's record batch.
        """
        cdef:
            shared_ptr[CRecordBatch] sp_record_batch
            int64_t stripe
            std_vector[c_string] c_names

        self._ensure_open()
        stripe = n

        if columns is None:
            with nogil:
                sp_record_batch = GetResultValue(
                    deref(self.reader).ReadStripe(stripe)
                )
        else:
            # Encode the names while the GIL is still held.
            c_names = [tobytes(name) for name in columns]
            with nogil:
                sp_record_batch = GetResultValue(
                    deref(self.reader).ReadStripe(stripe, c_names)
                )

        return pyarrow_wrap_batch(sp_record_batch)

    def read(self, columns=None):
        """
        Read the file as a table.

        Parameters
        ----------
        columns : iterable of str, optional
            Column names to read; when None all columns are read.

        Returns
        -------
        The result of ``pyarrow_wrap_table`` on the table that was read.
        """
        cdef:
            shared_ptr[CTable] sp_table
            std_vector[c_string] c_names

        self._ensure_open()
        if columns is None:
            with nogil:
                sp_table = GetResultValue(deref(self.reader).Read())
        else:
            # Encode the names while the GIL is still held.
            c_names = [tobytes(name) for name in columns]
            with nogil:
                sp_table = GetResultValue(deref(self.reader).Read(c_names))

        return pyarrow_wrap_table(sp_table)
381
+
382
+
383
cdef class ORCWriter(_Weakrefable):
    """
    Thin wrapper around the C++ ``ORCFileWriter``.

    ``open`` must be called before ``write`` or ``close``. The writer only
    closes the output stream itself when it created that stream from a path
    (``own_sink``); a sink obtained through ``get_writer`` is left open.
    """
    cdef:
        unique_ptr[ORCFileWriter] writer
        shared_ptr[COutputStream] sink
        c_bool own_sink

    def open(self, object where, *,
             file_version=None,
             batch_size=None,
             stripe_size=None,
             compression=None,
             compression_block_size=None,
             compression_strategy=None,
             row_index_stride=None,
             padding_tolerance=None,
             dictionary_key_size_threshold=None,
             bloom_filter_columns=None,
             bloom_filter_fpp=None):
        """
        Open ``where`` for writing and create the underlying C++ writer.

        Parameters
        ----------
        where : object
            If ``_stringify_path`` accepts it, a new ``FileOutputStream`` is
            opened at that path and owned by this writer. Otherwise it is
            handed to ``get_writer`` and the resulting sink is not owned.
        file_version, batch_size, stripe_size, compression,
        compression_block_size, compression_strategy, row_index_stride,
        padding_tolerance, dictionary_key_size_threshold,
        bloom_filter_columns, bloom_filter_fpp : optional
            Forwarded unchanged to ``_create_write_options``.

        Raises
        ------
        ValueError
            If ``_create_write_options`` rejects an option value.
        """
        cdef:
            shared_ptr[WriteOptions] write_options
            c_string c_where

        # Validate the options before touching the destination, so that an
        # invalid value fails without an output file having been opened.
        write_options = _create_write_options(
            file_version=file_version,
            batch_size=batch_size,
            stripe_size=stripe_size,
            compression=compression,
            compression_block_size=compression_block_size,
            compression_strategy=compression_strategy,
            row_index_stride=row_index_stride,
            padding_tolerance=padding_tolerance,
            dictionary_key_size_threshold=dictionary_key_size_threshold,
            bloom_filter_columns=bloom_filter_columns,
            bloom_filter_fpp=bloom_filter_fpp
        )

        try:
            where = _stringify_path(where)
        except TypeError:
            # Not path-like: let get_writer produce the sink.
            get_writer(where, &self.sink)
            self.own_sink = False
        else:
            c_where = tobytes(where)
            with nogil:
                self.sink = GetResultValue(FileOutputStream.Open(c_where))
            self.own_sink = True

        with nogil:
            self.writer = move(GetResultValue(
                ORCFileWriter.Open(self.sink.get(),
                                   deref(write_options))))

    def write(self, Table table not None):
        """
        Write ``table`` through the underlying C++ writer.

        Raises
        ------
        ValueError
            If ``open`` has not been called.
        """
        cdef:
            shared_ptr[CTable] sp_table

        if self.writer.get() == NULL:
            raise ValueError("ORC writer is not open; call open() first")

        sp_table = pyarrow_unwrap_table(table)
        with nogil:
            check_status(deref(self.writer).Write(deref(sp_table)))

    def close(self):
        """
        Close the C++ writer and, if this object owns it, the sink.

        The owned sink is closed even when closing the writer fails.

        Raises
        ------
        ValueError
            If ``open`` has not been called.
        """
        if self.writer.get() == NULL:
            raise ValueError("ORC writer is not open; call open() first")

        try:
            with nogil:
                check_status(deref(self.writer).Close())
        finally:
            if self.own_sink:
                with nogil:
                    check_status(deref(self.sink).Close())
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_parquet.pyx ADDED
@@ -0,0 +1,2266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # cython: profile=False
19
+ # distutils: language = c++
20
+
21
+ from collections.abc import Sequence
22
+ from textwrap import indent
23
+ import warnings
24
+
25
+ from cython.operator cimport dereference as deref
26
+ from pyarrow.includes.common cimport *
27
+ from pyarrow.includes.libarrow cimport *
28
+ from pyarrow.includes.libarrow_python cimport *
29
+ from pyarrow.lib cimport (_Weakrefable, Buffer, Schema,
30
+ check_status,
31
+ MemoryPool, maybe_unbox_memory_pool,
32
+ Table, KeyValueMetadata,
33
+ pyarrow_wrap_chunked_array,
34
+ pyarrow_wrap_schema,
35
+ pyarrow_unwrap_metadata,
36
+ pyarrow_unwrap_schema,
37
+ pyarrow_wrap_table,
38
+ pyarrow_wrap_batch,
39
+ pyarrow_wrap_scalar,
40
+ NativeFile, get_reader, get_writer,
41
+ string_to_timeunit)
42
+
43
+ from pyarrow.lib import (ArrowException, NativeFile, BufferOutputStream,
44
+ _stringify_path,
45
+ tobytes, frombytes, is_threading_enabled)
46
+
47
+ cimport cpython as cp
48
+
49
# Row group size constants: 1024 * 1024 and 64 * 1024 * 1024.
_DEFAULT_ROW_GROUP_SIZE = 1 << 20
_MAX_ROW_GROUP_SIZE = 64 << 20
51
+
52
cdef class Statistics(_Weakrefable):
    """Statistics for a single column in a single row group."""

    def __cinit__(self):
        pass

    def __repr__(self):
        template = """{}
  has_min_max: {}
  min: {}
  max: {}
  null_count: {}
  distinct_count: {}
  num_values: {}
  physical_type: {}
  logical_type: {}
  converted_type (legacy): {}"""
        return template.format(
            object.__repr__(self),
            self.has_min_max,
            self.min,
            self.max,
            self.null_count,
            self.distinct_count,
            self.num_values,
            self.physical_type,
            str(self.logical_type),
            self.converted_type,
        )

    def to_dict(self):
        """
        Get dictionary representation of statistics.

        Returns
        -------
        dict
            Keys are ``has_min_max``, ``min``, ``max``, ``null_count``,
            ``distinct_count``, ``num_values`` and ``physical_type``.
        """
        return {
            'has_min_max': self.has_min_max,
            'min': self.min,
            'max': self.max,
            'null_count': self.null_count,
            'distinct_count': self.distinct_count,
            'num_values': self.num_values,
            'physical_type': self.physical_type,
        }

    def __eq__(self, other):
        # equals() only accepts Statistics; anything else is not comparable.
        try:
            return self.equals(other)
        except TypeError:
            return NotImplemented

    def equals(self, Statistics other):
        """
        Return whether the two column statistics objects are equal.

        Parameters
        ----------
        other : Statistics
            Statistics to compare against.

        Returns
        -------
        are_equal : bool
        """
        return self.statistics.get().Equals(deref(other.statistics.get()))

    @property
    def has_min_max(self):
        """Whether min and max are present (bool)."""
        return self.statistics.get().HasMinMax()

    @property
    def has_null_count(self):
        """Whether null count is present (bool)."""
        return self.statistics.get().HasNullCount()

    @property
    def has_distinct_count(self):
        """Whether distinct count is present (bool)."""
        return self.statistics.get().HasDistinctCount()

    @property
    def min_raw(self):
        """Min value as physical type (bool, int, float, or bytes)."""
        if not self.has_min_max:
            return None
        return _cast_statistic_raw_min(self.statistics.get())

    @property
    def max_raw(self):
        """Max value as physical type (bool, int, float, or bytes)."""
        if not self.has_min_max:
            return None
        return _cast_statistic_raw_max(self.statistics.get())

    @property
    def min(self):
        """
        Min value as logical type.

        Returned as the Python equivalent of logical type, such as datetime.date
        for dates and decimal.Decimal for decimals.
        """
        if not self.has_min_max:
            return None
        # _cast_statistics yields a (min, max) pair of scalars.
        return _cast_statistics(self.statistics.get())[0].as_py()

    @property
    def max(self):
        """
        Max value as logical type.

        Returned as the Python equivalent of logical type, such as datetime.date
        for dates and decimal.Decimal for decimals.
        """
        if not self.has_min_max:
            return None
        # _cast_statistics yields a (min, max) pair of scalars.
        return _cast_statistics(self.statistics.get())[1].as_py()

    @property
    def null_count(self):
        """Number of null values in chunk (int), or None when not present."""
        if not self.has_null_count:
            return None
        return self.statistics.get().null_count()

    @property
    def distinct_count(self):
        """Distinct number of values in chunk (int), or None when not present."""
        if not self.has_distinct_count:
            return None
        return self.statistics.get().distinct_count()

    @property
    def num_values(self):
        """Number of non-null values (int)."""
        return self.statistics.get().num_values()

    @property
    def physical_type(self):
        """Physical type of column (str)."""
        return physical_type_name_from_enum(
            self.statistics.get().physical_type())

    @property
    def logical_type(self):
        """Logical type of column (:class:`ParquetLogicalType`)."""
        return wrap_logical_type(self.statistics.get().descr().logical_type())

    @property
    def converted_type(self):
        """Legacy converted type (str or None)."""
        return converted_type_name_from_enum(
            self.statistics.get().descr().converted_type())
216
+
217
+
218
cdef class ParquetLogicalType(_Weakrefable):
    """Logical type of parquet type."""
    cdef:
        shared_ptr[const CParquetLogicalType] type

    def __cinit__(self):
        pass

    cdef init(self, const shared_ptr[const CParquetLogicalType]& type):
        # Store the shared pointer that all accessors read from.
        self.type = type

    def __repr__(self):
        description = str(self)
        return "{}\n {}".format(object.__repr__(self), description)

    def __str__(self):
        text = self.type.get().ToString()
        return frombytes(text, safe=True)

    def to_json(self):
        """
        Get a JSON string containing type and type parameters.

        Returns
        -------
        json : str
            JSON representation of type, with at least a field called 'Type'
            which contains the type name. If the type is parameterized, such
            as a decimal with scale and precision, will contain those as fields
            as well.
        """
        serialized = self.type.get().ToJSON()
        return frombytes(serialized)

    @property
    def type(self):
        """Name of the logical type (str)."""
        return logical_type_name_from_enum(self.type.get().type())
253
+
254
+
255
cdef wrap_logical_type(const shared_ptr[const CParquetLogicalType]& type):
    # Build an empty ParquetLogicalType and attach the C++ logical type to it.
    cdef ParquetLogicalType wrapped = ParquetLogicalType()
    wrapped.init(type)
    return wrapped
259
+
260
+
261
cdef _cast_statistic_raw_min(CStatistics* statistics):
    # Return min() of the statistics, read through the typed statistics class
    # that matches the physical type. Physical types not listed fall through
    # and return None.
    cdef ParquetType physical_type = statistics.physical_type()
    cdef uint32_t type_length = statistics.descr().type_length()
    if physical_type == ParquetType_BOOLEAN:
        return (<CBoolStatistics*> statistics).min()
    if physical_type == ParquetType_INT32:
        return (<CInt32Statistics*> statistics).min()
    if physical_type == ParquetType_INT64:
        return (<CInt64Statistics*> statistics).min()
    if physical_type == ParquetType_FLOAT:
        return (<CFloatStatistics*> statistics).min()
    if physical_type == ParquetType_DOUBLE:
        return (<CDoubleStatistics*> statistics).min()
    if physical_type == ParquetType_BYTE_ARRAY:
        return _box_byte_array((<CByteArrayStatistics*> statistics).min())
    if physical_type == ParquetType_FIXED_LEN_BYTE_ARRAY:
        return _box_flba((<CFLBAStatistics*> statistics).min(), type_length)
278
+
279
+
280
cdef _cast_statistic_raw_max(CStatistics* statistics):
    # Return max() of the statistics, read through the typed statistics class
    # that matches the physical type. Physical types not listed fall through
    # and return None.
    cdef ParquetType physical_type = statistics.physical_type()
    cdef uint32_t type_length = statistics.descr().type_length()
    if physical_type == ParquetType_BOOLEAN:
        return (<CBoolStatistics*> statistics).max()
    if physical_type == ParquetType_INT32:
        return (<CInt32Statistics*> statistics).max()
    if physical_type == ParquetType_INT64:
        return (<CInt64Statistics*> statistics).max()
    if physical_type == ParquetType_FLOAT:
        return (<CFloatStatistics*> statistics).max()
    if physical_type == ParquetType_DOUBLE:
        return (<CDoubleStatistics*> statistics).max()
    if physical_type == ParquetType_BYTE_ARRAY:
        return _box_byte_array((<CByteArrayStatistics*> statistics).max())
    if physical_type == ParquetType_FIXED_LEN_BYTE_ARRAY:
        return _box_flba((<CFLBAStatistics*> statistics).max(), type_length)
297
+
298
+
299
cdef _cast_statistics(CStatistics* statistics):
    # Convert the statistics to a (min, max) pair of wrapped scalars via
    # StatisticsAsScalars; a failing status is raised by check_status.
    cdef:
        shared_ptr[CScalar] c_lower
        shared_ptr[CScalar] c_upper
    check_status(StatisticsAsScalars(deref(statistics), &c_lower, &c_upper))
    lower = pyarrow_wrap_scalar(c_lower)
    upper = pyarrow_wrap_scalar(c_upper)
    return (lower, upper)
305
+
306
+
307
cdef _box_byte_array(ParquetByteArray val):
    # Copy val.len bytes starting at val.ptr into a new Python bytes object.
    cdef Py_ssize_t nbytes = <Py_ssize_t> val.len
    return cp.PyBytes_FromStringAndSize(<char*> val.ptr, nbytes)
309
+
310
+
311
cdef _box_flba(ParquetFLBA val, uint32_t len):
    # Copy `len` bytes starting at val.ptr into a new Python bytes object;
    # the length is supplied by the caller.
    cdef Py_ssize_t nbytes = <Py_ssize_t> len
    return cp.PyBytes_FromStringAndSize(<char*> val.ptr, nbytes)
313
+
314
+
315
cdef class ColumnChunkMetaData(_Weakrefable):
    """Column metadata for a single row group."""

    def __cinit__(self):
        pass

    def __repr__(self):
        # The nested statistics repr is indented so it reads as a sub-block.
        stats_text = indent(repr(self.statistics), 4 * ' ')
        template = """{0}
  file_offset: {1}
  file_path: {2}
  physical_type: {3}
  num_values: {4}
  path_in_schema: {5}
  is_stats_set: {6}
  statistics:
{7}
  compression: {8}
  encodings: {9}
  has_dictionary_page: {10}
  dictionary_page_offset: {11}
  data_page_offset: {12}
  total_compressed_size: {13}
  total_uncompressed_size: {14}"""
        return template.format(
            object.__repr__(self),
            self.file_offset,
            self.file_path,
            self.physical_type,
            self.num_values,
            self.path_in_schema,
            self.is_stats_set,
            stats_text,
            self.compression,
            self.encodings,
            self.has_dictionary_page,
            self.dictionary_page_offset,
            self.data_page_offset,
            self.total_compressed_size,
            self.total_uncompressed_size,
        )

    def to_dict(self):
        """
        Get dictionary representation of the column chunk metadata.

        Returns
        -------
        dict
            One entry per field shown by ``repr``; ``statistics`` is the
            statistics' own ``to_dict()`` or None when no statistics are set.
        """
        if self.is_stats_set:
            stats_dict = self.statistics.to_dict()
        else:
            stats_dict = None
        return {
            'file_offset': self.file_offset,
            'file_path': self.file_path,
            'physical_type': self.physical_type,
            'num_values': self.num_values,
            'path_in_schema': self.path_in_schema,
            'is_stats_set': self.is_stats_set,
            'statistics': stats_dict,
            'compression': self.compression,
            'encodings': self.encodings,
            'has_dictionary_page': self.has_dictionary_page,
            'dictionary_page_offset': self.dictionary_page_offset,
            'data_page_offset': self.data_page_offset,
            'total_compressed_size': self.total_compressed_size,
            'total_uncompressed_size': self.total_uncompressed_size,
        }

    def __eq__(self, other):
        # equals() only accepts ColumnChunkMetaData; anything else is not
        # comparable.
        try:
            return self.equals(other)
        except TypeError:
            return NotImplemented

    def equals(self, ColumnChunkMetaData other):
        """
        Return whether the two column chunk metadata objects are equal.

        Parameters
        ----------
        other : ColumnChunkMetaData
            Metadata to compare against.

        Returns
        -------
        are_equal : bool
        """
        return self.metadata.Equals(deref(other.metadata))

    @property
    def file_offset(self):
        """Offset into file where column chunk is located (int)."""
        return self.metadata.file_offset()

    @property
    def file_path(self):
        """File path stored in the metadata, decoded with ``frombytes`` (str)."""
        return frombytes(self.metadata.file_path())

    @property
    def physical_type(self):
        """Physical type of column (str)."""
        return physical_type_name_from_enum(self.metadata.type())

    @property
    def num_values(self):
        """Total number of values (int)."""
        return self.metadata.num_values()

    @property
    def path_in_schema(self):
        """Nested path to field, separated by periods (str)."""
        dotted = self.metadata.path_in_schema().get().ToDotString()
        return frombytes(dotted)

    @property
    def is_stats_set(self):
        """Whether or not statistics are present in metadata (bool)."""
        return self.metadata.is_stats_set()

    @property
    def statistics(self):
        """Statistics for column chunk (:class:`Statistics`), or None if unset."""
        cdef Statistics stats
        if not self.metadata.is_stats_set():
            return None
        stats = Statistics()
        stats.init(self.metadata.statistics(), self)
        return stats

    @property
    def compression(self):
        """
        Type of compression used for column (str).

        One of 'UNCOMPRESSED', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4', 'ZSTD',
        or 'UNKNOWN'.
        """
        return compression_name_from_enum(self.metadata.compression())

    @property
    def encodings(self):
        """
        Encodings used for column (tuple of str).

        One of 'PLAIN', 'BIT_PACKED', 'RLE', 'BYTE_STREAM_SPLIT', 'DELTA_BINARY_PACKED',
        'DELTA_LENGTH_BYTE_ARRAY', 'DELTA_BYTE_ARRAY'.
        """
        return tuple(map(encoding_name_from_enum, self.metadata.encodings()))

    @property
    def has_dictionary_page(self):
        """Whether there is dictionary data present in the column chunk (bool)."""
        return bool(self.metadata.has_dictionary_page())

    @property
    def dictionary_page_offset(self):
        """
        Offset of dictionary page relative to beginning of the file (int),
        or None when the chunk has no dictionary page.
        """
        if not self.has_dictionary_page:
            return None
        return self.metadata.dictionary_page_offset()

    @property
    def data_page_offset(self):
        """Offset of data page relative to beginning of the file (int)."""
        return self.metadata.data_page_offset()

    @property
    def has_index_page(self):
        """Not yet supported."""
        raise NotImplementedError('not supported in parquet-cpp')

    @property
    def index_page_offset(self):
        """Not yet supported."""
        raise NotImplementedError("parquet-cpp doesn't return valid values")

    @property
    def total_compressed_size(self):
        """Compressed size in bytes (int)."""
        return self.metadata.total_compressed_size()

    @property
    def total_uncompressed_size(self):
        """Uncompressed size in bytes (int)."""
        return self.metadata.total_uncompressed_size()

    @property
    def has_offset_index(self):
        """Whether the column chunk has an offset index"""
        return self.metadata.GetOffsetIndexLocation().has_value()

    @property
    def has_column_index(self):
        """Whether the column chunk has a column index"""
        return self.metadata.GetColumnIndexLocation().has_value()

    @property
    def metadata(self):
        """
        Additional metadata as key value pairs (dict[bytes, bytes]), or None
        when the chunk carries no key-value metadata.
        """
        cdef:
            unordered_map[c_string, c_string] kv_pairs
            const CKeyValueMetadata* underlying_metadata
        underlying_metadata = self.metadata.key_value_metadata().get()
        if underlying_metadata == NULL:
            return None
        underlying_metadata.ToUnorderedMap(&kv_pairs)
        return kv_pairs
523
+
524
+
525
+ cdef class SortingColumn:
526
+ """
527
+ Sorting specification for a single column.
528
+
529
+ Returned by :meth:`RowGroupMetaData.sorting_columns` and used in
530
+ :class:`ParquetWriter` to specify the sort order of the data.
531
+
532
+ Parameters
533
+ ----------
534
+ column_index : int
535
+ Index of column that data is sorted by.
536
+ descending : bool, default False
537
+ Whether column is sorted in descending order.
538
+ nulls_first : bool, default False
539
+ Whether null values appear before valid values.
540
+
541
+ Notes
542
+ -----
543
+
544
+ Column indices are zero-based, refer only to leaf fields, and are in
545
+ depth-first order. This may make the column indices for nested schemas
546
+ different from what you expect. In most cases, it will be easier to
547
+ specify the sort order using column names instead of column indices
548
+ and converting using the ``from_ordering`` method.
549
+
550
+ Examples
551
+ --------
552
+
553
+ In other APIs, sort order is specified by names, such as:
554
+
555
+ >>> sort_order = [('id', 'ascending'), ('timestamp', 'descending')]
556
+
557
+ For Parquet, the column index must be used instead:
558
+
559
+ >>> import pyarrow.parquet as pq
560
+ >>> [pq.SortingColumn(0), pq.SortingColumn(1, descending=True)]
561
+ [SortingColumn(column_index=0, descending=False, nulls_first=False), SortingColumn(column_index=1, descending=True, nulls_first=False)]
562
+
563
+ Convert the sort_order into the list of sorting columns with
564
+ ``from_ordering`` (note that the schema must be provided as well):
565
+
566
+ >>> import pyarrow as pa
567
+ >>> schema = pa.schema([('id', pa.int64()), ('timestamp', pa.timestamp('ms'))])
568
+ >>> sorting_columns = pq.SortingColumn.from_ordering(schema, sort_order)
569
+ >>> sorting_columns
570
+ (SortingColumn(column_index=0, descending=False, nulls_first=False), SortingColumn(column_index=1, descending=True, nulls_first=False))
571
+
572
+ Convert back to the sort order with ``to_ordering``:
573
+
574
+ >>> pq.SortingColumn.to_ordering(schema, sorting_columns)
575
+ ((('id', 'ascending'), ('timestamp', 'descending')), 'at_end')
576
+
577
+ See Also
578
+ --------
579
+ RowGroupMetaData.sorting_columns
580
+ """
581
+ cdef int column_index
582
+ cdef c_bool descending
583
+ cdef c_bool nulls_first
584
+
585
+ def __init__(self, int column_index, c_bool descending=False, c_bool nulls_first=False):
586
+ self.column_index = column_index
587
+ self.descending = descending
588
+ self.nulls_first = nulls_first
589
+
590
+ @classmethod
591
+ def from_ordering(cls, Schema schema, sort_keys, null_placement='at_end'):
592
+ """
593
+ Create a tuple of SortingColumn objects from the same arguments as
594
+ :class:`pyarrow.compute.SortOptions`.
595
+
596
+ Parameters
597
+ ----------
598
+ schema : Schema
599
+ Schema of the input data.
600
+ sort_keys : Sequence of (name, order) tuples
601
+ Names of field/column keys (str) to sort the input on,
602
+ along with the order each field/column is sorted in.
603
+ Accepted values for `order` are "ascending", "descending".
604
+ null_placement : {'at_start', 'at_end'}, default 'at_end'
605
+ Where null values should appear in the sort order.
606
+
607
+ Returns
608
+ -------
609
+ sorting_columns : tuple of SortingColumn
610
+ """
611
+ if null_placement == 'at_start':
612
+ nulls_first = True
613
+ elif null_placement == 'at_end':
614
+ nulls_first = False
615
+ else:
616
+ raise ValueError('null_placement must be "at_start" or "at_end"')
617
+
618
+ col_map = _name_to_index_map(schema)
619
+
620
+ sorting_columns = []
621
+
622
+ for sort_key in sort_keys:
623
+ if isinstance(sort_key, str):
624
+ name = sort_key
625
+ descending = False
626
+ elif (isinstance(sort_key, tuple) and len(sort_key) == 2 and
627
+ isinstance(sort_key[0], str) and
628
+ isinstance(sort_key[1], str)):
629
+ name, descending = sort_key
630
+ if descending == "descending":
631
+ descending = True
632
+ elif descending == "ascending":
633
+ descending = False
634
+ else:
635
+ raise ValueError("Invalid sort key direction: {0}"
636
+ .format(descending))
637
+ else:
638
+ raise ValueError("Invalid sort key: {0}".format(sort_key))
639
+
640
+ try:
641
+ column_index = col_map[name]
642
+ except KeyError:
643
+ raise ValueError("Sort key name '{0}' not found in schema:\n{1}"
644
+ .format(name, schema))
645
+
646
+ sorting_columns.append(
647
+ cls(column_index, descending=descending, nulls_first=nulls_first)
648
+ )
649
+
650
+ return tuple(sorting_columns)
651
+
652
+ @staticmethod
653
+ def to_ordering(Schema schema, sorting_columns):
654
+ """
655
+ Convert a tuple of SortingColumn objects to the same format as
656
+ :class:`pyarrow.compute.SortOptions`.
657
+
658
+ Parameters
659
+ ----------
660
+ schema : Schema
661
+ Schema of the input data.
662
+ sorting_columns : tuple of SortingColumn
663
+ Columns to sort the input on.
664
+
665
+ Returns
666
+ -------
667
+ sort_keys : tuple of (name, order) tuples
668
+ null_placement : {'at_start', 'at_end'}
669
+ """
670
+ col_map = {i: name for name, i in _name_to_index_map(schema).items()}
671
+
672
+ sort_keys = []
673
+ nulls_first = None
674
+
675
+ for sorting_column in sorting_columns:
676
+ name = col_map[sorting_column.column_index]
677
+ if sorting_column.descending:
678
+ order = "descending"
679
+ else:
680
+ order = "ascending"
681
+ sort_keys.append((name, order))
682
+ if nulls_first is None:
683
+ nulls_first = sorting_column.nulls_first
684
+ elif nulls_first != sorting_column.nulls_first:
685
+ raise ValueError("Sorting columns have inconsistent null placement")
686
+
687
+ if nulls_first:
688
+ null_placement = "at_start"
689
+ else:
690
+ null_placement = "at_end"
691
+
692
+ return tuple(sort_keys), null_placement
693
+
694
def __repr__(self):
    # Rendered as ClassName(column_index=..., descending=..., nulls_first=...)
    template = """{}(column_index={}, descending={}, nulls_first={})"""
    return template.format(
        self.__class__.__name__,
        self.column_index,
        self.descending,
        self.nulls_first,
    )
698
+
699
def __eq__(self, other):
    # Two sorting columns are equal when all three attributes match.
    #
    # The argument is deliberately untyped: a typed ``SortingColumn other``
    # makes Cython raise TypeError for foreign types and lets None through
    # to the C-level attribute reads below. Returning NotImplemented lets
    # Python fall back to its default comparison, matching the other
    # metadata classes in this module.
    if not isinstance(other, SortingColumn):
        return NotImplemented

    cdef SortingColumn rhs = <SortingColumn> other
    return (self.column_index == rhs.column_index and
            self.descending == rhs.descending and
            self.nulls_first == rhs.nulls_first)
703
+
704
def __hash__(self):
    # Hash over the same three fields that __eq__ compares.
    identity = (self.column_index, self.descending, self.nulls_first)
    return hash(identity)
706
+
707
@property
def column_index(self):
    """Index of column data is sorted by (int)."""
    return self.column_index
711
+
712
@property
def descending(self):
    """True when the column is sorted in descending order (bool)."""
    return self.descending
716
+
717
@property
def nulls_first(self):
    """True when null values are placed before valid values (bool)."""
    return self.nulls_first
721
+
722
def to_dict(self):
    """
    Return the SortingColumn as a plain dictionary.

    Returns
    -------
    dict
        Dictionary with a key for each attribute of this class
        (``column_index``, ``descending`` and ``nulls_first``).
    """
    return {
        "column_index": self.column_index,
        "descending": self.descending,
        "nulls_first": self.nulls_first,
    }
737
+
738
+
739
cdef class RowGroupMetaData(_Weakrefable):
    """Metadata for a single row group."""

    def __cinit__(self, FileMetaData parent, int index):
        # Validate the index against the parent before calling RowGroup().
        if index < 0 or index >= parent.num_row_groups:
            raise IndexError('{0} out of bounds'.format(index))
        self.up_metadata = parent._metadata.RowGroup(index)
        self.metadata = self.up_metadata.get()
        # Parent and index are kept so the object can be rebuilt on unpickle.
        self.parent = parent
        self.index = index

    def __reduce__(self):
        return RowGroupMetaData, (self.parent, self.index)

    def __eq__(self, other):
        # equals() raises TypeError for None or a foreign type; translate
        # that into NotImplemented so Python applies its default comparison.
        try:
            return self.equals(other)
        except TypeError:
            return NotImplemented

    def equals(self, RowGroupMetaData other not None):
        """
        Return whether the two row group metadata objects are equal.

        Parameters
        ----------
        other : RowGroupMetaData
            Metadata to compare against. None is rejected with TypeError
            because its C-level ``metadata`` pointer cannot be dereferenced.

        Returns
        -------
        are_equal : bool
        """
        return self.metadata.Equals(deref(other.metadata))

    def column(self, int i):
        """
        Get column metadata at given index.

        Parameters
        ----------
        i : int
            Index of column to get metadata for.

        Returns
        -------
        ColumnChunkMetaData
            Metadata for column within this chunk.

        Raises
        ------
        IndexError
            If ``i`` is negative or not smaller than ``num_columns``.
        """
        if i < 0 or i >= self.num_columns:
            raise IndexError('{0} out of bounds'.format(i))
        chunk = ColumnChunkMetaData()
        chunk.init(self, i)
        return chunk

    def __repr__(self):
        return """{0}
  num_columns: {1}
  num_rows: {2}
  total_byte_size: {3}
  sorting_columns: {4}""".format(object.__repr__(self),
                                 self.num_columns,
                                 self.num_rows,
                                 self.total_byte_size,
                                 self.sorting_columns)

    def to_dict(self):
        """
        Get dictionary representation of the row group metadata.

        Returns
        -------
        dict
            Dictionary with a key for each attribute of this class.
        """
        columns = []
        d = dict(
            num_columns=self.num_columns,
            num_rows=self.num_rows,
            total_byte_size=self.total_byte_size,
            columns=columns,
            sorting_columns=[col.to_dict() for col in self.sorting_columns]
        )
        # ``columns`` is the list already stored in ``d``; fill it in place.
        for i in range(self.num_columns):
            columns.append(self.column(i).to_dict())
        return d

    @property
    def num_columns(self):
        """Number of columns in this row group (int)."""
        return self.metadata.num_columns()

    @property
    def num_rows(self):
        """Number of rows in this row group (int)."""
        return self.metadata.num_rows()

    @property
    def total_byte_size(self):
        """Total byte size of all the uncompressed column data in this row group (int)."""
        return self.metadata.total_byte_size()

    @property
    def sorting_columns(self):
        """Columns the row group is sorted by (tuple of :class:`SortingColumn`)."""
        out = []
        cdef vector[CSortingColumn] sorting_columns = self.metadata.sorting_columns()
        # Wrap each C-level sorting column in its Python counterpart.
        for sorting_col in sorting_columns:
            out.append(SortingColumn(
                sorting_col.column_idx,
                sorting_col.descending,
                sorting_col.nulls_first
            ))
        return tuple(out)
853
+
854
+
855
def _reconstruct_filemetadata(Buffer serialized):
    # Unpickling helper: rebuild a FileMetaData from the serialized bytes
    # that FileMetaData.__reduce__ wrote into a buffer.
    cdef:
        CBuffer *c_buffer = serialized.buffer.get()
        uint32_t footer_len = <uint32_t>c_buffer.size()
        # __new__ is used so the instance is created without arguments and
        # then filled in through init().
        FileMetaData restored = FileMetaData.__new__(FileMetaData)

    restored.init(CFileMetaData_Make(c_buffer.data(), &footer_len))

    return restored
864
+
865
+
866
cdef class FileMetaData(_Weakrefable):
    """Parquet metadata for a single file."""

    def __cinit__(self):
        pass

    def __reduce__(self):
        # Pickle by serializing the metadata into an in-memory buffer;
        # _reconstruct_filemetadata rebuilds the object from that buffer.
        cdef:
            NativeFile sink = BufferOutputStream()
            COutputStream* c_sink = sink.get_output_stream().get()
        with nogil:
            self._metadata.WriteTo(c_sink)

        cdef Buffer buffer = sink.getvalue()
        return _reconstruct_filemetadata, (buffer,)

    def __hash__(self):
        return hash((self.schema,
                     self.num_rows,
                     self.num_row_groups,
                     self.format_version,
                     self.serialized_size))

    def __repr__(self):
        return """{0}
  created_by: {1}
  num_columns: {2}
  num_rows: {3}
  num_row_groups: {4}
  format_version: {5}
  serialized_size: {6}""".format(object.__repr__(self),
                                 self.created_by, self.num_columns,
                                 self.num_rows, self.num_row_groups,
                                 self.format_version,
                                 self.serialized_size)

    def to_dict(self):
        """
        Get dictionary representation of the file metadata.

        Returns
        -------
        dict
            Dictionary with a key for each attribute of this class.
        """
        row_groups = []
        d = dict(
            created_by=self.created_by,
            num_columns=self.num_columns,
            num_rows=self.num_rows,
            num_row_groups=self.num_row_groups,
            row_groups=row_groups,
            format_version=self.format_version,
            serialized_size=self.serialized_size
        )
        # ``row_groups`` is the list already stored in ``d``; fill it in place.
        for i in range(self.num_row_groups):
            row_groups.append(self.row_group(i).to_dict())
        return d

    def __eq__(self, other):
        # equals() raises TypeError for None or a foreign type; translate
        # that into NotImplemented so Python applies its default comparison.
        try:
            return self.equals(other)
        except TypeError:
            return NotImplemented

    def equals(self, FileMetaData other not None):
        """
        Return whether the two file metadata objects are equal.

        Parameters
        ----------
        other : FileMetaData
            Metadata to compare against.

        Returns
        -------
        are_equal : bool
        """
        return self._metadata.Equals(deref(other._metadata))

    @property
    def schema(self):
        """Schema of the file (:class:`ParquetSchema`)."""
        # Built on first access and cached on the instance.
        if self._schema is None:
            self._schema = ParquetSchema(self)
        return self._schema

    @property
    def serialized_size(self):
        """Size of the original thrift encoded metadata footer (int)."""
        return self._metadata.size()

    @property
    def num_columns(self):
        """Number of columns in file (int)."""
        return self._metadata.num_columns()

    @property
    def num_rows(self):
        """Total number of rows in file (int)."""
        return self._metadata.num_rows()

    @property
    def num_row_groups(self):
        """Number of row groups in file (int)."""
        return self._metadata.num_row_groups()

    @property
    def format_version(self):
        """
        Parquet format version used in file (str, such as '1.0', '2.4').

        If version is missing or unparsable, will default to assuming '2.6'.
        """
        cdef ParquetVersion version = self._metadata.version()
        if version == ParquetVersion_V1:
            return '1.0'
        elif version == ParquetVersion_V2_0:
            return 'pseudo-2.0'
        elif version == ParquetVersion_V2_4:
            return '2.4'
        elif version == ParquetVersion_V2_6:
            return '2.6'
        else:
            # Unknown enum value: warn and fall back to '2.6'.
            warnings.warn('Unrecognized file version, assuming 2.6: {}'
                          .format(version))
            return '2.6'

    @property
    def created_by(self):
        """
        String describing source of the parquet file (str).

        This typically includes library name and version number. For example, Arrow 7.0's
        writer returns 'parquet-cpp-arrow version 7.0.0'.
        """
        return frombytes(self._metadata.created_by())

    @property
    def metadata(self):
        """Additional metadata as key value pairs (dict[bytes, bytes]).

        None is returned when the file carries no key-value metadata.
        """
        cdef:
            unordered_map[c_string, c_string] metadata
            const CKeyValueMetadata* underlying_metadata
        underlying_metadata = self._metadata.key_value_metadata().get()
        if underlying_metadata != NULL:
            underlying_metadata.ToUnorderedMap(&metadata)
            return metadata
        else:
            return None

    def row_group(self, int i):
        """
        Get metadata for row group at index i.

        Parameters
        ----------
        i : int
            Row group index to get.

        Returns
        -------
        row_group_metadata : RowGroupMetaData
        """
        return RowGroupMetaData(self, i)

    def set_file_path(self, path):
        """
        Set ColumnChunk file paths to the given value.

        This method modifies the ``file_path`` field of each ColumnChunk
        in the FileMetaData to be a particular value.

        Parameters
        ----------
        path : str
            The file path to set on all ColumnChunks.
        """
        cdef:
            c_string c_path = tobytes(path)
        self._metadata.set_file_path(c_path)

    def append_row_groups(self, FileMetaData other not None):
        """
        Append row groups from other FileMetaData object.

        Parameters
        ----------
        other : FileMetaData
            Other metadata to append row groups from. None is rejected with
            TypeError because its C-level ``sp_metadata`` cannot be read.
        """
        cdef shared_ptr[CFileMetaData] c_metadata

        c_metadata = other.sp_metadata
        self._metadata.AppendRowGroups(deref(c_metadata))

    def write_metadata_file(self, where):
        """
        Write the metadata to a metadata-only Parquet file.

        Parameters
        ----------
        where : path or file-like object
            Where to write the metadata. Should be a writable path on
            the local filesystem, or a writable file-like object.
        """
        cdef:
            shared_ptr[COutputStream] sink
            c_string c_where

        try:
            where = _stringify_path(where)
        except TypeError:
            # Not path-like: treat ``where`` as a file-like object.
            get_writer(where, &sink)
        else:
            c_where = tobytes(where)
            with nogil:
                sink = GetResultValue(FileOutputStream.Open(c_where))

        with nogil:
            check_status(
                WriteMetaDataFile(deref(self._metadata), sink.get()))
1088
+
1089
+
1090
cdef class ParquetSchema(_Weakrefable):
    """A Parquet schema."""

    def __cinit__(self, FileMetaData container):
        # Keep a reference to the owning FileMetaData alongside the schema
        # pointer obtained from it.
        self.parent = container
        self.schema = container._metadata.schema()

    def __repr__(self):
        return "{0}\n{1}".format(
            object.__repr__(self),
            frombytes(self.schema.ToString(), safe=True))

    def __reduce__(self):
        return ParquetSchema, (self.parent,)

    def __len__(self):
        return self.schema.num_columns()

    def __getitem__(self, i):
        return self.column(i)

    def __hash__(self):
        return hash(self.schema.ToString())

    @property
    def names(self):
        """Name of each field (list of str)."""
        return [self[i].name for i in range(len(self))]

    def to_arrow_schema(self):
        """
        Convert Parquet schema to effective Arrow schema.

        Returns
        -------
        schema : Schema
        """
        cdef shared_ptr[CSchema] sp_arrow_schema

        with nogil:
            check_status(FromParquetSchema(
                self.schema, default_arrow_reader_properties(),
                self.parent._metadata.key_value_metadata(),
                &sp_arrow_schema))

        return pyarrow_wrap_schema(sp_arrow_schema)

    def __eq__(self, other):
        # equals() raises TypeError for None or a foreign type; translate
        # that into NotImplemented so Python applies its default comparison.
        try:
            return self.equals(other)
        except TypeError:
            return NotImplemented

    def equals(self, ParquetSchema other not None):
        """
        Return whether the two schemas are equal.

        Parameters
        ----------
        other : ParquetSchema
            Schema to compare against. None is rejected with TypeError
            because its C-level ``schema`` pointer cannot be dereferenced.

        Returns
        -------
        are_equal : bool
        """
        return self.schema.Equals(deref(other.schema))

    def column(self, i):
        """
        Return the schema for a single column.

        Parameters
        ----------
        i : int
            Index of column in schema.

        Returns
        -------
        column_schema : ColumnSchema

        Raises
        ------
        IndexError
            If ``i`` is negative or not smaller than ``len(self)``.
        """
        if i < 0 or i >= len(self):
            raise IndexError('{0} out of bounds'.format(i))

        return ColumnSchema(self, i)
1175
+
1176
+
1177
cdef class ColumnSchema(_Weakrefable):
    """Schema for a single column."""
    cdef:
        int index
        ParquetSchema parent
        const ColumnDescriptor* descr

    def __cinit__(self, ParquetSchema schema, int index):
        self.parent = schema
        self.index = index  # for pickling support
        self.descr = schema.schema.Column(index)

    def __eq__(self, other):
        # equals() raises TypeError for None or a foreign type; translate
        # that into NotImplemented so Python applies its default comparison.
        try:
            return self.equals(other)
        except TypeError:
            return NotImplemented

    def __reduce__(self):
        return ColumnSchema, (self.parent, self.index)

    def equals(self, ColumnSchema other not None):
        """
        Return whether the two column schemas are equal.

        Parameters
        ----------
        other : ColumnSchema
            Schema to compare against. None is rejected with TypeError
            because its C-level ``descr`` pointer cannot be dereferenced.

        Returns
        -------
        are_equal : bool
        """
        return self.descr.Equals(deref(other.descr))

    def __repr__(self):
        physical_type = self.physical_type
        converted_type = self.converted_type
        # Decorate the legacy converted type with its parameters when known.
        if converted_type == 'DECIMAL':
            converted_type = 'DECIMAL({0}, {1})'.format(self.precision,
                                                        self.scale)
        elif physical_type == 'FIXED_LEN_BYTE_ARRAY':
            converted_type = ('FIXED_LEN_BYTE_ARRAY(length={0})'
                              .format(self.length))

        return """<ParquetColumnSchema>
  name: {0}
  path: {1}
  max_definition_level: {2}
  max_repetition_level: {3}
  physical_type: {4}
  logical_type: {5}
  converted_type (legacy): {6}""".format(self.name, self.path,
                                         self.max_definition_level,
                                         self.max_repetition_level,
                                         physical_type,
                                         str(self.logical_type),
                                         converted_type)

    @property
    def name(self):
        """Name of field (str)."""
        return frombytes(self.descr.name())

    @property
    def path(self):
        """Nested path to field, separated by periods (str)."""
        return frombytes(self.descr.path().get().ToDotString())

    @property
    def max_definition_level(self):
        """Maximum definition level (int)."""
        return self.descr.max_definition_level()

    @property
    def max_repetition_level(self):
        """Maximum repetition level (int)."""
        return self.descr.max_repetition_level()

    @property
    def physical_type(self):
        """Name of physical type (str)."""
        return physical_type_name_from_enum(self.descr.physical_type())

    @property
    def logical_type(self):
        """Logical type of column (:class:`ParquetLogicalType`)."""
        return wrap_logical_type(self.descr.logical_type())

    @property
    def converted_type(self):
        """Legacy converted type name (str).

        This is always a string: 'NONE' when no converted type is set and
        'UNKNOWN' for an unrecognized enum value.
        """
        return converted_type_name_from_enum(self.descr.converted_type())

    # FIXED_LEN_BYTE_ARRAY attribute
    @property
    def length(self):
        """Array length if fixed length byte array type (int).

        The descriptor's value is returned unchanged; it is not converted
        to None for other physical types.
        """
        return self.descr.type_length()

    # Decimal attributes
    @property
    def precision(self):
        """Precision if decimal type (int).

        The descriptor's value is returned unchanged; it is not converted
        to None for non-decimal columns.
        """
        return self.descr.type_precision()

    @property
    def scale(self):
        """Scale if decimal type (int).

        The descriptor's value is returned unchanged; it is not converted
        to None for non-decimal columns.
        """
        return self.descr.type_scale()
1288
+
1289
+
1290
cdef physical_type_name_from_enum(ParquetType type_):
    # Name of a Parquet physical type; 'UNKNOWN' for unlisted enum values.
    names = {
        ParquetType_BOOLEAN: 'BOOLEAN',
        ParquetType_INT32: 'INT32',
        ParquetType_INT64: 'INT64',
        ParquetType_INT96: 'INT96',
        ParquetType_FLOAT: 'FLOAT',
        ParquetType_DOUBLE: 'DOUBLE',
        ParquetType_BYTE_ARRAY: 'BYTE_ARRAY',
        ParquetType_FIXED_LEN_BYTE_ARRAY: 'FIXED_LEN_BYTE_ARRAY',
    }
    return names.get(type_, 'UNKNOWN')
1301
+
1302
+
1303
cdef logical_type_name_from_enum(ParquetLogicalTypeId type_):
    # Name of a Parquet logical type id; 'UNKNOWN' for unlisted enum values.
    names = {
        ParquetLogicalType_UNDEFINED: 'UNDEFINED',
        ParquetLogicalType_STRING: 'STRING',
        ParquetLogicalType_MAP: 'MAP',
        ParquetLogicalType_LIST: 'LIST',
        ParquetLogicalType_ENUM: 'ENUM',
        ParquetLogicalType_DECIMAL: 'DECIMAL',
        ParquetLogicalType_DATE: 'DATE',
        ParquetLogicalType_TIME: 'TIME',
        ParquetLogicalType_TIMESTAMP: 'TIMESTAMP',
        ParquetLogicalType_INT: 'INT',
        ParquetLogicalType_FLOAT16: 'FLOAT16',
        ParquetLogicalType_JSON: 'JSON',
        ParquetLogicalType_BSON: 'BSON',
        ParquetLogicalType_UUID: 'UUID',
        ParquetLogicalType_NONE: 'NONE',
    }
    return names.get(type_, 'UNKNOWN')
1321
+
1322
+
1323
cdef converted_type_name_from_enum(ParquetConvertedType type_):
    # Name of a legacy Parquet converted type; 'UNKNOWN' for unlisted values.
    names = {
        ParquetConvertedType_NONE: 'NONE',
        ParquetConvertedType_UTF8: 'UTF8',
        ParquetConvertedType_MAP: 'MAP',
        ParquetConvertedType_MAP_KEY_VALUE: 'MAP_KEY_VALUE',
        ParquetConvertedType_LIST: 'LIST',
        ParquetConvertedType_ENUM: 'ENUM',
        ParquetConvertedType_DECIMAL: 'DECIMAL',
        ParquetConvertedType_DATE: 'DATE',
        ParquetConvertedType_TIME_MILLIS: 'TIME_MILLIS',
        ParquetConvertedType_TIME_MICROS: 'TIME_MICROS',
        ParquetConvertedType_TIMESTAMP_MILLIS: 'TIMESTAMP_MILLIS',
        ParquetConvertedType_TIMESTAMP_MICROS: 'TIMESTAMP_MICROS',
        ParquetConvertedType_UINT_8: 'UINT_8',
        ParquetConvertedType_UINT_16: 'UINT_16',
        ParquetConvertedType_UINT_32: 'UINT_32',
        ParquetConvertedType_UINT_64: 'UINT_64',
        ParquetConvertedType_INT_8: 'INT_8',
        ParquetConvertedType_INT_16: 'INT_16',
        ParquetConvertedType_INT_32: 'INT_32',
        ParquetConvertedType_INT_64: 'INT_64',
        ParquetConvertedType_JSON: 'JSON',
        ParquetConvertedType_BSON: 'BSON',
        ParquetConvertedType_INTERVAL: 'INTERVAL',
    }
    return names.get(type_, 'UNKNOWN')
1349
+
1350
+
1351
cdef encoding_name_from_enum(ParquetEncoding encoding_):
    # Name of a Parquet encoding; 'UNKNOWN' for unlisted enum values.
    names = {
        ParquetEncoding_PLAIN: 'PLAIN',
        ParquetEncoding_PLAIN_DICTIONARY: 'PLAIN_DICTIONARY',
        ParquetEncoding_RLE: 'RLE',
        ParquetEncoding_BIT_PACKED: 'BIT_PACKED',
        ParquetEncoding_DELTA_BINARY_PACKED: 'DELTA_BINARY_PACKED',
        ParquetEncoding_DELTA_LENGTH_BYTE_ARRAY: 'DELTA_LENGTH_BYTE_ARRAY',
        ParquetEncoding_DELTA_BYTE_ARRAY: 'DELTA_BYTE_ARRAY',
        ParquetEncoding_RLE_DICTIONARY: 'RLE_DICTIONARY',
        ParquetEncoding_BYTE_STREAM_SPLIT: 'BYTE_STREAM_SPLIT',
    }
    return names.get(encoding_, 'UNKNOWN')
1363
+
1364
+
1365
cdef encoding_enum_from_name(str encoding_name):
    # Translate a user-supplied encoding name into its enum value.
    #
    # Raises ValueError for names that are not recognised, and for the two
    # dictionary encodings, which cannot be requested explicitly.
    if encoding_name in ('RLE_DICTIONARY', 'PLAIN_DICTIONARY'):
        raise ValueError(f"{encoding_name!r} is already used by default.")

    selectable = {
        'PLAIN': ParquetEncoding_PLAIN,
        'BIT_PACKED': ParquetEncoding_BIT_PACKED,
        'RLE': ParquetEncoding_RLE,
        'BYTE_STREAM_SPLIT': ParquetEncoding_BYTE_STREAM_SPLIT,
        'DELTA_BINARY_PACKED': ParquetEncoding_DELTA_BINARY_PACKED,
        'DELTA_LENGTH_BYTE_ARRAY': ParquetEncoding_DELTA_LENGTH_BYTE_ARRAY,
        'DELTA_BYTE_ARRAY': ParquetEncoding_DELTA_BYTE_ARRAY,
    }
    if encoding_name not in selectable:
        raise ValueError(f"Unsupported column encoding: {encoding_name!r}")
    return selectable[encoding_name]
1383
+
1384
+
1385
cdef compression_name_from_enum(ParquetCompression compression_):
    # Name of a Parquet compression codec; 'UNKNOWN' for unlisted values.
    names = {
        ParquetCompression_UNCOMPRESSED: 'UNCOMPRESSED',
        ParquetCompression_SNAPPY: 'SNAPPY',
        ParquetCompression_GZIP: 'GZIP',
        ParquetCompression_LZO: 'LZO',
        ParquetCompression_BROTLI: 'BROTLI',
        ParquetCompression_LZ4: 'LZ4',
        ParquetCompression_ZSTD: 'ZSTD',
    }
    return names.get(compression_, 'UNKNOWN')
1395
+
1396
+
1397
cdef int check_compression_name(name) except -1:
    # Validate a compression codec name, ignoring case.
    # Returns 0 when the name is accepted; raises ArrowException otherwise.
    accepted = {'NONE', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4', 'ZSTD'}
    if name.upper() in accepted:
        return 0
    raise ArrowException("Unsupported compression: " + name)
1402
+
1403
+
1404
cdef ParquetCompression compression_from_name(name):
    # Translate a codec name (case-insensitive) into its enum value.
    # Any name that is not listed below maps to UNCOMPRESSED.
    codec = name.upper()
    if codec == 'SNAPPY':
        return ParquetCompression_SNAPPY
    if codec == 'GZIP':
        return ParquetCompression_GZIP
    if codec == 'LZO':
        return ParquetCompression_LZO
    if codec == 'BROTLI':
        return ParquetCompression_BROTLI
    if codec == 'LZ4':
        return ParquetCompression_LZ4
    if codec == 'ZSTD':
        return ParquetCompression_ZSTD
    return ParquetCompression_UNCOMPRESSED
1420
+
1421
+
1422
+ cdef class ParquetReader(_Weakrefable):
1423
+ cdef:
1424
+ object source
1425
+ CMemoryPool* pool
1426
+ UniquePtrNoGIL[FileReader] reader
1427
+ FileMetaData _metadata
1428
+ shared_ptr[CRandomAccessFile] rd_handle
1429
+
1430
+ cdef public:
1431
+ _column_idx_map
1432
+
1433
+ def __cinit__(self, MemoryPool memory_pool=None):
1434
+ self.pool = maybe_unbox_memory_pool(memory_pool)
1435
+ self._metadata = None
1436
+
1437
+ def open(self, object source not None, *, bint use_memory_map=False,
1438
+ read_dictionary=None, FileMetaData metadata=None,
1439
+ int buffer_size=0, bint pre_buffer=False,
1440
+ coerce_int96_timestamp_unit=None,
1441
+ FileDecryptionProperties decryption_properties=None,
1442
+ thrift_string_size_limit=None,
1443
+ thrift_container_size_limit=None,
1444
+ page_checksum_verification=False):
1445
+ """
1446
+ Open a parquet file for reading.
1447
+
1448
+ Parameters
1449
+ ----------
1450
+ source : str, pathlib.Path, pyarrow.NativeFile, or file-like object
1451
+ use_memory_map : bool, default False
1452
+ read_dictionary : iterable[int or str], optional
1453
+ metadata : FileMetaData, optional
1454
+ buffer_size : int, default 0
1455
+ pre_buffer : bool, default False
1456
+ coerce_int96_timestamp_unit : str, optional
1457
+ decryption_properties : FileDecryptionProperties, optional
1458
+ thrift_string_size_limit : int, optional
1459
+ thrift_container_size_limit : int, optional
1460
+ page_checksum_verification : bool, default False
1461
+ """
1462
+ cdef:
1463
+ shared_ptr[CFileMetaData] c_metadata
1464
+ CReaderProperties properties = default_reader_properties()
1465
+ ArrowReaderProperties arrow_props = (
1466
+ default_arrow_reader_properties())
1467
+ FileReaderBuilder builder
1468
+
1469
+ if pre_buffer and not is_threading_enabled():
1470
+ pre_buffer = False
1471
+
1472
+ if metadata is not None:
1473
+ c_metadata = metadata.sp_metadata
1474
+
1475
+ if buffer_size > 0:
1476
+ properties.enable_buffered_stream()
1477
+ properties.set_buffer_size(buffer_size)
1478
+ elif buffer_size == 0:
1479
+ properties.disable_buffered_stream()
1480
+ else:
1481
+ raise ValueError('Buffer size must be larger than zero')
1482
+
1483
+ if thrift_string_size_limit is not None:
1484
+ if thrift_string_size_limit <= 0:
1485
+ raise ValueError("thrift_string_size_limit "
1486
+ "must be larger than zero")
1487
+ properties.set_thrift_string_size_limit(thrift_string_size_limit)
1488
+ if thrift_container_size_limit is not None:
1489
+ if thrift_container_size_limit <= 0:
1490
+ raise ValueError("thrift_container_size_limit "
1491
+ "must be larger than zero")
1492
+ properties.set_thrift_container_size_limit(
1493
+ thrift_container_size_limit)
1494
+
1495
+ if decryption_properties is not None:
1496
+ properties.file_decryption_properties(
1497
+ decryption_properties.unwrap())
1498
+
1499
+ arrow_props.set_pre_buffer(pre_buffer)
1500
+
1501
+ properties.set_page_checksum_verification(page_checksum_verification)
1502
+
1503
+ if coerce_int96_timestamp_unit is None:
1504
+ # use the default defined in default_arrow_reader_properties()
1505
+ pass
1506
+ else:
1507
+ arrow_props.set_coerce_int96_timestamp_unit(
1508
+ string_to_timeunit(coerce_int96_timestamp_unit))
1509
+
1510
+ self.source = source
1511
+ get_reader(source, use_memory_map, &self.rd_handle)
1512
+
1513
+ with nogil:
1514
+ check_status(builder.Open(self.rd_handle, properties, c_metadata))
1515
+
1516
+ # Set up metadata
1517
+ with nogil:
1518
+ c_metadata = builder.raw_reader().metadata()
1519
+ self._metadata = result = FileMetaData()
1520
+ result.init(c_metadata)
1521
+
1522
+ if read_dictionary is not None:
1523
+ self._set_read_dictionary(read_dictionary, &arrow_props)
1524
+
1525
+ with nogil:
1526
+ check_status(builder.memory_pool(self.pool)
1527
+ .properties(arrow_props)
1528
+ .Build(&self.reader))
1529
+
1530
+ cdef _set_read_dictionary(self, read_dictionary,
1531
+ ArrowReaderProperties* props):
1532
+ for column in read_dictionary:
1533
+ if not isinstance(column, int):
1534
+ column = self.column_name_idx(column)
1535
+ props.set_read_dictionary(column, True)
1536
+
1537
+ @property
1538
+ def column_paths(self):
1539
+ cdef:
1540
+ FileMetaData container = self.metadata
1541
+ const CFileMetaData* metadata = container._metadata
1542
+ vector[c_string] path
1543
+ int i = 0
1544
+
1545
+ paths = []
1546
+ for i in range(0, metadata.num_columns()):
1547
+ path = (metadata.schema().Column(i)
1548
+ .path().get().ToDotVector())
1549
+ paths.append([frombytes(x) for x in path])
1550
+
1551
+ return paths
1552
+
1553
+ @property
1554
+ def metadata(self):
1555
+ return self._metadata
1556
+
1557
+ @property
1558
+ def schema_arrow(self):
1559
+ cdef shared_ptr[CSchema] out
1560
+ with nogil:
1561
+ check_status(self.reader.get().GetSchema(&out))
1562
+ return pyarrow_wrap_schema(out)
1563
+
1564
+ @property
1565
+ def num_row_groups(self):
1566
+ return self.reader.get().num_row_groups()
1567
+
1568
+ def set_use_threads(self, bint use_threads):
1569
+ """
1570
+ Parameters
1571
+ ----------
1572
+ use_threads : bool
1573
+ """
1574
+ if is_threading_enabled():
1575
+ self.reader.get().set_use_threads(use_threads)
1576
+ else:
1577
+ self.reader.get().set_use_threads(False)
1578
+
1579
+ def set_batch_size(self, int64_t batch_size):
1580
+ """
1581
+ Parameters
1582
+ ----------
1583
+ batch_size : int64
1584
+ """
1585
+ self.reader.get().set_batch_size(batch_size)
1586
+
1587
+ def iter_batches(self, int64_t batch_size, row_groups, column_indices=None,
1588
+ bint use_threads=True):
1589
+ """
1590
+ Parameters
1591
+ ----------
1592
+ batch_size : int64
1593
+ row_groups : list[int]
1594
+ column_indices : list[int], optional
1595
+ use_threads : bool, default True
1596
+
1597
+ Yields
1598
+ ------
1599
+ next : RecordBatch
1600
+ """
1601
+ cdef:
1602
+ vector[int] c_row_groups
1603
+ vector[int] c_column_indices
1604
+ shared_ptr[CRecordBatch] record_batch
1605
+ UniquePtrNoGIL[CRecordBatchReader] recordbatchreader
1606
+
1607
+ self.set_batch_size(batch_size)
1608
+
1609
+ if use_threads:
1610
+ self.set_use_threads(use_threads)
1611
+
1612
+ for row_group in row_groups:
1613
+ c_row_groups.push_back(row_group)
1614
+
1615
+ if column_indices is not None:
1616
+ for index in column_indices:
1617
+ c_column_indices.push_back(index)
1618
+ with nogil:
1619
+ check_status(
1620
+ self.reader.get().GetRecordBatchReader(
1621
+ c_row_groups, c_column_indices, &recordbatchreader
1622
+ )
1623
+ )
1624
+ else:
1625
+ with nogil:
1626
+ check_status(
1627
+ self.reader.get().GetRecordBatchReader(
1628
+ c_row_groups, &recordbatchreader
1629
+ )
1630
+ )
1631
+
1632
+ while True:
1633
+ with nogil:
1634
+ check_status(
1635
+ recordbatchreader.get().ReadNext(&record_batch)
1636
+ )
1637
+ if record_batch.get() == NULL:
1638
+ break
1639
+
1640
+ yield pyarrow_wrap_batch(record_batch)
1641
+
1642
+ def read_row_group(self, int i, column_indices=None,
1643
+ bint use_threads=True):
1644
+ """
1645
+ Parameters
1646
+ ----------
1647
+ i : int
1648
+ column_indices : list[int], optional
1649
+ use_threads : bool, default True
1650
+
1651
+ Returns
1652
+ -------
1653
+ table : pyarrow.Table
1654
+ """
1655
+ return self.read_row_groups([i], column_indices, use_threads)
1656
+
1657
+ def read_row_groups(self, row_groups not None, column_indices=None,
1658
+ bint use_threads=True):
1659
+ """
1660
+ Parameters
1661
+ ----------
1662
+ row_groups : list[int]
1663
+ column_indices : list[int], optional
1664
+ use_threads : bool, default True
1665
+
1666
+ Returns
1667
+ -------
1668
+ table : pyarrow.Table
1669
+ """
1670
+ cdef:
1671
+ shared_ptr[CTable] ctable
1672
+ vector[int] c_row_groups
1673
+ vector[int] c_column_indices
1674
+
1675
+ self.set_use_threads(use_threads)
1676
+
1677
+ for row_group in row_groups:
1678
+ c_row_groups.push_back(row_group)
1679
+
1680
+ if column_indices is not None:
1681
+ for index in column_indices:
1682
+ c_column_indices.push_back(index)
1683
+
1684
+ with nogil:
1685
+ check_status(self.reader.get()
1686
+ .ReadRowGroups(c_row_groups, c_column_indices,
1687
+ &ctable))
1688
+ else:
1689
+ # Read all columns
1690
+ with nogil:
1691
+ check_status(self.reader.get()
1692
+ .ReadRowGroups(c_row_groups, &ctable))
1693
+ return pyarrow_wrap_table(ctable)
1694
+
1695
+ def read_all(self, column_indices=None, bint use_threads=True):
1696
+ """
1697
+ Parameters
1698
+ ----------
1699
+ column_indices : list[int], optional
1700
+ use_threads : bool, default True
1701
+
1702
+ Returns
1703
+ -------
1704
+ table : pyarrow.Table
1705
+ """
1706
+ cdef:
1707
+ shared_ptr[CTable] ctable
1708
+ vector[int] c_column_indices
1709
+
1710
+ self.set_use_threads(use_threads)
1711
+
1712
+ if column_indices is not None:
1713
+ for index in column_indices:
1714
+ c_column_indices.push_back(index)
1715
+
1716
+ with nogil:
1717
+ check_status(self.reader.get()
1718
+ .ReadTable(c_column_indices, &ctable))
1719
+ else:
1720
+ # Read all columns
1721
+ with nogil:
1722
+ check_status(self.reader.get()
1723
+ .ReadTable(&ctable))
1724
+ return pyarrow_wrap_table(ctable)
1725
+
1726
+ def scan_contents(self, column_indices=None, batch_size=65536):
1727
+ """
1728
+ Parameters
1729
+ ----------
1730
+ column_indices : list[int], optional
1731
+ batch_size : int32, default 65536
1732
+
1733
+ Returns
1734
+ -------
1735
+ num_rows : int64
1736
+ """
1737
+ cdef:
1738
+ vector[int] c_column_indices
1739
+ int32_t c_batch_size
1740
+ int64_t c_num_rows
1741
+
1742
+ if column_indices is not None:
1743
+ for index in column_indices:
1744
+ c_column_indices.push_back(index)
1745
+
1746
+ c_batch_size = batch_size
1747
+
1748
+ with nogil:
1749
+ check_status(self.reader.get()
1750
+ .ScanContents(c_column_indices, c_batch_size,
1751
+ &c_num_rows))
1752
+
1753
+ return c_num_rows
1754
+
1755
def column_name_idx(self, column_name):
    """
    Find the index of a column by its name.

    The name-to-index mapping is built from the file metadata on the
    first call and cached on the reader for later lookups.

    Parameters
    ----------
    column_name : str
        Name of the column; separation of nesting levels is done via ".".

    Returns
    -------
    column_idx : int
        Integer index of the column in the schema.

    Raises
    ------
    KeyError
        If no column with that dotted path is present in the mapping.
    """
    cdef:
        FileMetaData container = self.metadata
        const CFileMetaData* metadata = container._metadata
        int i = 0

    if self._column_idx_map is None:
        self._column_idx_map = {}
        for i in range(metadata.num_columns()):
            # Keys are the dotted column paths, stored as bytes.
            dotted_path = tobytes(
                metadata.schema().Column(i).path().get().ToDotString())
            self._column_idx_map[dotted_path] = i

    lookup_key = tobytes(column_name)
    return self._column_idx_map[lookup_key]
1782
+
1783
def read_column(self, int column_index):
    """
    Read the column at the specified index.

    Parameters
    ----------
    column_index : int
        Index of the column.

    Returns
    -------
    column : pyarrow.ChunkedArray
    """
    cdef shared_ptr[CChunkedArray] c_column

    with nogil:
        check_status(
            self.reader.get().ReadColumn(column_index, &c_column))

    return pyarrow_wrap_chunked_array(c_column)
1801
+
1802
def close(self):
    """Close the underlying file handle unless ``closed`` is already true."""
    if self.closed:
        return
    with nogil:
        check_status(self.rd_handle.get().Close())
1806
+
1807
@property
def closed(self):
    """
    Whether the reader's file handle is closed.

    A reader without a file handle is reported as closed.
    """
    if self.rd_handle == NULL:
        return True
    with nogil:
        handle_is_closed = self.rd_handle.get().closed()
    return handle_is_closed
1814
+
1815
+
1816
cdef CSortingColumn _convert_sorting_column(SortingColumn sorting_column):
    # Copy the three fields of the Python-level SortingColumn into the
    # corresponding C++ struct.
    cdef CSortingColumn c_result

    c_result.column_idx = sorting_column.column_index
    c_result.descending = sorting_column.descending
    c_result.nulls_first = sorting_column.nulls_first

    return c_result
1824
+
1825
+
1826
cdef vector[CSortingColumn] _convert_sorting_columns(sorting_columns) except *:
    # Validate that `sorting_columns` is a Sequence made only of
    # SortingColumn instances, then convert each element to its C++ form.
    cdef vector[CSortingColumn] c_columns

    if not isinstance(sorting_columns, Sequence):
        raise ValueError(
            "'sorting_columns' must be a list of `SortingColumn`")
    for candidate in sorting_columns:
        if not isinstance(candidate, SortingColumn):
            raise ValueError(
                "'sorting_columns' must be a list of `SortingColumn`")

    # Conversion happens only after the whole input has been validated.
    for candidate in sorting_columns:
        c_columns.push_back(_convert_sorting_column(candidate))

    return c_columns
1836
+
1837
+
1838
cdef shared_ptr[WriterProperties] _create_writer_properties(
        use_dictionary=None,
        compression=None,
        version=None,
        write_statistics=None,
        data_page_size=None,
        compression_level=None,
        use_byte_stream_split=False,
        column_encoding=None,
        data_page_version=None,
        FileEncryptionProperties encryption_properties=None,
        write_batch_size=None,
        dictionary_pagesize_limit=None,
        write_page_index=False,
        write_page_checksum=False,
        sorting_columns=None,
        store_decimal_as_integer=False) except *:
    """
    General writer properties

    Translate the Python-level writer options into a C++
    ``WriterProperties`` object via ``WriterProperties.Builder``.
    Options left at ``None`` are not forwarded to the builder.

    ``use_dictionary``, ``write_statistics`` and ``use_byte_stream_split``
    accept either a bool (applies to every column) or an iterable of
    column names. ``compression`` and ``compression_level`` accept either
    a single value or a mapping from column name to value.
    ``column_encoding`` accepts a dict (per column) or a str (all columns).

    Raises
    ------
    ValueError
        For an unsupported ``version`` or ``data_page_version``, or when
        ``column_encoding`` is combined with dictionary encoding or with
        ``use_byte_stream_split`` for the same column(s).
    TypeError
        When ``store_decimal_as_integer`` is not a bool, or when
        ``column_encoding`` is neither a dict nor a str.
    """
    cdef:
        shared_ptr[WriterProperties] properties
        WriterProperties.Builder props

    # data_page_version

    if data_page_version is not None:
        if data_page_version == "1.0":
            props.data_page_version(ParquetDataPageVersion_V1)
        elif data_page_version == "2.0":
            props.data_page_version(ParquetDataPageVersion_V2)
        else:
            raise ValueError("Unsupported Parquet data page version: {0}"
                             .format(data_page_version))

    # version

    if version is not None:
        if version == "1.0":
            props.version(ParquetVersion_V1)
        elif version in ("2.0", "pseudo-2.0"):
            warnings.warn(
                "Parquet format '2.0' pseudo version is deprecated, use "
                "'2.4' or '2.6' for fine-grained feature selection",
                FutureWarning, stacklevel=2)
            props.version(ParquetVersion_V2_0)
        elif version == "2.4":
            props.version(ParquetVersion_V2_4)
        elif version == "2.6":
            props.version(ParquetVersion_V2_6)
        else:
            raise ValueError("Unsupported Parquet format version: {0}"
                             .format(version))

    # compression

    if isinstance(compression, basestring):
        check_compression_name(compression)
        props.compression(compression_from_name(compression))
    elif compression is not None:
        # Per-column codecs; `.items()` matches the column_encoding
        # handling below and does not rely on the Python 2 method name.
        for column, codec in compression.items():
            check_compression_name(codec)
            props.compression(tobytes(column), compression_from_name(codec))

    if isinstance(compression_level, int):
        props.compression_level(compression_level)
    elif compression_level is not None:
        for column, level in compression_level.items():
            props.compression_level(tobytes(column), level)

    # use_dictionary

    if isinstance(use_dictionary, bool):
        if use_dictionary:
            props.enable_dictionary()
            if column_encoding is not None:
                raise ValueError(
                    "To use 'column_encoding' set 'use_dictionary' to False")
        else:
            props.disable_dictionary()
    elif use_dictionary is not None:
        # Deactivate dictionary encoding by default
        props.disable_dictionary()
        for column in use_dictionary:
            props.enable_dictionary(tobytes(column))
            if (column_encoding is not None and
                    column_encoding.get(column) is not None):
                raise ValueError(
                    "To use 'column_encoding' set 'use_dictionary' to False")

    # write_statistics

    if isinstance(write_statistics, bool):
        if write_statistics:
            props.enable_statistics()
        else:
            props.disable_statistics()
    elif write_statistics is not None:
        # Deactivate statistics by default and enable for specified columns
        props.disable_statistics()
        for column in write_statistics:
            props.enable_statistics(tobytes(column))

    # sorting_columns

    if sorting_columns is not None:
        props.set_sorting_columns(_convert_sorting_columns(sorting_columns))

    # use_byte_stream_split

    if isinstance(use_byte_stream_split, bool):
        if use_byte_stream_split:
            if column_encoding is not None:
                raise ValueError(
                    "'use_byte_stream_split' cannot be passed "
                    "together with 'column_encoding'")
            else:
                props.encoding(ParquetEncoding_BYTE_STREAM_SPLIT)
    elif use_byte_stream_split is not None:
        # The loop below adds entries to column_encoding; work on a
        # private copy so the caller's dict is left untouched.
        if isinstance(column_encoding, dict):
            column_encoding = dict(column_encoding)
        for column in use_byte_stream_split:
            if column_encoding is None:
                column_encoding = {column: 'BYTE_STREAM_SPLIT'}
            elif column_encoding.get(column, None) is None:
                column_encoding[column] = 'BYTE_STREAM_SPLIT'
            else:
                raise ValueError(
                    "'use_byte_stream_split' cannot be passed "
                    "together with 'column_encoding'")

    # store_decimal_as_integer

    if isinstance(store_decimal_as_integer, bool):
        if store_decimal_as_integer:
            props.enable_store_decimal_as_integer()
        else:
            props.disable_store_decimal_as_integer()
    else:
        raise TypeError("'store_decimal_as_integer' must be a boolean")

    # column_encoding
    # encoding map - encode individual columns

    if column_encoding is not None:
        if isinstance(column_encoding, dict):
            for column, _encoding in column_encoding.items():
                props.encoding(tobytes(column),
                               encoding_enum_from_name(_encoding))
        elif isinstance(column_encoding, str):
            props.encoding(encoding_enum_from_name(column_encoding))
        else:
            raise TypeError(
                "'column_encoding' should be a dictionary or a string")

    if data_page_size is not None:
        props.data_pagesize(data_page_size)

    if write_batch_size is not None:
        props.write_batch_size(write_batch_size)

    if dictionary_pagesize_limit is not None:
        props.dictionary_pagesize_limit(dictionary_pagesize_limit)

    # encryption

    if encryption_properties is not None:
        props.encryption(
            (<FileEncryptionProperties>encryption_properties).unwrap())

    # For backwards compatibility reasons we cap the maximum row group size
    # at 64Mi rows.  This could be changed in the future, though it would be
    # a breaking change.
    #
    # The user can always specify a smaller row group size (and the default
    # is smaller) when calling write_table.  If the call to write_table uses
    # a size larger than this then it will be latched to this value.
    props.max_row_group_length(_MAX_ROW_GROUP_SIZE)

    # checksum

    if write_page_checksum:
        props.enable_page_checksum()
    else:
        props.disable_page_checksum()

    # page index

    if write_page_index:
        props.enable_write_page_index()
    else:
        props.disable_write_page_index()

    properties = props.build()

    return properties
2031
+
2032
+
2033
cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties(
        use_deprecated_int96_timestamps=False,
        coerce_timestamps=None,
        allow_truncated_timestamps=False,
        writer_engine_version=None,
        use_compliant_nested_type=True,
        store_schema=True) except *:
    """
    Arrow writer properties

    Translate the Python-level Arrow writer options into a C++
    ``ArrowWriterProperties`` object via ``ArrowWriterProperties.Builder``.

    ``coerce_timestamps`` accepts ``'ms'``, ``'us'`` or ``None``.
    ``writer_engine_version`` accepts ``"V1"`` (emits a warning), ``"V2"``
    or ``None``; for ``"V2"`` and ``None`` the builder's engine version is
    left untouched.

    Raises
    ------
    ValueError
        For any other ``coerce_timestamps`` or ``writer_engine_version``
        value.
    """
    cdef:
        shared_ptr[ArrowWriterProperties] arrow_properties
        ArrowWriterProperties.Builder arrow_props

    # Store the original Arrow schema so things like dictionary types can
    # be automatically reconstructed
    if store_schema:
        arrow_props.store_schema()

    # int96 support

    if use_deprecated_int96_timestamps:
        arrow_props.enable_deprecated_int96_timestamps()
    else:
        arrow_props.disable_deprecated_int96_timestamps()

    # coerce_timestamps

    if coerce_timestamps == 'ms':
        arrow_props.coerce_timestamps(TimeUnit_MILLI)
    elif coerce_timestamps == 'us':
        arrow_props.coerce_timestamps(TimeUnit_MICRO)
    elif coerce_timestamps is not None:
        raise ValueError('Invalid value for coerce_timestamps: {0}'
                         .format(coerce_timestamps))

    # allow_truncated_timestamps

    if allow_truncated_timestamps:
        arrow_props.allow_truncated_timestamps()
    else:
        arrow_props.disallow_truncated_timestamps()

    # use_compliant_nested_type

    if use_compliant_nested_type:
        arrow_props.enable_compliant_nested_types()
    else:
        arrow_props.disable_compliant_nested_types()

    # writer_engine_version
    #
    # None is this function's own default, so it must not be rejected:
    # like "V2" it leaves the builder's engine version as is.

    if writer_engine_version == "V1":
        warnings.warn("V1 parquet writer engine is a no-op. Use V2.")
        arrow_props.set_engine_version(ArrowWriterEngineVersion.V1)
    elif writer_engine_version is not None and writer_engine_version != "V2":
        raise ValueError("Unsupported Writer Engine Version: {0}"
                         .format(writer_engine_version))

    arrow_properties = arrow_props.build()

    return arrow_properties
2093
+
2094
cdef _name_to_index_map(Schema arrow_schema):
    # Convert `arrow_schema` to a Parquet schema using default writer
    # properties and return a dict mapping each Parquet column's dotted
    # path (str) to its column index.
    cdef:
        shared_ptr[CSchema] c_arrow_schema
        shared_ptr[SchemaDescriptor] c_parquet_schema_ptr
        SchemaDescriptor* c_parquet_schema
        shared_ptr[WriterProperties] c_writer_props = _create_writer_properties()
        shared_ptr[ArrowWriterProperties] c_arrow_writer_props = (
            _create_arrow_writer_properties(
                use_deprecated_int96_timestamps=False,
                coerce_timestamps=None,
                allow_truncated_timestamps=False,
                writer_engine_version="V2"
            )
        )

    c_arrow_schema = pyarrow_unwrap_schema(arrow_schema)

    with nogil:
        check_status(ToParquetSchema(
            c_arrow_schema.get(), deref(c_writer_props.get()),
            deref(c_arrow_writer_props.get()), &c_parquet_schema_ptr))

    index_by_name = {}
    c_parquet_schema = c_parquet_schema_ptr.get()

    for i in range(c_parquet_schema.num_columns()):
        dotted_name = frombytes(
            c_parquet_schema.Column(i).path().get().ToDotString())
        index_by_name[dotted_name] = i

    return index_by_name
2121
+
2122
+
2123
cdef class ParquetWriter(_Weakrefable):
    """
    Writer that opens a C++ ``FileWriter`` on a path or an output stream.

    ``where`` is first treated as a path; when it cannot be stringified it
    is handed to ``get_writer`` as an already-open sink. Only a sink opened
    here from a path is closed by ``close``.
    """
    cdef:
        unique_ptr[FileWriter] writer
        shared_ptr[COutputStream] sink
        bint own_sink

    cdef readonly:
        object use_dictionary
        object use_deprecated_int96_timestamps
        object use_byte_stream_split
        object column_encoding
        object coerce_timestamps
        object allow_truncated_timestamps
        object compression
        object compression_level
        object data_page_version
        object use_compliant_nested_type
        object version
        object write_statistics
        object writer_engine_version
        int row_group_size
        int64_t data_page_size
        FileEncryptionProperties encryption_properties
        int64_t write_batch_size
        int64_t dictionary_pagesize_limit
        object store_schema
        object store_decimal_as_integer

    def __cinit__(self, where, Schema schema not None, use_dictionary=None,
                  compression=None, version=None,
                  write_statistics=None,
                  MemoryPool memory_pool=None,
                  use_deprecated_int96_timestamps=False,
                  coerce_timestamps=None,
                  data_page_size=None,
                  allow_truncated_timestamps=False,
                  compression_level=None,
                  use_byte_stream_split=False,
                  column_encoding=None,
                  writer_engine_version=None,
                  data_page_version=None,
                  use_compliant_nested_type=True,
                  encryption_properties=None,
                  write_batch_size=None,
                  dictionary_pagesize_limit=None,
                  store_schema=True,
                  write_page_index=False,
                  write_page_checksum=False,
                  sorting_columns=None,
                  store_decimal_as_integer=False):
        cdef:
            shared_ptr[WriterProperties] c_writer_props
            shared_ptr[ArrowWriterProperties] c_arrow_writer_props
            c_string c_path
            CMemoryPool* c_pool

        try:
            path = _stringify_path(where)
        except TypeError:
            # Not path-like: let get_writer produce the sink from `where`.
            # That sink is not owned by this writer.
            get_writer(where, &self.sink)
            self.own_sink = False
        else:
            c_path = tobytes(path)
            with nogil:
                self.sink = GetResultValue(FileOutputStream.Open(c_path))
            self.own_sink = True

        c_writer_props = _create_writer_properties(
            use_dictionary=use_dictionary,
            compression=compression,
            version=version,
            write_statistics=write_statistics,
            data_page_size=data_page_size,
            compression_level=compression_level,
            use_byte_stream_split=use_byte_stream_split,
            column_encoding=column_encoding,
            data_page_version=data_page_version,
            encryption_properties=encryption_properties,
            write_batch_size=write_batch_size,
            dictionary_pagesize_limit=dictionary_pagesize_limit,
            write_page_index=write_page_index,
            write_page_checksum=write_page_checksum,
            sorting_columns=sorting_columns,
            store_decimal_as_integer=store_decimal_as_integer,
        )
        c_arrow_writer_props = _create_arrow_writer_properties(
            use_deprecated_int96_timestamps=use_deprecated_int96_timestamps,
            coerce_timestamps=coerce_timestamps,
            allow_truncated_timestamps=allow_truncated_timestamps,
            writer_engine_version=writer_engine_version,
            use_compliant_nested_type=use_compliant_nested_type,
            store_schema=store_schema,
        )

        c_pool = maybe_unbox_memory_pool(memory_pool)
        with nogil:
            self.writer = move(GetResultValue(
                FileWriter.Open(deref(schema.schema), c_pool,
                                self.sink, c_writer_props,
                                c_arrow_writer_props)))

    def close(self):
        """Close the file writer, then the sink if this writer opened it."""
        with nogil:
            check_status(self.writer.get().Close())
            if self.own_sink:
                check_status(self.sink.get().Close())

    def write_table(self, Table table, row_group_size=None):
        """
        Write ``table`` through the file writer.

        ``row_group_size`` of ``None`` or ``-1`` selects the smaller of the
        table's row count and ``_DEFAULT_ROW_GROUP_SIZE``; ``0`` raises
        ``ValueError``; any other value is passed on unchanged.
        """
        cdef:
            CTable* c_table = table.table
            int64_t c_chunk_size

        use_default_size = row_group_size is None or row_group_size == -1
        if use_default_size:
            c_chunk_size = min(c_table.num_rows(), _DEFAULT_ROW_GROUP_SIZE)
        else:
            if row_group_size == 0:
                raise ValueError('Row group size cannot be 0')
            c_chunk_size = row_group_size

        with nogil:
            check_status(
                self.writer.get().WriteTable(deref(c_table), c_chunk_size))

    def add_key_value_metadata(self, key_value_metadata):
        """Wrap the input in ``KeyValueMetadata`` and pass it to the writer."""
        cdef shared_ptr[const CKeyValueMetadata] c_key_values

        c_key_values = pyarrow_unwrap_metadata(
            KeyValueMetadata(key_value_metadata))
        with nogil:
            check_status(
                self.writer.get().AddKeyValueMetadata(c_key_values))

    @property
    def metadata(self):
        """
        File metadata reported by the writer, wrapped as ``FileMetaData``.

        Raises ``RuntimeError`` while the writer has no metadata to report.
        """
        cdef:
            shared_ptr[CFileMetaData] c_file_metadata
            FileMetaData wrapped
        with nogil:
            c_file_metadata = self.writer.get().metadata()
        if c_file_metadata:
            wrapped = FileMetaData()
            wrapped.init(c_file_metadata)
            return wrapped
        raise RuntimeError(
            'file metadata is only available after writer close')
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_pyarrow_cpp_tests.pxd ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # distutils: language = c++
19
+ # cython: language_level = 3
20
+
21
+ from pyarrow.includes.common cimport *
22
+ from pyarrow.includes.libarrow cimport CStatus
23
+
24
+
25
# Signature of a test entry point: takes no arguments, returns a CStatus.
ctypedef CStatus cb_test_func()

# Declarations taken from the C++ header "arrow/python/python_test.h".
cdef extern from "arrow/python/python_test.h" namespace "arrow::py::testing" nogil:

    # One test case: a name plus the function to invoke.
    cdef cppclass CTestCase "arrow::py::testing::TestCase":
        c_string name
        cb_test_func func

    # Returns the test cases exposed by the C++ side.
    vector[CTestCase] GetCppTestCases()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_substrait.pyx ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # cython: language_level = 3
19
+ from cython.operator cimport dereference as deref
20
+ from libcpp.vector cimport vector as std_vector
21
+
22
+ from pyarrow import Buffer, py_buffer
23
+ from pyarrow._compute cimport Expression
24
+ from pyarrow.lib import frombytes, tobytes
25
+ from pyarrow.lib cimport *
26
+ from pyarrow.includes.libarrow cimport *
27
+ from pyarrow.includes.libarrow_substrait cimport *
28
+
29
+ try:
30
+ import substrait as py_substrait
31
+ except ImportError:
32
+ py_substrait = None
33
+ else:
34
+ import substrait.proto # no-cython-lint
35
+
36
+
37
+ # TODO GH-37235: Fix exception handling
38
cdef CDeclaration _create_named_table_provider(
    dict named_args, const std_vector[c_string]& names, const CSchema& schema
) noexcept:
    # Call the Python callable stored under named_args["provider"] with the
    # table name parts and the expected schema, then wrap the table it
    # returns in a "table_source" declaration that has no inputs.
    cdef:
        shared_ptr[CTable] c_table
        shared_ptr[CTableSourceNodeOptions] c_source_options
        shared_ptr[CExecNodeOptions] c_node_options
        vector[CDeclaration.Input] no_c_inputs

    name_parts = [frombytes(names[i]) for i in range(names.size())]
    expected_schema = pyarrow_wrap_schema(make_shared[CSchema](schema))

    provided_table = named_args["provider"](name_parts, expected_schema)
    c_table = pyarrow_unwrap_table(provided_table)
    c_source_options = make_shared[CTableSourceNodeOptions](c_table)
    c_node_options = static_pointer_cast[CExecNodeOptions, CTableSourceNodeOptions](
        c_source_options)
    return CDeclaration(tobytes("table_source"),
                        no_c_inputs, c_node_options)
61
+
62
+
63
def run_query(plan, *, table_provider=None, use_threads=True):
    """
    Execute a Substrait plan and read the results as a RecordBatchReader.

    Parameters
    ----------
    plan : Union[Buffer, bytes]
        The serialized Substrait plan to execute.
    table_provider : object (optional)
        A function to resolve any NamedTable relation to a table.
        The function will receive two arguments which will be a list
        of strings representing the table name and a pyarrow.Schema representing
        the expected schema and should return a pyarrow.Table.
    use_threads : bool, default True
        If True then multiple threads will be used to run the query.  If False then
        all CPU intensive work will be done on the calling thread.

    Returns
    -------
    RecordBatchReader
        A reader containing the result of the executed query

    Raises
    ------
    TypeError
        If ``plan`` is not a Buffer, bytes or memoryview.

    Examples
    --------
    >>> import pyarrow as pa
    >>> from pyarrow.lib import tobytes
    >>> import pyarrow.substrait as substrait
    >>> test_table_1 = pa.Table.from_pydict({"x": [1, 2, 3]})
    >>> test_table_2 = pa.Table.from_pydict({"x": [4, 5, 6]})
    >>> def table_provider(names, schema):
    ...     if not names:
    ...         raise Exception("No names provided")
    ...     elif names[0] == "t1":
    ...         return test_table_1
    ...     elif names[0] == "t2":
    ...         return test_table_2
    ...     else:
    ...         raise Exception("Unrecognized table name")
    ...
    >>> substrait_query = '''
    ...     {
    ...         "relations": [
    ...         {"rel": {
    ...             "read": {
    ...             "base_schema": {
    ...                 "struct": {
    ...                 "types": [
    ...                             {"i64": {}}
    ...                         ]
    ...                 },
    ...                 "names": [
    ...                         "x"
    ...                         ]
    ...             },
    ...             "namedTable": {
    ...                     "names": ["t1"]
    ...             }
    ...             }
    ...         }}
    ...         ]
    ...     }
    ... '''
    >>> buf = pa._substrait._parse_json_plan(tobytes(substrait_query))
    >>> reader = pa.substrait.run_query(buf, table_provider=table_provider)
    >>> reader.read_all()
    pyarrow.Table
    x: int64
    ----
    x: [[1,2,3]]
    """

    cdef:
        CResult[shared_ptr[CRecordBatchReader]] c_res_reader
        shared_ptr[CRecordBatchReader] c_reader
        RecordBatchReader reader
        shared_ptr[CBuffer] c_buf_plan
        CConversionOptions c_conversion_options
        c_bool c_use_threads

    c_use_threads = use_threads
    if isinstance(plan, (bytes, memoryview)):
        # Wrap bytes-like input in a pyarrow Buffer before unwrapping it.
        c_buf_plan = pyarrow_unwrap_buffer(py_buffer(plan))
    elif isinstance(plan, Buffer):
        c_buf_plan = pyarrow_unwrap_buffer(plan)
    else:
        raise TypeError(
            f"Expected 'pyarrow.Buffer' or bytes, got '{type(plan)}'")

    if table_provider is not None:
        # The provider is passed to the C callback inside a dict, which
        # _create_named_table_provider reads under the "provider" key.
        named_table_args = {
            "provider": table_provider
        }
        c_conversion_options.named_table_provider = BindFunction[CNamedTableProvider](
            &_create_named_table_provider, named_table_args)

    with nogil:
        c_res_reader = ExecuteSerializedPlan(
            deref(c_buf_plan), default_extension_id_registry(),
            GetFunctionRegistry(), c_conversion_options, c_use_threads)

    c_reader = GetResultValue(c_res_reader)

    # Bypass __init__ and attach the C++ reader directly.
    reader = RecordBatchReader.__new__(RecordBatchReader)
    reader.reader = c_reader
    return reader
168
+
169
+
170
def _parse_json_plan(plan):
    """
    Parse a JSON plan into equivalent serialized Protobuf.

    Parameters
    ----------
    plan : bytes
        Substrait plan in JSON.

    Returns
    -------
    Buffer
        A buffer containing the serialized Protobuf plan.
    """

    cdef c_string c_json_plan
    cdef CResult[shared_ptr[CBuffer]] c_serialized
    cdef shared_ptr[CBuffer] c_plan_buffer

    c_json_plan = plan
    c_serialized = SerializeJsonPlan(c_json_plan)
    with nogil:
        c_plan_buffer = GetResultValue(c_serialized)
    return pyarrow_wrap_buffer(c_plan_buffer)
195
+
196
+
197
class SubstraitSchema:
    """A Schema encoded for Substrait usage.

    A SubstraitSchema carries the same schema in two encodings:
    a substrait ``NamedStruct`` (``schema``) and an
    ``ExtendedExpression`` (``expression``).

    The ``ExtendedExpression`` form exists for schemas whose types
    need extensions to be decoded. There the schema is the
    ``base_schema`` of the ``ExtendedExpression`` and all extensions
    are provided alongside it.
    """

    def __init__(self, schema, expression):
        self.schema, self.expression = schema, expression

    def to_pysubstrait(self):
        """Convert the schema to a substrait-python ExtendedExpression object."""
        if py_substrait is not None:
            extended_expression_cls = py_substrait.proto.ExtendedExpression
            return extended_expression_cls.FromString(self.expression)
        raise ImportError("The 'substrait' package is required.")
219
+
220
+
221
def serialize_schema(schema):
    """
    Serialize a schema into a SubstraitSchema object.

    Parameters
    ----------
    schema : Schema
        The schema to serialize

    Returns
    -------
    SubstraitSchema
        The schema stored in a SubstraitSchema object.
    """
    # NamedStruct encoding first, then the ExtendedExpression encoding
    # (no expressions, arrow extensions allowed).
    named_struct = _serialize_namedstruct_schema(schema)
    extended_expression = serialize_expressions(
        [], [], schema, allow_arrow_extensions=True)
    return SubstraitSchema(
        schema=named_struct,
        expression=extended_expression
    )
239
+
240
+
241
def _serialize_namedstruct_schema(schema):
    """
    Serialize a schema with ``SerializeSchema``.

    Parameters
    ----------
    schema : Schema
        The schema to serialize

    Returns
    -------
    memoryview
        A view over the buffer produced by ``SerializeSchema``.

    Raises
    ------
    TypeError
        If ``schema`` is not a ``Schema`` instance.
    """
    cdef:
        CResult[shared_ptr[CBuffer]] c_res_buffer
        shared_ptr[CBuffer] c_buffer
        CConversionOptions c_conversion_options
        CExtensionSet c_extensions

    # The `<Schema>` cast below is unchecked, so reject anything that is
    # not a Schema (including None) before touching its C++ members.
    if not isinstance(schema, Schema):
        raise TypeError(
            f"Expected 'pyarrow.Schema', got '{type(schema)}'")

    with nogil:
        c_res_buffer = SerializeSchema(deref((<Schema> schema).sp_schema), &c_extensions, c_conversion_options)
        c_buffer = GetResultValue(c_res_buffer)

    return memoryview(pyarrow_wrap_buffer(c_buffer))
253
+
254
+
255
def deserialize_schema(buf):
    """
    Deserialize a ``NamedStruct`` Substrait message
    or a SubstraitSchema object into an Arrow Schema object

    Parameters
    ----------
    buf : Buffer or bytes or SubstraitSchema
        The message to deserialize

    Returns
    -------
    Schema
        The deserialized schema
    """
    cdef:
        shared_ptr[CBuffer] c_message
        CResult[shared_ptr[CSchema]] c_schema_result
        shared_ptr[CSchema] c_decoded_schema
        CConversionOptions c_conversion_options
        CExtensionSet c_extensions

    # A SubstraitSchema is decoded through its ExtendedExpression form.
    if isinstance(buf, SubstraitSchema):
        bound = deserialize_expressions(buf.expression)
        return bound.schema

    if isinstance(buf, (bytes, memoryview)):
        c_message = pyarrow_unwrap_buffer(py_buffer(buf))
    elif isinstance(buf, Buffer):
        c_message = pyarrow_unwrap_buffer(buf)
    else:
        raise TypeError(
            f"Expected 'pyarrow.Buffer' or bytes, got '{type(buf)}'")

    with nogil:
        c_schema_result = DeserializeSchema(
            deref(c_message), c_extensions, c_conversion_options)
        c_decoded_schema = GetResultValue(c_schema_result)

    return pyarrow_wrap_schema(c_decoded_schema)
294
+
295
+
296
def serialize_expressions(exprs, names, schema, *, allow_arrow_extensions=False):
    """
    Serialize a collection of expressions into Substrait

    Substrait expressions must be bound to a schema.  For example,
    the Substrait expression ``a:i32 + b:i32`` is different from the
    Substrait expression ``a:i64 + b:i64``.  Pyarrow expressions are
    typically unbound.  For example, both of the above expressions
    would be represented as ``a + b`` in pyarrow.

    This means a schema must be provided when serializing an expression.
    It also means that the serialization may fail if a matching function
    call cannot be found for the expression.

    Parameters
    ----------
    exprs : list of Expression
        The expressions to serialize
    names : list of str
        Names for the expressions
    schema : Schema
        The schema the expressions will be bound to
    allow_arrow_extensions : bool, default False
        If False then only functions that are part of the core Substrait function
        definitions will be allowed.  Set this to True to allow pyarrow-specific functions
        and user defined functions but the result may not be accepted by other
        compute libraries.

    Returns
    -------
    Buffer
        An ExtendedExpression message containing the serialized expressions

    Raises
    ------
    ValueError
        If ``exprs`` and ``names`` differ in length.
    TypeError
        If an element of ``exprs`` is not an Expression, an element of
        ``names`` is not a str, or ``schema`` is not a Schema.
    """
    cdef:
        CResult[shared_ptr[CBuffer]] c_res_buffer
        shared_ptr[CBuffer] c_buffer
        CNamedExpression c_named_expr
        CBoundExpressions c_bound_exprs
        CConversionOptions c_conversion_options

    if len(exprs) != len(names):
        raise ValueError("exprs and names need to have the same length")
    for expr, name in zip(exprs, names):
        if not isinstance(expr, Expression):
            raise TypeError(f"Expected Expression, got '{type(expr)}' in exprs")
        if not isinstance(name, str):
            raise TypeError(f"Expected str, got '{type(name)}' in names")
        c_named_expr.expression = (<Expression> expr).unwrap()
        c_named_expr.name = tobytes(<str> name)
        c_bound_exprs.named_expressions.push_back(c_named_expr)

    # The `<Schema>` cast below is unchecked, so reject anything that is
    # not a Schema (including None) before reading its C++ member.
    if not isinstance(schema, Schema):
        raise TypeError(
            f"Expected 'pyarrow.Schema', got '{type(schema)}'")
    c_bound_exprs.schema = (<Schema> schema).sp_schema

    c_conversion_options.allow_arrow_extensions = allow_arrow_extensions

    with nogil:
        c_res_buffer = SerializeExpressions(c_bound_exprs, c_conversion_options)
        c_buffer = GetResultValue(c_res_buffer)
    return memoryview(pyarrow_wrap_buffer(c_buffer))
355
+
356
+
357
cdef class BoundExpressions(_Weakrefable):
    """
    Named expressions together with the schema they are bound to

    This mirrors the Substrait ExtendedExpression message
    """

    cdef:
        CBoundExpressions c_bound_exprs

    def __init__(self):
        # Instances are created through `wrap`, never constructed directly.
        raise TypeError(
            'BoundExpressions is an abstract class thus cannot be initialized.')

    cdef void init(self, CBoundExpressions bound_expressions):
        self.c_bound_exprs = bound_expressions

    @property
    def schema(self):
        """
        The schema shared by every expression in this collection
        """
        return pyarrow_wrap_schema(self.c_bound_exprs.schema)

    @property
    def expressions(self):
        """
        A dict mapping each expression's name to the expression
        """
        by_name = {}
        for c_entry in self.c_bound_exprs.named_expressions:
            entry_name = frombytes(c_entry.name)
            entry_expr = Expression.wrap(c_entry.expression)
            by_name[entry_name] = entry_expr
        return by_name

    @staticmethod
    cdef wrap(const CBoundExpressions& bound_expressions):
        # Bypass __init__, which always raises, and attach the C++ value.
        cdef BoundExpressions wrapped = BoundExpressions.__new__(BoundExpressions)
        wrapped.init(bound_expressions)
        return wrapped

    @classmethod
    def from_substrait(cls, message):
        """
        Convert a Substrait message into a BoundExpressions object

        Parameters
        ----------
        message : Buffer or bytes or protobuf Message
            The message to convert to a BoundExpressions object

        Returns
        -------
        BoundExpressions
            The converted expressions, their names, and the bound schema
        """
        if isinstance(message, (bytes, memoryview, Buffer)):
            return deserialize_expressions(message)

        # Anything else is expected to offer SerializeToString().
        try:
            return deserialize_expressions(message.SerializeToString())
        except AttributeError:
            raise TypeError(
                f"Expected 'pyarrow.Buffer' or bytes or protobuf Message, got '{type(message)}'")
424
+
425
+
426
def deserialize_expressions(buf):
    """
    Deserialize an ExtendedExpression Substrait message into a BoundExpressions object

    Parameters
    ----------
    buf : Buffer or bytes
        The message to deserialize

    Returns
    -------
    BoundExpressions
        The deserialized expressions, their names, and the bound schema
    """
    cdef:
        shared_ptr[CBuffer] c_message
        CResult[CBoundExpressions] c_decode_result
        CBoundExpressions c_decoded

    if isinstance(buf, Buffer):
        c_message = pyarrow_unwrap_buffer(buf)
    elif isinstance(buf, (bytes, memoryview)):
        # Wrap bytes-like input in a pyarrow Buffer before unwrapping it.
        c_message = pyarrow_unwrap_buffer(py_buffer(buf))
    else:
        raise TypeError(
            f"Expected 'pyarrow.Buffer' or bytes, got '{type(buf)}'")

    with nogil:
        c_decode_result = DeserializeExpressions(deref(c_message))
        c_decoded = GetResultValue(c_decode_result)

    return BoundExpressions.wrap(c_decoded)
458
+
459
+
460
def get_supported_functions():
    """
    Get a list of Substrait functions that the underlying
    engine currently supports.

    Returns
    -------
    list[str]
        A list of function ids encoded as '{uri}#{name}'
    """

    cdef:
        ExtensionIdRegistry* c_registry
        std_vector[c_string] c_function_ids

    c_registry = default_extension_id_registry()
    c_function_ids = c_registry.GetSupportedSubstraitFunctions()

    supported = []
    for i in range(c_function_ids.size()):
        supported.append(frombytes(c_function_ids[i]))
    return supported
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/cffi.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ from __future__ import absolute_import
19
+
20
+ import cffi
21
+
22
# C declarations handed to cffi: the ArrowSchema / ArrowArray /
# ArrowArrayStream / ArrowDeviceArray structs declared below.
c_source = """
    struct ArrowSchema {
      // Array type description
      const char* format;
      const char* name;
      const char* metadata;
      int64_t flags;
      int64_t n_children;
      struct ArrowSchema** children;
      struct ArrowSchema* dictionary;

      // Release callback
      void (*release)(struct ArrowSchema*);
      // Opaque producer-specific data
      void* private_data;
    };

    struct ArrowArray {
      // Array data description
      int64_t length;
      int64_t null_count;
      int64_t offset;
      int64_t n_buffers;
      int64_t n_children;
      const void** buffers;
      struct ArrowArray** children;
      struct ArrowArray* dictionary;

      // Release callback
      void (*release)(struct ArrowArray*);
      // Opaque producer-specific data
      void* private_data;
    };

    struct ArrowArrayStream {
      int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
      int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);

      const char* (*get_last_error)(struct ArrowArrayStream*);

      // Release callback
      void (*release)(struct ArrowArrayStream*);
      // Opaque producer-specific data
      void* private_data;
    };

    typedef int32_t ArrowDeviceType;

    struct ArrowDeviceArray {
      struct ArrowArray array;
      int64_t device_id;
      ArrowDeviceType device_type;
      void* sync_event;
      int64_t reserved[3];
    };
    """

# TODO: switch to cffi's out-of-line mode so that importing this module is
# faster and does not have to parse the C declarations every time.
ffi = cffi.FFI()
ffi.cdef(c_source)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/compute.py ADDED
@@ -0,0 +1,744 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ from pyarrow._compute import ( # noqa
19
+ Function,
20
+ FunctionOptions,
21
+ FunctionRegistry,
22
+ HashAggregateFunction,
23
+ HashAggregateKernel,
24
+ Kernel,
25
+ ScalarAggregateFunction,
26
+ ScalarAggregateKernel,
27
+ ScalarFunction,
28
+ ScalarKernel,
29
+ VectorFunction,
30
+ VectorKernel,
31
+ # Option classes
32
+ ArraySortOptions,
33
+ AssumeTimezoneOptions,
34
+ CastOptions,
35
+ CountOptions,
36
+ CumulativeOptions,
37
+ CumulativeSumOptions,
38
+ DayOfWeekOptions,
39
+ DictionaryEncodeOptions,
40
+ RunEndEncodeOptions,
41
+ ElementWiseAggregateOptions,
42
+ ExtractRegexOptions,
43
+ FilterOptions,
44
+ IndexOptions,
45
+ JoinOptions,
46
+ ListSliceOptions,
47
+ ListFlattenOptions,
48
+ MakeStructOptions,
49
+ MapLookupOptions,
50
+ MatchSubstringOptions,
51
+ ModeOptions,
52
+ NullOptions,
53
+ PadOptions,
54
+ PairwiseOptions,
55
+ PartitionNthOptions,
56
+ QuantileOptions,
57
+ RandomOptions,
58
+ RankOptions,
59
+ ReplaceSliceOptions,
60
+ ReplaceSubstringOptions,
61
+ RoundBinaryOptions,
62
+ RoundOptions,
63
+ RoundTemporalOptions,
64
+ RoundToMultipleOptions,
65
+ ScalarAggregateOptions,
66
+ SelectKOptions,
67
+ SetLookupOptions,
68
+ SliceOptions,
69
+ SortOptions,
70
+ SplitOptions,
71
+ SplitPatternOptions,
72
+ StrftimeOptions,
73
+ StrptimeOptions,
74
+ StructFieldOptions,
75
+ TakeOptions,
76
+ TDigestOptions,
77
+ TrimOptions,
78
+ Utf8NormalizeOptions,
79
+ VarianceOptions,
80
+ WeekOptions,
81
+ # Functions
82
+ call_function,
83
+ function_registry,
84
+ get_function,
85
+ list_functions,
86
+ # Udf
87
+ call_tabular_function,
88
+ register_scalar_function,
89
+ register_tabular_function,
90
+ register_aggregate_function,
91
+ register_vector_function,
92
+ UdfContext,
93
+ # Expressions
94
+ Expression,
95
+ )
96
+
97
+ from collections import namedtuple
98
+ import inspect
99
+ from textwrap import dedent
100
+ import warnings
101
+
102
+ import pyarrow as pa
103
+ from pyarrow import _compute_docstrings
104
+ from pyarrow.vendored import docscrape
105
+
106
+
107
+ def _get_arg_names(func):
108
+ return func._doc.arg_names
109
+
110
+
111
# Holder for the pieces of an options class docstring that are reused when
# generating compute function docstrings.
_OptionsClassDoc = namedtuple('_OptionsClassDoc', ('params',))


def _scrape_options_class_doc(options_class):
    """
    Parse the numpydoc docstring of an options class.

    Returns an ``_OptionsClassDoc`` holding the "Parameters" section, or
    None when the class has no (or an empty) docstring.
    """
    raw_doc = options_class.__doc__
    if raw_doc:
        parsed = docscrape.NumpyDocString(raw_doc)
        return _OptionsClassDoc(parsed['Parameters'])
    return None
119
+
120
+
121
def _decorate_compute_function(wrapper, exposed_name, func, options_class):
    """
    Attach metadata and a generated numpydoc docstring to a compute
    function wrapper.

    Parameters
    ----------
    wrapper : callable
        The Python wrapper to decorate (modified in place and returned).
    exposed_name : str
        Name under which the wrapper is exposed; used for ``__name__`` and
        ``__qualname__``.
    func : Function
        The compute function being wrapped; its ``_doc``, ``name``,
        ``arity`` and ``kind`` attributes are read.
    options_class : type or None
        The options class associated with the function, if any.

    Returns
    -------
    callable
        The same ``wrapper`` object.
    """
    cpp_doc = func._doc

    wrapper.__arrow_compute_function__ = {
        'name': func.name,
        'arity': func.arity,
        'options_class': cpp_doc.options_class,
        'options_required': cpp_doc.options_required,
    }
    wrapper.__name__ = exposed_name
    wrapper.__qualname__ = exposed_name

    # 1. One-line summary (fall back to a generic sentence)
    summary = cpp_doc.summary
    if not summary:
        noun = "arguments" if func.arity > 1 else "argument"
        summary = f"Call compute function {func.name!r} with the given {noun}"
    pieces = [f"{summary}.\n\n"]

    # 2. Multi-line description
    description = cpp_doc.description
    if description:
        pieces.append(f"{description}\n\n")

    extra_doc = _compute_docstrings.function_doc_additions.get(func.name)

    # 3. Parameter description
    pieces.append("Parameters\n----------\n")

    # 3a. Compute function parameters
    array_only = func.kind in ('vector', 'scalar_aggregate')
    for arg_name in _get_arg_names(func):
        arg_type = 'Array-like' if array_only else 'Array-like or scalar-like'
        pieces.append(f"{arg_name} : {arg_type}\n")
        pieces.append("    Argument to compute function.\n")

    # 3b. Compute function option values
    if options_class is not None:
        scraped = _scrape_options_class_doc(options_class)
        if scraped:
            # Reuse the parameter documentation of the options class.
            for param in scraped.params:
                pieces.append(f"{param.name} : {param.type}\n")
                pieces.extend(f"    {line}\n" for line in param.desc)
        else:
            # No docstring to borrow from: document the constructor
            # parameters generically.
            warnings.warn(f"Options class {options_class.__name__} "
                          f"does not have a docstring", RuntimeWarning)
            ctor_params = inspect.signature(options_class).parameters
            for param in ctor_params.values():
                pieces.append(dedent("""\
                {0} : optional
                    Parameter for {1} constructor. Either `options`
                    or `{0}` can be passed, but not both at the same time.
                """.format(param.name, options_class.__name__)))
        pieces.append(
            f"options : pyarrow.compute.{options_class.__name__}, optional\n"
            "    Alternative way of passing options.\n")

    pieces.append(
        "memory_pool : pyarrow.MemoryPool, optional\n"
        "    If not passed, will allocate memory from the default memory "
        "pool.\n")

    # 4. Custom addition (e.g. examples)
    if extra_doc is not None:
        pieces.append("\n{}\n".format(dedent(extra_doc).strip("\n")))

    wrapper.__doc__ = "".join(pieces)
    return wrapper
202
+
203
+
204
+ def _get_options_class(func):
205
+ class_name = func._doc.options_class
206
+ if not class_name:
207
+ return None
208
+ try:
209
+ return globals()[class_name]
210
+ except KeyError:
211
+ warnings.warn("Python binding for {} not exposed"
212
+ .format(class_name), RuntimeWarning)
213
+ return None
214
+
215
+
216
+ def _handle_options(name, options_class, options, args, kwargs):
217
+ if args or kwargs:
218
+ if options is not None:
219
+ raise TypeError(
220
+ "Function {!r} called with both an 'options' argument "
221
+ "and additional arguments"
222
+ .format(name))
223
+ return options_class(*args, **kwargs)
224
+
225
+ if options is not None:
226
+ if isinstance(options, dict):
227
+ return options_class(**options)
228
+ elif isinstance(options, options_class):
229
+ return options
230
+ raise TypeError(
231
+ "Function {!r} expected a {} parameter, got {}"
232
+ .format(name, options_class, type(options)))
233
+
234
+ return None
235
+
236
+
237
+ def _make_generic_wrapper(func_name, func, options_class, arity):
238
+ if options_class is None:
239
+ def wrapper(*args, memory_pool=None):
240
+ if arity is not Ellipsis and len(args) != arity:
241
+ raise TypeError(
242
+ f"{func_name} takes {arity} positional argument(s), "
243
+ f"but {len(args)} were given"
244
+ )
245
+ if args and isinstance(args[0], Expression):
246
+ return Expression._call(func_name, list(args))
247
+ return func.call(args, None, memory_pool)
248
+ else:
249
+ def wrapper(*args, memory_pool=None, options=None, **kwargs):
250
+ if arity is not Ellipsis:
251
+ if len(args) < arity:
252
+ raise TypeError(
253
+ f"{func_name} takes {arity} positional argument(s), "
254
+ f"but {len(args)} were given"
255
+ )
256
+ option_args = args[arity:]
257
+ args = args[:arity]
258
+ else:
259
+ option_args = ()
260
+ options = _handle_options(func_name, options_class, options,
261
+ option_args, kwargs)
262
+ if args and isinstance(args[0], Expression):
263
+ return Expression._call(func_name, list(args), options)
264
+ return func.call(args, options, memory_pool)
265
+ return wrapper
266
+
267
+
268
+ def _make_signature(arg_names, var_arg_names, options_class):
269
+ from inspect import Parameter
270
+ params = []
271
+ for name in arg_names:
272
+ params.append(Parameter(name, Parameter.POSITIONAL_ONLY))
273
+ for name in var_arg_names:
274
+ params.append(Parameter(name, Parameter.VAR_POSITIONAL))
275
+ if options_class is not None:
276
+ options_sig = inspect.signature(options_class)
277
+ for p in options_sig.parameters.values():
278
+ assert p.kind in (Parameter.POSITIONAL_OR_KEYWORD,
279
+ Parameter.KEYWORD_ONLY)
280
+ if var_arg_names:
281
+ # Cannot have a positional argument after a *args
282
+ p = p.replace(kind=Parameter.KEYWORD_ONLY)
283
+ params.append(p)
284
+ params.append(Parameter("options", Parameter.KEYWORD_ONLY,
285
+ default=None))
286
+ params.append(Parameter("memory_pool", Parameter.KEYWORD_ONLY,
287
+ default=None))
288
+ return inspect.Signature(params)
289
+
290
+
291
def _wrap_function(name, func):
    """
    Create the fully decorated Python wrapper for a compute function.

    The wrapper gets a generated signature and docstring. A trailing
    argument name starting with ``*`` is treated as a var-positional
    parameter.
    """
    options_class = _get_options_class(func)

    arg_names = _get_arg_names(func)
    var_arg_names = []
    if arg_names and arg_names[-1].startswith('*'):
        # Split the "*args"-style name off the regular positional names.
        var_arg_names.append(arg_names.pop().lstrip('*'))

    wrapper = _make_generic_wrapper(
        name, func, options_class, arity=func.arity)
    wrapper.__signature__ = _make_signature(
        arg_names, var_arg_names, options_class)
    return _decorate_compute_function(wrapper, name, func, options_class)
305
+
306
+
307
def _make_global_functions():
    """
    Populate this module's globals with a wrapper for each registered
    compute function.

    Hash aggregate functions and nullary scalar aggregate functions are
    skipped. Note that some of the automatically-generated wrappers may be
    overridden by custom versions defined later in this module.
    """
    module_globals = globals()
    registry = function_registry()

    # Avoid clashes with Python keywords
    keyword_rewrites = {'and': 'and_',
                        'or': 'or_'}

    for cpp_name in registry.list_functions():
        py_name = keyword_rewrites.get(cpp_name, cpp_name)
        func = registry.get_function(cpp_name)

        # Hash aggregate functions are not callable, and nullary scalar
        # aggregate functions are not callable directly, so neither kind is
        # exposed at module level.
        not_callable = (
            func.kind == "hash_aggregate"
            or (func.kind == "scalar_aggregate" and func.arity == 0)
        )
        if not_callable:
            continue

        assert py_name not in module_globals, py_name
        wrapped = _wrap_function(py_name, func)
        module_globals[py_name] = wrapped
        module_globals[cpp_name] = wrapped


_make_global_functions()
337
+
338
+
339
def cast(arr, target_type=None, safe=None, options=None, memory_pool=None):
    """
    Cast array values to another data type. Can also be invoked as an array
    instance method.

    Either ``target_type`` (optionally with ``safe``) or ``options`` must be
    given, but not both.

    Parameters
    ----------
    arr : Array-like
    target_type : DataType or str
        Type to cast to
    safe : bool, default True
        Check for overflows or other unsafe conversions
    options : CastOptions, default None
        Additional checks passed via CastOptions
    memory_pool : MemoryPool, optional
        memory pool to use for allocations during function execution.

    Examples
    --------
    >>> from datetime import datetime
    >>> import pyarrow as pa
    >>> arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)])
    >>> arr.type
    TimestampType(timestamp[us])

    You can use ``pyarrow.DataType`` objects to specify the target type:

    >>> cast(arr, pa.timestamp('ms'))
    <pyarrow.lib.TimestampArray object at ...>
    [
      2010-01-01 00:00:00.000,
      2015-01-01 00:00:00.000
    ]

    >>> cast(arr, pa.timestamp('ms')).type
    TimestampType(timestamp[ms])

    Alternatively, it is also supported to use the string aliases for these
    types:

    >>> arr.cast('timestamp[ms]')
    <pyarrow.lib.TimestampArray object at ...>
    [
      2010-01-01 00:00:00.000,
      2015-01-01 00:00:00.000
    ]
    >>> arr.cast('timestamp[ms]').type
    TimestampType(timestamp[ms])

    Returns
    -------
    casted : Array
        The cast result as a new Array
    """
    if options is not None:
        # An explicit CastOptions excludes the shorthand arguments.
        if safe is not None or target_type is not None:
            raise ValueError("Must either pass values for 'target_type' and 'safe'"
                             " or pass a value for 'options'")
    else:
        target_type = pa.types.lib.ensure_type(target_type)
        # Only an explicit ``safe=False`` selects the unsafe cast.
        make_options = CastOptions.unsafe if safe is False else CastOptions.safe
        options = make_options(target_type)

    return call_function("cast", [arr], options, memory_pool)
406
+
407
+
408
def index(data, value, start=None, end=None, *, memory_pool=None):
    """
    Find the index of the first occurrence of a given value.

    Parameters
    ----------
    data : Array-like
    value : Scalar-like object
        The value to search for.
    start : int, optional
        If given, the search is restricted to ``data`` sliced from this
        position, and the returned index is relative to the unsliced data.
    end : int, optional
        If given, the search stops before this position.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    index : int
        the index, or -1 if not found

    Examples
    --------
    >>> import pyarrow as pa
    >>> import pyarrow.compute as pc
    >>> arr = pa.array(["Lorem", "ipsum", "dolor", "sit", "Lorem", "ipsum"])
    >>> pc.index(arr, "ipsum")
    <pyarrow.Int64Scalar: 1>
    >>> pc.index(arr, "ipsum", start=2)
    <pyarrow.Int64Scalar: 5>
    >>> pc.index(arr, "amet")
    <pyarrow.Int64Scalar: -1>
    """
    # Restrict the search window first.
    if start is not None:
        if end is None:
            data = data.slice(start)
        else:
            data = data.slice(start, end - start)
    elif end is not None:
        data = data.slice(0, end)

    # The needle must be a scalar of the same type as the data.
    if not isinstance(value, pa.Scalar):
        value = pa.scalar(value, type=data.type)
    elif data.type != value.type:
        value = pa.scalar(value.as_py(), type=data.type)

    found = call_function('index', [data], IndexOptions(value=value),
                          memory_pool)

    # Translate a hit inside the slice back to a position in the input.
    if start is not None and found.as_py() >= 0:
        found = pa.scalar(found.as_py() + start, type=pa.int64())
    return found
456
+
457
+
458
def take(data, indices, *, boundscheck=True, memory_pool=None):
    """
    Select values (or records) from array- or table-like data given integer
    selection indices.

    The result will be of the same type(s) as the input, with elements taken
    from the input array (or record batch / table fields) at the given
    indices. If an index is null then the corresponding value in the output
    will be null.

    Parameters
    ----------
    data : Array, ChunkedArray, RecordBatch, or Table
    indices : Array, ChunkedArray
        Must be of integer type
    boundscheck : boolean, default True
        Whether to boundscheck the indices. If False and there is an out of
        bounds index, will likely cause the process to crash.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    result : depends on inputs
        Selected values for the given indices

    Examples
    --------
    >>> import pyarrow as pa
    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
    >>> indices = pa.array([0, None, 4, 3])
    >>> arr.take(indices)
    <pyarrow.lib.StringArray object at ...>
    [
      "a",
      null,
      "e",
      null
    ]
    """
    take_options = TakeOptions(boundscheck=boundscheck)
    call_args = [data, indices]
    return call_function('take', call_args, take_options, memory_pool)
500
+
501
+
502
def fill_null(values, fill_value):
    """Replace each null element in values with a corresponding
    element from fill_value.

    If fill_value is scalar-like, then every null element in values
    will be replaced with fill_value. If fill_value is array-like,
    then the i-th element in values will be replaced with the i-th
    element in fill_value.

    The fill_value's type must be the same as that of values, or it
    must be able to be implicitly casted to the array's type.

    This is an alias for :func:`coalesce`.

    Parameters
    ----------
    values : Array, ChunkedArray, or Scalar-like object
        Each null element is replaced with the corresponding value
        from fill_value.
    fill_value : Array, ChunkedArray, or Scalar-like object
        If not same type as values, will attempt to cast.

    Returns
    -------
    result : depends on inputs
        Values with all null elements replaced

    Examples
    --------
    >>> import pyarrow as pa
    >>> arr = pa.array([1, 2, None, 3], type=pa.int8())
    >>> fill_value = pa.scalar(5, type=pa.int8())
    >>> arr.fill_null(fill_value)
    <pyarrow.lib.Int8Array object at ...>
    [
      1,
      2,
      5,
      3
    ]
    >>> arr = pa.array([1, 2, None, 4, None])
    >>> arr.fill_null(pa.array([10, 20, 30, 40, 50]))
    <pyarrow.lib.Int64Array object at ...>
    [
      1,
      2,
      30,
      4,
      50
    ]
    """
    if not isinstance(fill_value, (pa.Array, pa.ChunkedArray, pa.Scalar)):
        # Plain Python value: build a scalar of the target type directly.
        fill_value = pa.scalar(fill_value, type=values.type)
    elif values.type != fill_value.type:
        if isinstance(fill_value, pa.Scalar):
            fill_value = pa.scalar(fill_value.as_py(), type=values.type)
        else:
            # Arrays and chunked arrays have no ``as_py()``; cast them to
            # the target type instead of failing with AttributeError.
            fill_value = fill_value.cast(values.type)

    return call_function("coalesce", [values, fill_value])
559
+
560
+
561
def top_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
    """
    Select the indices of the top-k ordered elements from array- or table-like
    data.

    This is a specialization for :func:`select_k_unstable`. Output is not
    guaranteed to be stable.

    Parameters
    ----------
    values : Array, ChunkedArray, RecordBatch, or Table
        Data to sort and get top indices from.
    k : int
        The number of `k` elements to keep.
    sort_keys : List-like
        Column key names to order by when input is table-like data.
        The passed object is never modified.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    result : Array
        Indices of the top-k ordered elements

    Examples
    --------
    >>> import pyarrow as pa
    >>> import pyarrow.compute as pc
    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
    >>> pc.top_k_unstable(arr, k=3)
    <pyarrow.lib.UInt64Array object at ...>
    [
      5,
      4,
      2
    ]
    """
    # Work on a private copy so the caller's sequence is never mutated.
    keys = [] if sort_keys is None else list(sort_keys)
    if isinstance(values, (pa.Array, pa.ChunkedArray)):
        # Array-like input has no column names; a placeholder key carries
        # the sort order.
        keys.append(("dummy", "descending"))
    else:
        keys = [(key_name, "descending") for key_name in keys]
    options = SelectKOptions(k, keys)
    return call_function("select_k_unstable", [values], options, memory_pool)
606
+
607
+
608
def bottom_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
    """
    Select the indices of the bottom-k ordered elements from
    array- or table-like data.

    This is a specialization for :func:`select_k_unstable`. Output is not
    guaranteed to be stable.

    Parameters
    ----------
    values : Array, ChunkedArray, RecordBatch, or Table
        Data to sort and get bottom indices from.
    k : int
        The number of `k` elements to keep.
    sort_keys : List-like
        Column key names to order by when input is table-like data.
        The passed object is never modified.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    result : Array of indices
        Indices of the bottom-k ordered elements

    Examples
    --------
    >>> import pyarrow as pa
    >>> import pyarrow.compute as pc
    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
    >>> pc.bottom_k_unstable(arr, k=3)
    <pyarrow.lib.UInt64Array object at ...>
    [
      0,
      1,
      2
    ]
    """
    # Work on a private copy so the caller's sequence is never mutated.
    keys = [] if sort_keys is None else list(sort_keys)
    if isinstance(values, (pa.Array, pa.ChunkedArray)):
        # Array-like input has no column names; a placeholder key carries
        # the sort order.
        keys.append(("dummy", "ascending"))
    else:
        keys = [(key_name, "ascending") for key_name in keys]
    options = SelectKOptions(k, keys)
    return call_function("select_k_unstable", [values], options, memory_pool)
653
+
654
+
655
def random(n, *, initializer='system', options=None, memory_pool=None):
    """
    Generate numbers in the range [0, 1).

    Generated values are uniformly-distributed, double-precision
    in range [0, 1). Algorithm and seed can be changed via RandomOptions.

    Parameters
    ----------
    n : int
        Number of values to generate, must be greater than or equal to 0
    initializer : int or str
        How to initialize the underlying random generator.
        If an integer is given, it is used as a seed.
        If "system" is given, the random generator is initialized with
        a system-specific source of (hopefully true) randomness.
        Other values are invalid.
        Ignored when ``options`` is passed.
    options : pyarrow.compute.RandomOptions, optional
        Alternative way of passing options.
    memory_pool : pyarrow.MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.
    """
    # Honor an explicitly passed options object; only fall back to building
    # one from ``initializer`` when none was given.
    if options is None:
        options = RandomOptions(initializer=initializer)
    return call_function("random", [], options, memory_pool, length=n)
679
+
680
+
681
def field(*name_or_index):
    """Reference a column of the dataset.

    Stores only the field's name. Type and other information is known only when
    the expression is bound to a dataset having an explicit schema.

    Nested references are allowed by passing multiple names or a tuple of
    names. For example ``('foo', 'bar')`` references the field named "bar"
    inside the field named "foo".

    Parameters
    ----------
    *name_or_index : string, multiple strings, tuple or int
        The name or index of the (possibly nested) field the expression
        references to.

    Returns
    -------
    field_expr : Expression
        Reference to the given field

    Examples
    --------
    >>> import pyarrow.compute as pc
    >>> pc.field("a")
    <pyarrow.compute.Expression a>
    >>> pc.field(1)
    <pyarrow.compute.Expression FieldPath(1)>
    >>> pc.field(("a", "b"))
    <pyarrow.compute.Expression FieldRef.Nested(FieldRef.Name(a) ...
    >>> pc.field("a", "b")
    <pyarrow.compute.Expression FieldRef.Nested(FieldRef.Name(a) ...
    """
    if len(name_or_index) != 1:
        # In case of multiple strings not supplied in a tuple
        return Expression._nested_field(name_or_index)

    ref = name_or_index[0]
    if isinstance(ref, (str, int)):
        return Expression._field(ref)
    if isinstance(ref, tuple):
        return Expression._nested_field(ref)
    raise TypeError(
        "field reference should be str, multiple str, tuple or "
        f"integer, got {type(ref)}"
    )
728
+
729
+
730
def scalar(value):
    """Expression representing a scalar value.

    Parameters
    ----------
    value : bool, int, float or string
        Python value of the scalar. Note that only a subset of types are
        currently supported.

    Returns
    -------
    scalar_expr : Expression
        An Expression representing the scalar value
    """
    scalar_expr = Expression._scalar(value)
    return scalar_expr
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/config.pxi ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ from pyarrow.includes.libarrow cimport GetBuildInfo
19
+
20
+ from collections import namedtuple
21
+ import os
22
+
23
+
24
# (major, minor, patch) components of a version number.
VersionInfo = namedtuple('VersionInfo', ['major', 'minor', 'patch'])

# Build information; the fields are filled in by ``_build_info()``.
BuildInfo = namedtuple(
    'BuildInfo',
    [
        'version', 'version_info', 'so_version', 'full_so_version',
        'compiler_id', 'compiler_version', 'compiler_flags',
        'git_id', 'git_description', 'package_kind', 'build_type',
    ])

# Runtime information; the fields are filled in by ``runtime_info()``.
RuntimeInfo = namedtuple(
    'RuntimeInfo', ['simd_level', 'detected_simd_level'])
34
+
35
cdef _build_info():
    # Convert the C++ build information struct returned by GetBuildInfo()
    # into a Python BuildInfo namedtuple, decoding each C string field.
    cdef:
        const CBuildInfo* c_build

    c_build = &GetBuildInfo()

    version_info = VersionInfo(c_build.version_major,
                               c_build.version_minor,
                               c_build.version_patch)

    return BuildInfo(
        version=frombytes(c_build.version_string),
        version_info=version_info,
        so_version=frombytes(c_build.so_version),
        full_so_version=frombytes(c_build.full_so_version),
        compiler_id=frombytes(c_build.compiler_id),
        compiler_version=frombytes(c_build.compiler_version),
        compiler_flags=frombytes(c_build.compiler_flags),
        git_id=frombytes(c_build.git_id),
        git_description=frombytes(c_build.git_description),
        package_kind=frombytes(c_build.package_kind),
        # The build type is exposed in lower case.
        build_type=frombytes(c_build.build_type).lower(),
    )


# Computed once at import time and exposed as module-level values.
cpp_build_info = _build_info()
cpp_version = cpp_build_info.version
cpp_version_info = cpp_build_info.version_info
60
+
61
+
62
def runtime_info():
    """
    Get runtime information.

    Returns
    -------
    info : pyarrow.RuntimeInfo
        Named tuple with the ``simd_level`` and ``detected_simd_level``
        strings reported by ``GetRuntimeInfo()``.
    """
    cdef:
        CRuntimeInfo c_runtime

    c_runtime = GetRuntimeInfo()

    simd_level = frombytes(c_runtime.simd_level)
    detected_simd_level = frombytes(c_runtime.detected_simd_level)
    return RuntimeInfo(simd_level=simd_level,
                       detected_simd_level=detected_simd_level)
78
+
79
+
80
def set_timezone_db_path(path):
    """
    Configure the path to text timezone database on Windows.

    Parameters
    ----------
    path : str
        Path to text timezone database. If None, the global options are
        passed to ``Initialize`` without a timezone database path set.
    """
    cdef:
        CGlobalOptions c_options

    if path is not None:
        c_options.timezone_db_path = <c_string>tobytes(path)

    # Raises if the Arrow initialization reports a failure status.
    check_status(Initialize(c_options))
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/cuda.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # flake8: noqa
19
+
20
+
21
+ from pyarrow._cuda import (Context, IpcMemHandle, CudaBuffer,
22
+ HostBuffer, BufferReader, BufferWriter,
23
+ new_host_buffer,
24
+ serialize_record_batch, read_message,
25
+ read_record_batch)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/error.pxi ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ from cpython.exc cimport PyErr_CheckSignals, PyErr_SetInterrupt
19
+
20
+ from pyarrow.includes.libarrow cimport CStatus
21
+ from pyarrow.includes.libarrow_python cimport IsPyError, RestorePyError
22
+ from pyarrow.includes.common cimport c_string
23
+
24
+ from contextlib import contextmanager
25
+ import os
26
+ import signal
27
+ import threading
28
+
29
+ from pyarrow.lib import is_threading_enabled
30
+ from pyarrow.util import _break_traceback_cycle_from_frame
31
+
32
+
33
class ArrowException(Exception):
    """Root of the Arrow-specific exception classes defined here."""


class ArrowInvalid(ValueError, ArrowException):
    """Arrow error that can also be caught as ``ValueError``."""


class ArrowMemoryError(MemoryError, ArrowException):
    """Arrow error that can also be caught as ``MemoryError``."""


class ArrowKeyError(KeyError, ArrowException):
    """Arrow error that can also be caught as ``KeyError``."""

    def __str__(self):
        # Override KeyError.__str__, as it uses the repr() of the key
        return ArrowException.__str__(self)


class ArrowTypeError(TypeError, ArrowException):
    """Arrow error that can also be caught as ``TypeError``."""


class ArrowNotImplementedError(NotImplementedError, ArrowException):
    """Arrow error that can also be caught as ``NotImplementedError``."""


class ArrowCapacityError(ArrowException):
    """Arrow capacity error; derives from ``ArrowException`` only."""


class ArrowIndexError(IndexError, ArrowException):
    """Arrow error that can also be caught as ``IndexError``."""


class ArrowSerializationError(ArrowException):
    """Arrow serialization error; derives from ``ArrowException`` only."""


class ArrowCancelled(ArrowException):
    """
    Arrow cancellation error.

    Parameters
    ----------
    message : str
        Error message.
    signum : int, optional
        Stored unchanged on the ``signum`` attribute (None by default).
    """

    def __init__(self, message, signum=None):
        super().__init__(message)
        self.signum = signum
75
+
76
+
77
+ # Compatibility alias
78
+ ArrowIOError = IOError
79
+
80
+
81
+ # check_status() and convert_status() could be written directly in C++
82
+ # if we didn't define Arrow-specific subclasses (ArrowInvalid etc.)
83
cdef int check_status(const CStatus& status) except -1 nogil:
    # Raise the Python exception corresponding to a non-OK Status.
    # Returns 0 when `status` is OK; otherwise an exception is set and
    # -1 is returned (the declared error value of this function).
    if status.ok():
        return 0

    # Everything below touches Python objects, so the GIL is required.
    with gil:
        if IsPyError(status):
            # The status wraps a Python error: restore it rather than
            # building a new exception from the message.
            RestorePyError(status)
            return -1

        raise convert_status(status)
93
+
94
+
95
cdef object convert_status(const CStatus& status):
    # Build, without raising, the Python exception object that matches
    # a non-OK Status.
    if IsPyError(status):
        try:
            RestorePyError(status)
        except BaseException as py_exc:
            return py_exc

    # We don't use Status::ToString() as it would redundantly include
    # the C++ class name.
    text = frombytes(status.message(), safe=True)
    extra = status.detail()
    if extra != nullptr:
        text += ". Detail: " + frombytes(extra.get().ToString(),
                                         safe=True)

    if status.IsInvalid():
        return ArrowInvalid(text)

    if status.IsIOError():
        # Note: OSError constructor is
        #   OSError(message)
        # or
        #   OSError(errno, message, filename=None)
        # or (on Windows)
        #   OSError(errno, message, filename, winerror)
        err_no = ErrnoFromStatus(status)
        win_err = WinErrorFromStatus(status)
        if win_err != 0:
            return IOError(err_no, text, None, win_err)
        if err_no != 0:
            return IOError(err_no, text)
        return IOError(text)

    if status.IsOutOfMemory():
        return ArrowMemoryError(text)
    if status.IsKeyError():
        return ArrowKeyError(text)
    if status.IsNotImplemented():
        return ArrowNotImplementedError(text)
    if status.IsTypeError():
        return ArrowTypeError(text)
    if status.IsCapacityError():
        return ArrowCapacityError(text)
    if status.IsIndexError():
        return ArrowIndexError(text)
    if status.IsSerializationError():
        return ArrowSerializationError(text)

    if status.IsCancelled():
        sig = SignalFromStatus(status)
        if sig > 0:
            return ArrowCancelled(text, sig)
        return ArrowCancelled(text)

    # No dedicated Python class: fall back to the full string form.
    text = frombytes(status.ToString(), safe=True)
    return ArrowException(text)
150
+
151
+
152
+ # These are API functions for C++ PyArrow
153
cdef api int pyarrow_internal_check_status(const CStatus& status) \
        except -1 nogil:
    # `cdef api` entry point; forwards unchanged to check_status().
    return check_status(status)
156
+
157
cdef api object pyarrow_internal_convert_status(const CStatus& status):
    # `cdef api` entry point; forwards unchanged to convert_status().
    return convert_status(status)
159
+
160
+
161
cdef class StopToken:
    # Holder for a C++ CStopToken, kept in the `stop_token` attribute.
    cdef void init(self, CStopToken stop_token):
        # Move the given token into this object.
        self.stop_token = move(stop_token)
164
+
165
+
166
+ cdef c_bool signal_handlers_enabled = True
167
+
168
+
169
def enable_signal_handlers(c_bool enable):
    """
    Enable or disable interruption of long-running operations.

    Certain long-running operations detect user interruptions (for
    example Ctrl-C) by default. Detection works by installing a signal
    handler for the duration of the operation, which may interfere with
    other frameworks or libraries (such as an event loop).

    Parameters
    ----------
    enable : bool
        Whether to enable user interruption by setting a temporary
        signal handler.
    """
    global signal_handlers_enabled
    signal_handlers_enabled = enable
187
+
188
+
189
+ # For internal use
190
+
191
+ # Whether we need a workaround for https://bugs.python.org/issue42248
192
+ have_signal_refcycle = (sys.version_info < (3, 8, 10) or
193
+ (3, 9) <= sys.version_info < (3, 9, 5) or
194
+ sys.version_info[:2] == (3, 10))
195
+
196
cdef class SignalStopHandler:
    # Context manager that registers a cancelling signal handler on
    # entry and unregisters it on exit, exposing a StopToken through
    # the `stop_token` property.
    cdef:
        StopToken _stop_token
        vector[int] _signals
        c_bool _enabled

    def __cinit__(self):
        self._enabled = False

        self._init_signals()
        if have_signal_refcycle:
            _break_traceback_cycle_from_frame(sys._getframe(0))

        # Always provide a StopToken, even when no signal is handled.
        self._stop_token = StopToken()

        if not self._signals.empty():
            maybe_source = SetSignalStopSource()
            if not maybe_source.ok():
                # See ARROW-11841 / ARROW-17173: in complex interaction
                # scenarios (such as R calling into Python), SetSignalStopSource()
                # may have already activated a signal-receiving StopSource.
                # Just warn instead of erroring out.
                maybe_source.status().Warn()
            else:
                self._stop_token.init(deref(maybe_source).token())
                # signals don't work on Emscripten without threads.
                # and possibly other single-thread environments.
                self._enabled = is_threading_enabled()

    def _init_signals(self):
        # Select SIGINT/SIGTERM only when handlers are enabled, we are on
        # the main thread, and the signal has a custom (non-default,
        # non-ignored) handler installed.
        if (signal_handlers_enabled and
                threading.current_thread() is threading.main_thread()):
            self._signals = [
                sig for sig in (signal.SIGINT, signal.SIGTERM)
                if signal.getsignal(sig) not in (signal.SIG_DFL,
                                                 signal.SIG_IGN, None)]

    def __enter__(self):
        if self._enabled:
            check_status(RegisterCancellingSignalHandler(self._signals))
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        if self._enabled:
            UnregisterCancellingSignalHandler()
        if exc_value is None:
            # Make sure we didn't lose a signal
            try:
                check_status(self._stop_token.stop_token.Poll())
            except ArrowCancelled as e:
                exc_value = e
        if isinstance(exc_value, ArrowCancelled):
            if exc_value.signum:
                # Re-emit the exact same signal. We restored the Python signal
                # handler above, so it should receive it.
                if os.name == 'nt':
                    SendSignal(exc_value.signum)
                else:
                    SendSignalToThread(exc_value.signum,
                                       threading.main_thread().ident)
            else:
                # Simulate Python receiving a SIGINT
                # (see https://bugs.python.org/issue43356 for why we can't
                # simulate the exact signal number)
                PyErr_SetInterrupt()
            # Maximize chances of the Python signal handler being executed now.
            # Otherwise a potential KeyboardInterrupt might be missed by an
            # immediately enclosing try/except block.
            PyErr_CheckSignals()
            # ArrowCancelled will be re-raised if PyErr_CheckSignals()
            # returned successfully.

    def __dealloc__(self):
        # Only reset the stop source if this instance activated it.
        if self._enabled:
            ResetSignalStopSource()

    @property
    def stop_token(self):
        return self._stop_token
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/fs.py ADDED
@@ -0,0 +1,431 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ """
19
+ FileSystem abstraction to interact with various local and remote filesystems.
20
+ """
21
+
22
+ from pyarrow.util import _is_path_like, _stringify_path
23
+
24
+ from pyarrow._fs import ( # noqa
25
+ FileSelector,
26
+ FileType,
27
+ FileInfo,
28
+ FileSystem,
29
+ LocalFileSystem,
30
+ SubTreeFileSystem,
31
+ _MockFileSystem,
32
+ FileSystemHandler,
33
+ PyFileSystem,
34
+ _copy_files,
35
+ _copy_files_selector,
36
+ )
37
+
38
+ # For backward compatibility.
39
+ FileStats = FileInfo
40
+
41
+ _not_imported = []
42
+ try:
43
+ from pyarrow._azurefs import AzureFileSystem # noqa
44
+ except ImportError:
45
+ _not_imported.append("AzureFileSystem")
46
+
47
+ try:
48
+ from pyarrow._hdfs import HadoopFileSystem # noqa
49
+ except ImportError:
50
+ _not_imported.append("HadoopFileSystem")
51
+
52
+ try:
53
+ from pyarrow._gcsfs import GcsFileSystem # noqa
54
+ except ImportError:
55
+ _not_imported.append("GcsFileSystem")
56
+
57
+ try:
58
+ from pyarrow._s3fs import ( # noqa
59
+ AwsDefaultS3RetryStrategy, AwsStandardS3RetryStrategy,
60
+ S3FileSystem, S3LogLevel, S3RetryStrategy, ensure_s3_initialized,
61
+ finalize_s3, ensure_s3_finalized, initialize_s3, resolve_s3_region)
62
+ except ImportError:
63
+ _not_imported.append("S3FileSystem")
64
+ else:
65
+ # GH-38364: we don't initialize S3 eagerly as that could lead
66
+ # to crashes at shutdown even when S3 isn't used.
67
+ # Instead, S3 is initialized lazily using `ensure_s3_initialized`
68
+ # in assorted places.
69
+ import atexit
70
+ atexit.register(ensure_s3_finalized)
71
+
72
+
73
def __getattr__(name):
    """
    Module-level attribute fallback.

    Raises ImportError for names recorded in ``_not_imported`` (optional
    filesystems whose import failed) and AttributeError for anything else.
    """
    if name not in _not_imported:
        raise AttributeError(
            "module 'pyarrow.fs' has no attribute '{0}'".format(name)
        )

    raise ImportError(
        "The pyarrow installation is not built with support for "
        "'{0}'".format(name)
    )
83
+
84
+
85
def _filesystem_from_str(uri):
    """
    Instantiate a filesystem from a URI string.

    A non-empty path component in the URI is treated as a path prefix:
    it must point to a directory, and the result is wrapped in a
    SubTreeFileSystem rooted there. ValueError is raised otherwise.
    """
    base_fs, prefix = FileSystem.from_uri(uri)
    prefix = base_fs.normalize_path(prefix)
    if not prefix:
        return base_fs

    # validate that the prefix is pointing to a directory
    prefix_info = base_fs.get_file_info([prefix])[0]
    if prefix_info.type != FileType.Directory:
        raise ValueError(
            "The path component of the filesystem URI must point to a "
            "directory but it has a type: `{}`. The path component "
            "is `{}` and the given filesystem URI is `{}`".format(
                prefix_info.type.name, prefix_info.path, uri
            )
        )
    return SubTreeFileSystem(prefix, base_fs)
103
+
104
+
105
def _ensure_filesystem(filesystem, *, use_mmap=False):
    """
    Coerce ``filesystem`` into a pyarrow FileSystem instance.

    Parameters
    ----------
    filesystem : FileSystem, str or fsspec filesystem
        A FileSystem is returned unchanged, a string is interpreted as a
        filesystem URI, and an fsspec ``AbstractFileSystem`` is wrapped
        (or replaced by a native LocalFileSystem when its class is named
        ``LocalFileSystem``).
    use_mmap : bool, default False
        Passed to the native LocalFileSystem; not supported together
        with a URI string.

    Raises
    ------
    ValueError
        If ``use_mmap`` is set and ``filesystem`` is a URI string.
    TypeError
        If ``filesystem`` is none of the supported kinds.
    """
    if isinstance(filesystem, FileSystem):
        return filesystem
    elif isinstance(filesystem, str):
        if use_mmap:
            raise ValueError(
                "Specifying to use memory mapping not supported for "
                "filesystem specified as an URI string"
            )
        return _filesystem_from_str(filesystem)

    # handle fsspec-compatible filesystems
    try:
        import fsspec
    except ImportError:
        # fsspec is optional; without it only the TypeError below remains.
        pass
    else:
        if isinstance(filesystem, fsspec.AbstractFileSystem):
            if type(filesystem).__name__ == 'LocalFileSystem':
                # In case its a simple LocalFileSystem, use native arrow one
                return LocalFileSystem(use_mmap=use_mmap)
            return PyFileSystem(FSSpecHandler(filesystem))

    # The original message ended with a stray, unbalanced quote character.
    raise TypeError(
        "Unrecognized filesystem: {}. `filesystem` argument must be a "
        "FileSystem instance or a valid file system URI".format(
            type(filesystem))
    )
133
+
134
+
135
def _resolve_filesystem_and_path(path, filesystem=None, *, memory_map=False):
    """
    Return filesystem/path from path which could be an URI or a plain
    filesystem path.
    """
    if not _is_path_like(path):
        # File-like object: there is nothing for a filesystem to open.
        if filesystem is not None:
            raise ValueError(
                "'filesystem' passed but the specified path is file-like, so"
                " there is nothing to open with 'filesystem'."
            )
        return filesystem, path

    if filesystem is not None:
        filesystem = _ensure_filesystem(filesystem, use_mmap=memory_map)
        if isinstance(filesystem, LocalFileSystem):
            path = _stringify_path(path)
        elif not isinstance(path, str):
            raise TypeError(
                "Expected string path; path-like objects are only allowed "
                "with a local filesystem"
            )
        return filesystem, filesystem.normalize_path(path)

    path = _stringify_path(path)

    # if filesystem is not given, try to automatically determine one
    # first check if the file exists as a local (relative) file path
    # if not then try to parse the path as an URI
    filesystem = LocalFileSystem(use_mmap=memory_map)

    try:
        local_info = filesystem.get_file_info(path)
    except ValueError:  # ValueError means path is likely an URI
        exists_locally = False
    else:
        exists_locally = (local_info.type != FileType.NotFound)

    if exists_locally:
        return filesystem, filesystem.normalize_path(path)

    # if the file or directory doesn't exists locally, then assume that
    # the path is an URI describing the file system as well
    try:
        filesystem, path = FileSystem.from_uri(path)
    except ValueError as exc:
        # neither an URI nor a locally existing path, so assume that
        # local path was given and propagate a nicer file not found error
        # instead of a more confusing scheme parsing error
        reason = str(exc)
        if "empty scheme" not in reason and "Cannot parse URI" not in reason:
            raise

    return filesystem, path
191
+
192
+
193
def copy_files(source, destination,
               source_filesystem=None, destination_filesystem=None,
               *, chunk_size=1024*1024, use_threads=True):
    """
    Copy files between FileSystems.

    This function allows you to recursively copy directories of files from
    one file system to another, such as from S3 to your local machine.

    Parameters
    ----------
    source : string
        Source file path or URI to a single file or directory.
        If a directory, files will be copied recursively from this path.
    destination : string
        Destination file path or URI. If `source` is a file, `destination`
        is also interpreted as the destination file (not directory).
        Directories will be created as necessary.
    source_filesystem : FileSystem, optional
        Source filesystem, needs to be specified if `source` is not a URI,
        otherwise inferred.
    destination_filesystem : FileSystem, optional
        Destination filesystem, needs to be specified if `destination` is not
        a URI, otherwise inferred.
    chunk_size : int, default 1MB
        The maximum size of block to read before flushing to the
        destination file. A larger chunk_size will use more memory while
        copying but may help accommodate high latency FileSystems.
    use_threads : bool, default True
        Whether to use multiple threads to accelerate copying.

    Examples
    --------
    Inspect an S3 bucket's files:

    >>> s3, path = fs.FileSystem.from_uri(
    ...     "s3://registry.opendata.aws/roda/ndjson/")
    >>> selector = fs.FileSelector(path)
    >>> s3.get_file_info(selector)
    [<FileInfo for 'registry.opendata.aws/roda/ndjson/index.ndjson':...]

    Copy one file from S3 bucket to a local directory:

    >>> fs.copy_files("s3://registry.opendata.aws/roda/ndjson/index.ndjson",
    ...               "file:///{}/index_copy.ndjson".format(local_path))

    >>> fs.LocalFileSystem().get_file_info(str(local_path)+
    ...                                    '/index_copy.ndjson')
    <FileInfo for '.../index_copy.ndjson': type=FileType.File, size=...>

    Copy file using a FileSystem object:

    >>> fs.copy_files("registry.opendata.aws/roda/ndjson/index.ndjson",
    ...               "file:///{}/index_copy.ndjson".format(local_path),
    ...               source_filesystem=fs.S3FileSystem())
    """
    src_fs, src_path = _resolve_filesystem_and_path(source, source_filesystem)
    dst_fs, dst_path = _resolve_filesystem_and_path(
        destination, destination_filesystem
    )

    src_info = src_fs.get_file_info(src_path)
    if src_info.type != FileType.Directory:
        # Anything that is not a directory is copied as a single file.
        _copy_files(src_fs, src_path, dst_fs, dst_path,
                    chunk_size, use_threads)
        return

    recursive_sel = FileSelector(src_path, recursive=True)
    _copy_files_selector(src_fs, recursive_sel, dst_fs, dst_path,
                         chunk_size, use_threads)
266
+
267
+
268
class FSSpecHandler(FileSystemHandler):
    """
    Handler for fsspec-based Python filesystems.

    https://filesystem-spec.readthedocs.io/en/latest/index.html

    Parameters
    ----------
    fs : FSSpec-compliant filesystem instance

    Examples
    --------
    >>> PyFileSystem(FSSpecHandler(fsspec_fs)) # doctest: +SKIP
    """

    def __init__(self, fs):
        self.fs = fs

    def __eq__(self, other):
        # Handlers compare equal when the wrapped filesystems do.
        if isinstance(other, FSSpecHandler):
            return self.fs == other.fs
        return NotImplemented

    def __ne__(self, other):
        if isinstance(other, FSSpecHandler):
            return self.fs != other.fs
        return NotImplemented

    def get_type_name(self):
        """Return ``"fsspec+<protocol>"`` for the wrapped filesystem."""
        protocol = self.fs.protocol
        if isinstance(protocol, list):
            # Several protocol names: report the first one.
            protocol = protocol[0]
        return "fsspec+{0}".format(protocol)

    def normalize_path(self, path):
        """Return ``path`` unchanged."""
        return path

    @staticmethod
    def _create_file_info(path, info):
        """Build a FileInfo for ``path`` from an fsspec ``info`` mapping."""
        size = info["size"]
        if info["type"] == "file":
            ftype = FileType.File
        elif info["type"] == "directory":
            ftype = FileType.Directory
            # some fsspec filesystems include a file size for directories
            size = None
        else:
            ftype = FileType.Unknown
        return FileInfo(path, ftype, size=size, mtime=info.get("mtime", None))

    def get_file_info(self, paths):
        """Return one FileInfo per path; missing paths map to NotFound."""
        infos = []
        for path in paths:
            try:
                info = self.fs.info(path)
            except FileNotFoundError:
                infos.append(FileInfo(path, FileType.NotFound))
            else:
                infos.append(self._create_file_info(path, info))
        return infos

    def get_file_info_selector(self, selector):
        """
        Return FileInfo entries found under ``selector.base_dir``.

        Raises NotADirectoryError if the base exists but is not a
        directory, and FileNotFoundError if it is missing while
        ``selector.allow_not_found`` is false.
        """
        if not self.fs.isdir(selector.base_dir):
            if self.fs.exists(selector.base_dir):
                raise NotADirectoryError(selector.base_dir)
            else:
                if selector.allow_not_found:
                    return []
                else:
                    raise FileNotFoundError(selector.base_dir)

        if selector.recursive:
            maxdepth = None
        else:
            maxdepth = 1

        infos = []
        selected_files = self.fs.find(
            selector.base_dir, maxdepth=maxdepth, withdirs=True, detail=True
        )
        # Loop-invariant, so computed once rather than per entry.
        base_dir = selector.base_dir.strip("/")
        for path, info in selected_files.items():
            _path = path.strip("/")
            # Need to exclude base directory from selected files if present
            # (fsspec filesystems, see GH-37555)
            if _path != base_dir:
                infos.append(self._create_file_info(path, info))

        return infos

    def create_dir(self, path, recursive):
        """Create ``path``; an already existing directory is not an error."""
        # mkdir also raises FileNotFoundError when base directory is not found
        try:
            self.fs.mkdir(path, create_parents=recursive)
        except FileExistsError:
            pass

    def delete_dir(self, path):
        """Remove ``path`` recursively."""
        self.fs.rm(path, recursive=True)

    def _delete_dir_contents(self, path, missing_dir_ok):
        """
        Remove every entry listed under ``path``, keeping ``path`` itself.

        A missing ``path`` is silently accepted when ``missing_dir_ok`` is
        true; otherwise the FileNotFoundError propagates.
        """
        try:
            subpaths = self.fs.listdir(path, detail=False)
        except FileNotFoundError:
            if missing_dir_ok:
                return
            raise
        for subpath in subpaths:
            if self.fs.isdir(subpath):
                self.fs.rm(subpath, recursive=True)
            elif self.fs.isfile(subpath):
                self.fs.rm(subpath)

    def delete_dir_contents(self, path, missing_dir_ok):
        """
        Delete the contents of ``path``.

        Raises ValueError when ``path`` is empty or consists only of
        slashes; use ``delete_root_dir_contents`` for that case.
        """
        if path.strip("/") == "":
            # Format a single message; passing the pieces as separate
            # arguments made the exception render as a tuple.
            raise ValueError(
                "delete_dir_contents called on path '{}'".format(path))
        self._delete_dir_contents(path, missing_dir_ok)

    def delete_root_dir_contents(self):
        """Delete everything listed under ``"/"``."""
        # _delete_dir_contents requires `missing_dir_ok`; omitting it
        # raised TypeError on every call.
        self._delete_dir_contents("/", missing_dir_ok=False)

    def delete_file(self, path):
        """Delete ``path``; raise FileNotFoundError if it does not exist."""
        # fs.rm correctly raises IsADirectoryError when `path` is a directory
        # instead of a file and `recursive` is not set to True
        if not self.fs.exists(path):
            raise FileNotFoundError(path)
        self.fs.rm(path)

    def move(self, src, dest):
        """Move ``src`` to ``dest`` with ``recursive=True``."""
        self.fs.mv(src, dest, recursive=True)

    def copy_file(self, src, dest):
        """Copy ``src`` to ``dest``."""
        # fs.copy correctly raises IsADirectoryError when `src` is a directory
        # instead of a file
        self.fs.copy(src, dest)

    # TODO can we read/pass metadata (e.g. Content-Type) in the methods below?

    def open_input_stream(self, path):
        """Open ``path`` for reading; raise FileNotFoundError if not a file."""
        from pyarrow import PythonFile

        if not self.fs.isfile(path):
            raise FileNotFoundError(path)

        return PythonFile(self.fs.open(path, mode="rb"), mode="r")

    def open_input_file(self, path):
        """Open ``path`` for reading; raise FileNotFoundError if not a file."""
        from pyarrow import PythonFile

        if not self.fs.isfile(path):
            raise FileNotFoundError(path)

        return PythonFile(self.fs.open(path, mode="rb"), mode="r")

    def open_output_stream(self, path, metadata):
        """Open ``path`` in ``"wb"`` mode; ``metadata`` is currently unused."""
        from pyarrow import PythonFile

        return PythonFile(self.fs.open(path, mode="wb"), mode="w")

    def open_append_stream(self, path, metadata):
        """Open ``path`` in ``"ab"`` mode; ``metadata`` is currently unused."""
        from pyarrow import PythonFile

        return PythonFile(self.fs.open(path, mode="ab"), mode="w")
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/ipc.pxi ADDED
@@ -0,0 +1,1403 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ from cpython.pycapsule cimport PyCapsule_CheckExact, PyCapsule_GetPointer, PyCapsule_New
19
+
20
+ from collections import namedtuple
21
+ import warnings
22
+ from cython import sizeof
23
+
24
# Python-visible enum whose members V1..V5 take their values from the
# corresponding CMetadataVersion_* constants.
cpdef enum MetadataVersion:
    V1 = <char> CMetadataVersion_V1
    V2 = <char> CMetadataVersion_V2
    V3 = <char> CMetadataVersion_V3
    V4 = <char> CMetadataVersion_V4
    V5 = <char> CMetadataVersion_V5
30
+
31
+
32
cdef object _wrap_metadata_version(CMetadataVersion version):
    # Convert a C++ metadata version into its MetadataVersion member.
    return MetadataVersion(<char> version)
34
+
35
+
36
cdef CMetadataVersion _unwrap_metadata_version(
        MetadataVersion version) except *:
    # Map a MetadataVersion member back to its CMetadataVersion constant;
    # any other value raises ValueError.
    if version == MetadataVersion.V1:
        return CMetadataVersion_V1
    if version == MetadataVersion.V2:
        return CMetadataVersion_V2
    if version == MetadataVersion.V3:
        return CMetadataVersion_V3
    if version == MetadataVersion.V4:
        return CMetadataVersion_V4
    if version == MetadataVersion.V5:
        return CMetadataVersion_V5
    raise ValueError("Not a metadata version: " + repr(version))
49
+
50
+
51
+ _WriteStats = namedtuple(
52
+ 'WriteStats',
53
+ ('num_messages', 'num_record_batches', 'num_dictionary_batches',
54
+ 'num_dictionary_deltas', 'num_replaced_dictionaries'))
55
+
56
+
57
class WriteStats(_WriteStats):
    """IPC write statistics

    Parameters
    ----------
    num_messages : int
        Number of messages.
    num_record_batches : int
        Number of record batches.
    num_dictionary_batches : int
        Number of dictionary batches.
    num_dictionary_deltas : int
        Number of dictionary deltas.
    num_replaced_dictionaries : int
        Number of replaced dictionaries.
    """
    # No per-instance __dict__ on top of the namedtuple base.
    __slots__ = ()
74
+
75
+
76
@staticmethod
cdef _wrap_write_stats(CIpcWriteStats c):
    # Copy the five counters of the C++ struct into a WriteStats tuple.
    return WriteStats(c.num_messages, c.num_record_batches,
                      c.num_dictionary_batches, c.num_dictionary_deltas,
                      c.num_replaced_dictionaries)
81
+
82
+
83
# Plain namedtuple carrying the five read counters; ReadStats below only
# adds documentation on top of it.
_ReadStats = namedtuple(
    'ReadStats',
    [
        'num_messages',
        'num_record_batches',
        'num_dictionary_batches',
        'num_dictionary_deltas',
        'num_replaced_dictionaries',
    ])


class ReadStats(_ReadStats):
    """Statistics collected while reading IPC data.

    Parameters
    ----------
    num_messages : int
        Number of messages.
    num_record_batches : int
        Number of record batches.
    num_dictionary_batches : int
        Number of dictionary batches.
    num_dictionary_deltas : int
        Number of dictionary deltas.
    num_replaced_dictionaries : int
        Number of replaced dictionaries.
    """
    # No per-instance __dict__: the tuple fields are the only state.
    __slots__ = ()
106
+
107
+
108
@staticmethod
cdef _wrap_read_stats(CIpcReadStats c):
    # Copy each counter of the C struct into the Python-level ReadStats tuple.
    return ReadStats(
        num_messages=c.num_messages,
        num_record_batches=c.num_record_batches,
        num_dictionary_batches=c.num_dictionary_batches,
        num_dictionary_deltas=c.num_dictionary_deltas,
        num_replaced_dictionaries=c.num_replaced_dictionaries,
    )
113
+
114
+
115
cdef class IpcReadOptions(_Weakrefable):
    """
    Options controlling how IPC data is deserialized.

    Parameters
    ----------
    ensure_native_endian : bool, default True
        Whether to convert incoming data to platform-native endianness.
    use_threads : bool
        Whether to use the global CPU thread pool to parallelize any
        computational tasks like decompression
    included_fields : list
        If empty (the default), return all deserialized fields.
        If non-empty, the values are the indices of fields to read on
        the top-level schema
    """
    __slots__ = ()

    # cdef block is in lib.pxd

    def __init__(self, *, bint ensure_native_endian=True,
                 bint use_threads=True, list included_fields=None):
        # Begin with the library defaults and apply every override through
        # the property setters below.
        self.c_options = CIpcReadOptions.Defaults()
        self.ensure_native_endian = ensure_native_endian
        self.use_threads = use_threads
        if included_fields is None:
            # Nothing requested: keep the default field selection.
            return
        self.included_fields = included_fields

    @property
    def ensure_native_endian(self):
        """Value of the ``ensure_native_endian`` option."""
        return self.c_options.ensure_native_endian

    @ensure_native_endian.setter
    def ensure_native_endian(self, bint value):
        self.c_options.ensure_native_endian = value

    @property
    def use_threads(self):
        """Value of the ``use_threads`` option."""
        return self.c_options.use_threads

    @use_threads.setter
    def use_threads(self, bint value):
        self.c_options.use_threads = value

    @property
    def included_fields(self):
        """Value of the ``included_fields`` option."""
        return self.c_options.included_fields

    @included_fields.setter
    def included_fields(self, list value not None):
        self.c_options.included_fields = value
166
+
167
+
168
cdef class IpcWriteOptions(_Weakrefable):
    """
    Options controlling how data is serialized to the IPC format.

    Parameters
    ----------
    metadata_version : MetadataVersion, default MetadataVersion.V5
        The metadata version to write.  V5 is the current and latest,
        V4 is the pre-1.0 metadata version (with incompatible Union layout).
    allow_64bit : bool, default False
        If true, allow field lengths that don't fit in a signed 32-bit int.
    use_legacy_format : bool, default False
        Whether to use the pre-Arrow 0.15 IPC format.
    compression : str, Codec, or None
        compression codec to use for record batch buffers.
        If None then batch buffers will be uncompressed.
        Must be "lz4", "zstd" or None.
        To specify a compression_level use `pyarrow.Codec`
    use_threads : bool
        Whether to use the global CPU thread pool to parallelize any
        computational tasks like compression.
    emit_dictionary_deltas : bool
        Whether to emit dictionary deltas.  Default is false for maximum
        stream compatibility.
    unify_dictionaries : bool
        If true then calls to write_table will attempt to unify dictionaries
        across all batches in the table.  This can help avoid the need for
        replacement dictionaries (which the file format does not support)
        but requires computing the unified dictionary and then remapping
        the indices arrays.

        This parameter is ignored when writing to the IPC stream format as
        the IPC stream format can support replacement dictionaries.
    """
    __slots__ = ()

    # cdef block is in lib.pxd

    def __init__(self, *, metadata_version=MetadataVersion.V5,
                 bint allow_64bit=False, use_legacy_format=False,
                 compression=None, bint use_threads=True,
                 bint emit_dictionary_deltas=False,
                 bint unify_dictionaries=False):
        # Begin with the library defaults, then route every argument through
        # its property setter so validation lives in one place.
        self.c_options = CIpcWriteOptions.Defaults()
        self.allow_64bit = allow_64bit
        self.use_legacy_format = use_legacy_format
        self.metadata_version = metadata_version
        # None means "leave the default codec untouched".
        if compression is not None:
            self.compression = compression
        self.use_threads = use_threads
        self.emit_dictionary_deltas = emit_dictionary_deltas
        self.unify_dictionaries = unify_dictionaries

    @property
    def allow_64bit(self):
        """Value of the ``allow_64bit`` option."""
        return self.c_options.allow_64bit

    @allow_64bit.setter
    def allow_64bit(self, bint value):
        self.c_options.allow_64bit = value

    @property
    def use_legacy_format(self):
        """Value of the underlying ``write_legacy_ipc_format`` option."""
        return self.c_options.write_legacy_ipc_format

    @use_legacy_format.setter
    def use_legacy_format(self, bint value):
        self.c_options.write_legacy_ipc_format = value

    @property
    def metadata_version(self):
        """The configured metadata version, as a MetadataVersion member."""
        return _wrap_metadata_version(self.c_options.metadata_version)

    @metadata_version.setter
    def metadata_version(self, value):
        self.c_options.metadata_version = _unwrap_metadata_version(value)

    @property
    def compression(self):
        """Name of the configured codec, or None when no codec is set."""
        if self.c_options.codec != nullptr:
            return frombytes(self.c_options.codec.get().name())
        return None

    @compression.setter
    def compression(self, value):
        # None clears any configured codec.
        if value is None:
            self.c_options.codec.reset()
            return

        # A codec name: only ZSTD and LZ4_FRAME are accepted.
        if isinstance(value, str):
            codec_type = _ensure_compression(value)
            if not (codec_type == CCompressionType_ZSTD or
                    codec_type == CCompressionType_LZ4_FRAME):
                raise ValueError("Compression type must be lz4, zstd or None")
            self.c_options.codec = shared_ptr[CCodec](GetResultValue(
                CCodec.Create(codec_type)).release())
            return

        # A ready-made Codec object: reuse the codec it wraps.
        if isinstance(value, Codec):
            if value.name != "lz4" and value.name != "zstd":
                raise ValueError("Compression type must be lz4, zstd or None")
            self.c_options.codec = (<Codec>value).wrapped
            return

        raise TypeError(
            "Property `compression` must be None, str, or pyarrow.Codec")

    @property
    def use_threads(self):
        """Value of the ``use_threads`` option."""
        return self.c_options.use_threads

    @use_threads.setter
    def use_threads(self, bint value):
        self.c_options.use_threads = value

    @property
    def emit_dictionary_deltas(self):
        """Value of the ``emit_dictionary_deltas`` option."""
        return self.c_options.emit_dictionary_deltas

    @emit_dictionary_deltas.setter
    def emit_dictionary_deltas(self, bint value):
        self.c_options.emit_dictionary_deltas = value

    @property
    def unify_dictionaries(self):
        """Value of the ``unify_dictionaries`` option."""
        return self.c_options.unify_dictionaries

    @unify_dictionaries.setter
    def unify_dictionaries(self, bint value):
        self.c_options.unify_dictionaries = value
293
+
294
+
295
cdef class Message(_Weakrefable):
    """
    An Arrow IPC message: metadata plus an optional body.
    """

    def __cinit__(self):
        pass

    def __init__(self):
        # Instances are created internally via __new__; direct construction
        # is rejected.
        raise TypeError("Do not call {}'s constructor directly, use "
                        "`pyarrow.ipc.read_message` function instead."
                        .format(self.__class__.__name__))

    @property
    def type(self):
        """The message type, formatted as a string."""
        return frombytes(FormatMessageType(self.message.get().type()))

    @property
    def metadata(self):
        """The message metadata, wrapped as a buffer."""
        return pyarrow_wrap_buffer(self.message.get().metadata())

    @property
    def metadata_version(self):
        """The metadata version of the message, as a MetadataVersion member."""
        return _wrap_metadata_version(self.message.get().metadata_version())

    @property
    def body(self):
        """The message body wrapped as a buffer, or None when there is none."""
        cdef shared_ptr[CBuffer] c_body = self.message.get().body()
        if c_body.get() != NULL:
            return pyarrow_wrap_buffer(c_body)
        return None

    def equals(self, Message other):
        """
        Returns True if the message contents (metadata and body) are identical

        Parameters
        ----------
        other : Message

        Returns
        -------
        are_equal : bool
        """
        cdef c_bool are_equal
        with nogil:
            are_equal = self.message.get().Equals(deref(other.message.get()))
        return are_equal

    def serialize_to(self, NativeFile sink, alignment=8, memory_pool=None):
        """
        Write message to generic OutputStream

        Parameters
        ----------
        sink : NativeFile
        alignment : int, default 8
            Byte alignment for metadata and body
        memory_pool : MemoryPool, default None
            Accepted as part of the signature; the body of this method does
            not reference it.
        """
        cdef:
            int64_t bytes_written = 0
            COutputStream* c_sink
            CIpcWriteOptions write_options

        write_options.alignment = alignment
        c_sink = sink.get_output_stream().get()
        with nogil:
            check_status(self.message.get()
                         .SerializeTo(c_sink, write_options, &bytes_written))

    def serialize(self, alignment=8, memory_pool=None):
        """
        Write message as encapsulated IPC message

        Parameters
        ----------
        alignment : int, default 8
            Byte alignment for metadata and body
        memory_pool : MemoryPool, default None
            Uses default memory pool if not specified

        Returns
        -------
        serialized : Buffer
        """
        # Serialize into an in-memory stream and hand back its contents.
        buffer_stream = BufferOutputStream(memory_pool)
        self.serialize_to(buffer_stream, alignment=alignment,
                          memory_pool=memory_pool)
        return buffer_stream.getvalue()

    def __repr__(self):
        # A Message created without an underlying C++ message has nothing
        # to describe.
        if self.message == nullptr:
            return """pyarrow.Message(uninitialized)"""

        body = self.body
        body_len = body.size if body is not None else 0
        metadata_len = self.metadata.size

        return """pyarrow.Message
type: {0}
metadata length: {1}
body length: {2}""".format(self.type, metadata_len, body_len)
399
+
400
+
401
cdef class MessageReader(_Weakrefable):
    """
    Reads Message objects one at a time from a source such as an
    InputStream.
    """
    cdef:
        unique_ptr[CMessageReader] reader

    def __cinit__(self):
        pass

    def __init__(self):
        # Instances are built by open_stream(); direct construction is
        # rejected.
        raise TypeError("Do not call {}'s constructor directly, use "
                        "`pyarrow.ipc.MessageReader.open_stream` function "
                        "instead.".format(self.__class__.__name__))

    @staticmethod
    def open_stream(source):
        """
        Open stream from source, if you want to use memory map use
        MemoryMappedFile as source.

        Parameters
        ----------
        source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
            A readable source, like an InputStream
        """
        cdef:
            MessageReader message_reader = MessageReader.__new__(MessageReader)
            shared_ptr[CInputStream] c_stream
            unique_ptr[CMessageReader] c_reader

        _get_input_stream(source, &c_stream)
        with nogil:
            c_reader = CMessageReader.Open(c_stream)
            # Hand ownership of the freshly opened reader to the wrapper.
            message_reader.reader.reset(c_reader.release())

        return message_reader

    def __iter__(self):
        return self

    def __next__(self):
        return self.read_next_message()

    def read_next_message(self):
        """
        Read next Message from the stream.

        Raises
        ------
        StopIteration
            At end of stream
        """
        cdef Message msg = Message.__new__(Message)

        with nogil:
            msg.message = move(GetResultValue(self.reader.get()
                                              .ReadNextMessage()))

        # A null message marks the end of the stream.
        if msg.message.get() == NULL:
            raise StopIteration

        return msg
465
+
466
+ # ----------------------------------------------------------------------
467
+ # File and stream readers and writers
468
+
469
cdef class _CRecordBatchWriter(_Weakrefable):
    """The base RecordBatchWriter wrapper.

    Provides common implementations of convenience methods. Should not
    be instantiated directly by user code.

    Every method that touches the underlying writer first checks that the
    writer pointer is set and raises ValueError otherwise, mirroring the
    check performed by the ``stats`` property.
    """

    # cdef block is in lib.pxd

    def write(self, table_or_batch):
        """
        Write RecordBatch or Table to stream.

        Parameters
        ----------
        table_or_batch : {RecordBatch, Table}

        Raises
        ------
        ValueError
            If the argument is neither a RecordBatch nor a Table.
        """
        if isinstance(table_or_batch, RecordBatch):
            self.write_batch(table_or_batch)
        elif isinstance(table_or_batch, Table):
            self.write_table(table_or_batch)
        else:
            raise ValueError(type(table_or_batch))

    def write_batch(self, RecordBatch batch, custom_metadata=None):
        """
        Write RecordBatch to stream.

        Parameters
        ----------
        batch : RecordBatch
        custom_metadata : mapping or KeyValueMetadata
            Keys and values must be string-like / coercible to bytes

        Raises
        ------
        ValueError
            If no underlying writer is set.
        """
        # Refuse to dereference a null writer pointer.
        if not self.writer:
            raise ValueError("Operation on closed writer")

        metadata = ensure_metadata(custom_metadata, allow_none=True)
        c_meta = pyarrow_unwrap_metadata(metadata)

        with nogil:
            check_status(self.writer.get()
                         .WriteRecordBatch(deref(batch.batch), c_meta))

    def write_table(self, Table table, max_chunksize=None):
        """
        Write Table to stream in (contiguous) RecordBatch objects.

        Parameters
        ----------
        table : Table
        max_chunksize : int, default None
            Maximum number of rows for RecordBatch chunks. Individual chunks may
            be smaller depending on the chunk layout of individual columns.

        Raises
        ------
        ValueError
            If no underlying writer is set.
        """
        cdef:
            # max_chunksize must be > 0 to have any impact
            int64_t c_max_chunksize = -1

        # Refuse to dereference a null writer pointer.
        if not self.writer:
            raise ValueError("Operation on closed writer")

        if max_chunksize is not None:
            c_max_chunksize = max_chunksize

        with nogil:
            check_status(self.writer.get().WriteTable(table.table[0],
                                                      c_max_chunksize))

    def close(self):
        """
        Close stream and write end-of-stream 0 marker.

        Raises
        ------
        ValueError
            If no underlying writer is set.
        """
        # Refuse to dereference a null writer pointer.
        if not self.writer:
            raise ValueError("Operation on closed writer")

        with nogil:
            check_status(self.writer.get().Close())

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Leaving the ``with`` block always closes the writer.
        self.close()

    @property
    def stats(self):
        """
        Current IPC write statistics.
        """
        if not self.writer:
            raise ValueError("Operation on closed writer")
        return _wrap_write_stats(self.writer.get().stats())
553
+
554
+
555
cdef class _RecordBatchStreamWriter(_CRecordBatchWriter):
    # Writer created with MakeStreamWriter; keeps a copy of the write options
    # it was opened with.
    cdef:
        CIpcWriteOptions options
        bint closed

    def __cinit__(self):
        pass

    def __dealloc__(self):
        pass

    @property
    def _use_legacy_format(self):
        # For testing (see test_ipc.py)
        return self.options.write_legacy_ipc_format

    @property
    def _metadata_version(self):
        # For testing (see test_ipc.py)
        return _wrap_metadata_version(self.options.metadata_version)

    def _open(self, sink, Schema schema not None,
              IpcWriteOptions options=IpcWriteOptions()):
        # Open a stream writer for ``schema`` on ``sink`` using a copy of the
        # supplied options.
        cdef shared_ptr[COutputStream] c_out_stream

        self.options = options.c_options
        get_writer(sink, &c_out_stream)
        with nogil:
            self.writer = GetResultValue(
                MakeStreamWriter(c_out_stream, schema.sp_schema,
                                 self.options))
587
+
588
+
589
cdef _get_input_stream(object source, shared_ptr[CInputStream]* out):
    # Prefer a buffer view of ``source``; when as_buffer() rejects it with a
    # TypeError the original object is passed through unchanged.
    try:
        stream_source = as_buffer(source)
    except TypeError:
        # Non-buffer-like
        stream_source = source

    get_input_stream(stream_source, True, out)
597
+
598
+
599
class _ReadPandasMixin:

    def read_pandas(self, **options):
        """
        Read contents of stream to a pandas.DataFrame.

        All record batches are first collected with ``read_all`` and the
        result is converted with its ``to_pandas`` method.

        Parameters
        ----------
        **options
            Arguments to forward to :meth:`Table.to_pandas`.

        Returns
        -------
        df : pandas.DataFrame
        """
        return self.read_all().to_pandas(**options)
619
+
620
+
621
cdef class RecordBatchReader(_Weakrefable):
    """Base class for reading stream of record batches.

    A reader is an iterator over record batches that also exposes the
    schema up front (without the need to get any batches).

    Warnings
    --------
    Do not call this class's constructor directly, use one of the
    ``RecordBatchReader.from_*`` functions instead.

    Notes
    -----
    To import and export using the Arrow C stream interface, use the
    ``_import_from_c`` and ``_export_to_c`` methods. However, keep in mind this
    interface is intended for expert users.

    Examples
    --------
    >>> import pyarrow as pa
    >>> schema = pa.schema([('x', pa.int64())])
    >>> def iter_record_batches():
    ...     for i in range(2):
    ...         yield pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], schema=schema)
    >>> reader = pa.RecordBatchReader.from_batches(schema, iter_record_batches())
    >>> print(reader.schema)
    x: int64
    >>> for batch in reader:
    ...     print(batch)
    pyarrow.RecordBatch
    x: int64
    ----
    x: [1,2,3]
    pyarrow.RecordBatch
    x: int64
    ----
    x: [1,2,3]
    """

    # cdef block is in lib.pxd

    def __init__(self):
        # Instances are built by the from_* / _import_* factories.
        raise TypeError("Do not call {}'s constructor directly, "
                        "use one of the RecordBatchReader.from_* functions instead."
                        .format(self.__class__.__name__))

    def __iter__(self):
        return self

    def __next__(self):
        return self.read_next_batch()

    @property
    def schema(self):
        """
        Shared schema of the record batches in the stream.

        Returns
        -------
        Schema
        """
        cdef shared_ptr[CSchema] sp_schema

        with nogil:
            sp_schema = self.reader.get().schema()

        return pyarrow_wrap_schema(sp_schema)

    def read_next_batch(self):
        """
        Read next RecordBatch from the stream.

        Raises
        ------
        StopIteration:
            At end of stream.

        Returns
        -------
        RecordBatch
        """
        cdef shared_ptr[CRecordBatch] c_batch

        with nogil:
            check_status(self.reader.get().ReadNext(&c_batch))

        # A null batch marks the end of the stream.
        if c_batch.get() == NULL:
            raise StopIteration

        return pyarrow_wrap_batch(c_batch)

    def read_next_batch_with_custom_metadata(self):
        """
        Read next RecordBatch from the stream along with its custom metadata.

        Raises
        ------
        StopIteration:
            At end of stream.

        Returns
        -------
        batch : RecordBatch
        custom_metadata : KeyValueMetadata
        """
        cdef CRecordBatchWithMetadata c_result

        with nogil:
            c_result = GetResultValue(self.reader.get().ReadNext())

        # A null batch marks the end of the stream.
        if c_result.batch.get() == NULL:
            raise StopIteration

        return _wrap_record_batch_with_metadata(c_result)

    def iter_batches_with_custom_metadata(self):
        """
        Iterate over record batches from the stream along with their custom
        metadata.

        Yields
        ------
        RecordBatchWithMetadata
        """
        # Keep pulling until the reader signals exhaustion with StopIteration.
        while True:
            try:
                yield self.read_next_batch_with_custom_metadata()
            except StopIteration:
                return

    def read_all(self):
        """
        Read all record batches as a pyarrow.Table.

        Returns
        -------
        Table
        """
        cdef shared_ptr[CTable] c_table
        with nogil:
            check_status(self.reader.get().ToTable().Value(&c_table))
        return pyarrow_wrap_table(c_table)

    read_pandas = _ReadPandasMixin.read_pandas

    def close(self):
        """
        Release any resources associated with the reader.
        """
        with nogil:
            check_status(self.reader.get().Close())

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Leaving the ``with`` block always closes the reader.
        self.close()

    def cast(self, target_schema):
        """
        Wrap this reader with one that casts each batch lazily as it is pulled.
        Currently only a safe cast to target_schema is implemented.

        Parameters
        ----------
        target_schema : Schema
            Schema to cast to, the names and order of fields must match.

        Returns
        -------
        RecordBatchReader
        """
        cdef:
            shared_ptr[CSchema] sp_target
            shared_ptr[CRecordBatchReader] sp_casting_reader
            RecordBatchReader casted

        # Field names (and their order) must agree before casting is set up.
        if self.schema.names != target_schema.names:
            raise ValueError("Target schema's field names are not matching "
                             f"the table's field names: {self.schema.names}, "
                             f"{target_schema.names}")

        sp_target = pyarrow_unwrap_schema(target_schema)
        sp_casting_reader = GetResultValue(CCastingRecordBatchReader.Make(
            self.reader, sp_target))

        casted = RecordBatchReader.__new__(RecordBatchReader)
        casted.reader = sp_casting_reader
        return casted

    def _export_to_c(self, out_ptr):
        """
        Export to a C ArrowArrayStream struct, given its pointer.

        Parameters
        ----------
        out_ptr: int
            The raw pointer to a C ArrowArrayStream struct.

        Be careful: if you don't pass the ArrowArrayStream struct to a
        consumer, array memory will leak.  This is a low-level function
        intended for expert users.
        """
        cdef void* raw_ptr = _as_c_pointer(out_ptr)
        with nogil:
            check_status(ExportRecordBatchReader(
                self.reader, <ArrowArrayStream*> raw_ptr))

    @staticmethod
    def _import_from_c(in_ptr):
        """
        Import RecordBatchReader from a C ArrowArrayStream struct,
        given its pointer.

        Parameters
        ----------
        in_ptr: int
            The raw pointer to a C ArrowArrayStream struct.

        This is a low-level function intended for expert users.
        """
        cdef:
            void* raw_ptr = _as_c_pointer(in_ptr)
            shared_ptr[CRecordBatchReader] sp_reader
            RecordBatchReader imported

        with nogil:
            sp_reader = GetResultValue(ImportRecordBatchReader(
                <ArrowArrayStream*> raw_ptr))

        imported = RecordBatchReader.__new__(RecordBatchReader)
        imported.reader = sp_reader
        return imported

    def __arrow_c_stream__(self, requested_schema=None):
        """
        Export to a C ArrowArrayStream PyCapsule.

        Parameters
        ----------
        requested_schema : PyCapsule, default None
            The schema to which the stream should be casted, passed as a
            PyCapsule containing a C ArrowSchema representation of the
            requested schema.

        Returns
        -------
        PyCapsule
            A capsule containing a C ArrowArrayStream struct.
        """
        cdef ArrowArrayStream* c_stream

        if requested_schema is not None:
            out_schema = Schema._import_from_c_capsule(requested_schema)
            # Only when the requested schema differs is the export delegated
            # to a casting reader.
            if self.schema != out_schema:
                return self.cast(out_schema).__arrow_c_stream__()

        stream_capsule = alloc_c_stream(&c_stream)

        with nogil:
            check_status(ExportRecordBatchReader(self.reader, c_stream))

        return stream_capsule

    @staticmethod
    def _import_from_c_capsule(stream):
        """
        Import RecordBatchReader from a C ArrowArrayStream PyCapsule.

        Parameters
        ----------
        stream: PyCapsule
            A capsule containing a C ArrowArrayStream PyCapsule.

        Returns
        -------
        RecordBatchReader
        """
        cdef:
            ArrowArrayStream* c_stream
            shared_ptr[CRecordBatchReader] sp_reader
            RecordBatchReader imported

        c_stream = <ArrowArrayStream*>PyCapsule_GetPointer(
            stream, 'arrow_array_stream'
        )

        with nogil:
            sp_reader = GetResultValue(ImportRecordBatchReader(c_stream))

        imported = RecordBatchReader.__new__(RecordBatchReader)
        imported.reader = sp_reader
        return imported

    @staticmethod
    def from_stream(data, schema=None):
        """
        Create RecordBatchReader from a Arrow-compatible stream object.

        This accepts objects implementing the Arrow PyCapsule Protocol for
        streams, i.e. objects that have a ``__arrow_c_stream__`` method.

        Parameters
        ----------
        data : Arrow-compatible stream object
            Any object that implements the Arrow PyCapsule Protocol for
            streams.
        schema : Schema, default None
            The schema to which the stream should be casted, if supported
            by the stream object.

        Returns
        -------
        RecordBatchReader
        """

        if not hasattr(data, "__arrow_c_stream__"):
            raise TypeError(
                "Expected an object implementing the Arrow PyCapsule Protocol for "
                "streams (i.e. having a `__arrow_c_stream__` method), "
                f"got {type(data)!r}."
            )

        # No schema means no cast is requested from the producer.
        requested = None
        if schema is not None:
            if not hasattr(schema, "__arrow_c_schema__"):
                raise TypeError(
                    "Expected an object implementing the Arrow PyCapsule Protocol for "
                    "schema (i.e. having a `__arrow_c_schema__` method), "
                    f"got {type(schema)!r}."
                )
            requested = schema.__arrow_c_schema__()

        capsule = data.__arrow_c_stream__(requested)
        return RecordBatchReader._import_from_c_capsule(capsule)

    @staticmethod
    def from_batches(Schema schema not None, batches):
        """
        Create RecordBatchReader from an iterable of batches.

        Parameters
        ----------
        schema : Schema
            The shared schema of the record batches
        batches : Iterable[RecordBatch]
            The batches that this reader will return.

        Returns
        -------
        reader : RecordBatchReader
        """
        cdef:
            shared_ptr[CSchema] sp_schema
            shared_ptr[CRecordBatchReader] sp_reader
            RecordBatchReader built

        sp_schema = pyarrow_unwrap_schema(schema)
        sp_reader = GetResultValue(CPyRecordBatchReader.Make(
            sp_schema, batches))

        built = RecordBatchReader.__new__(RecordBatchReader)
        built.reader = sp_reader
        return built
989
+
990
+
991
cdef class _RecordBatchStreamReader(RecordBatchReader):
    # Reader opened with CRecordBatchStreamReader.Open; keeps the input
    # stream, a copy of the read options and a typed pointer to the reader.
    cdef:
        shared_ptr[CInputStream] in_stream
        CIpcReadOptions options
        CRecordBatchStreamReader* stream_reader

    def __cinit__(self):
        pass

    def _open(self, source, IpcReadOptions options=IpcReadOptions(),
              MemoryPool memory_pool=None):
        # Copy the options, attach the memory pool, then open the reader on
        # the input stream obtained from ``source``.
        self.options = options.c_options
        self.options.memory_pool = maybe_unbox_memory_pool(memory_pool)
        _get_input_stream(source, &self.in_stream)
        with nogil:
            self.reader = GetResultValue(CRecordBatchStreamReader.Open(
                self.in_stream, self.options))
            # Same object as self.reader, viewed through the stream-reader
            # type so that stats() can be called on it.
            self.stream_reader = <CRecordBatchStreamReader*> self.reader.get()

    @property
    def stats(self):
        """
        Current IPC read statistics.
        """
        if self.reader:
            return _wrap_read_stats(self.stream_reader.stats())
        raise ValueError("Operation on closed reader")
1018
+
1019
+
1020
cdef class _RecordBatchFileWriter(_RecordBatchStreamWriter):
    # Same as the stream writer, except that the underlying writer is
    # created with MakeFileWriter.

    def _open(self, sink, Schema schema not None,
              IpcWriteOptions options=IpcWriteOptions()):
        cdef shared_ptr[COutputStream] c_out_stream

        self.options = options.c_options
        get_writer(sink, &c_out_stream)
        with nogil:
            self.writer = GetResultValue(
                MakeFileWriter(c_out_stream, schema.sp_schema, self.options))
1032
+
1033
# Plain two-field namedtuple; RecordBatchWithMetadata below only adds
# documentation on top of it.
_RecordBatchWithMetadata = namedtuple(
    'RecordBatchWithMetadata',
    ['batch', 'custom_metadata'])


class RecordBatchWithMetadata(_RecordBatchWithMetadata):
    """A record batch paired with its custom metadata.

    Parameters
    ----------
    batch : RecordBatch
    custom_metadata : KeyValueMetadata
    """
    # No per-instance __dict__: the tuple fields are the only state.
    __slots__ = ()
1047
+
1048
+
1049
@staticmethod
cdef _wrap_record_batch_with_metadata(CRecordBatchWithMetadata c):
    # Wrap both members of the C struct and pair them in the Python tuple.
    return RecordBatchWithMetadata(
        batch=pyarrow_wrap_batch(c.batch),
        custom_metadata=pyarrow_wrap_metadata(c.custom_metadata),
    )
1053
+
1054
+
1055
cdef class _RecordBatchFileReader(_Weakrefable):
    # Random-access reader over an IPC file; the schema is read once when
    # the file is opened and exposed as a read-only attribute.
    cdef:
        SharedPtrNoGIL[CRecordBatchFileReader] reader
        shared_ptr[CRandomAccessFile] file
        CIpcReadOptions options

    cdef readonly:
        Schema schema

    def __cinit__(self):
        pass

    def _open(self, source, footer_offset=None,
              IpcReadOptions options=IpcReadOptions(),
              MemoryPool memory_pool=None):
        cdef int64_t c_footer_offset = 0

        self.options = options.c_options
        self.options.memory_pool = maybe_unbox_memory_pool(memory_pool)

        # Prefer a buffer view of the source; objects that as_buffer()
        # rejects with TypeError are passed through unchanged.
        try:
            source = as_buffer(source)
        except TypeError:
            pass

        get_reader(source, False, &self.file)

        if footer_offset is not None:
            c_footer_offset = footer_offset

        with nogil:
            # The offset-taking overload is used only for a non-zero offset.
            if c_footer_offset == 0:
                self.reader = GetResultValue(
                    CRecordBatchFileReader.Open(self.file.get(),
                                                self.options))
            else:
                self.reader = GetResultValue(
                    CRecordBatchFileReader.Open2(self.file.get(),
                                                 c_footer_offset,
                                                 self.options))

        self.schema = pyarrow_wrap_schema(self.reader.get().schema())

    @property
    def num_record_batches(self):
        """
        The number of record batches in the IPC file.
        """
        return self.reader.get().num_record_batches()

    def get_batch(self, int i):
        """
        Read the record batch with the given index.

        Parameters
        ----------
        i : int
            The index of the record batch in the IPC file.

        Returns
        -------
        batch : RecordBatch
        """
        cdef shared_ptr[CRecordBatch] c_batch

        if not (0 <= i < self.num_record_batches):
            raise ValueError('Batch number {0} out of range'.format(i))

        with nogil:
            c_batch = GetResultValue(self.reader.get().ReadRecordBatch(i))

        return pyarrow_wrap_batch(c_batch)

    # TODO(wesm): ARROW-503: Function was renamed. Remove after a period of
    # time has passed
    get_record_batch = get_batch

    def get_batch_with_custom_metadata(self, int i):
        """
        Read the record batch with the given index along with
        its custom metadata

        Parameters
        ----------
        i : int
            The index of the record batch in the IPC file.

        Returns
        -------
        batch : RecordBatch
        custom_metadata : KeyValueMetadata
        """
        cdef CRecordBatchWithMetadata c_result

        if not (0 <= i < self.num_record_batches):
            raise ValueError('Batch number {0} out of range'.format(i))

        with nogil:
            c_result = GetResultValue(
                self.reader.get().ReadRecordBatchWithCustomMetadata(i))

        return _wrap_record_batch_with_metadata(c_result)

    def read_all(self):
        """
        Read all record batches as a pyarrow.Table
        """
        cdef:
            vector[shared_ptr[CRecordBatch]] c_batches
            shared_ptr[CTable] c_table
            int idx, batch_count

        batch_count = self.num_record_batches

        # Size the vector up front, then fill each slot by index.
        c_batches.resize(batch_count)
        with nogil:
            for idx in range(batch_count):
                c_batches[idx] = GetResultValue(self.reader.get()
                                                .ReadRecordBatch(idx))
            c_table = GetResultValue(
                CTable.FromRecordBatches(self.schema.sp_schema,
                                         move(c_batches)))

        return pyarrow_wrap_table(c_table)

    read_pandas = _ReadPandasMixin.read_pandas

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Nothing is released on exit.
        pass

    @property
    def stats(self):
        """
        Current IPC read statistics.
        """
        if self.reader:
            return _wrap_read_stats(self.reader.get().stats())
        raise ValueError("Operation on closed reader")
1194
+
1195
+
1196
+ def get_tensor_size(Tensor tensor):
1197
+ """
1198
+ Return total size of serialized Tensor including metadata and padding.
1199
+
1200
+ Parameters
1201
+ ----------
1202
+ tensor : Tensor
1203
+ The tensor for which we want to know the size.
1204
+ """
1205
+ cdef int64_t size
1206
+ with nogil:
1207
+ check_status(GetTensorSize(deref(tensor.tp), &size))
1208
+ return size
1209
+
1210
+
1211
+ def get_record_batch_size(RecordBatch batch):
1212
+ """
1213
+ Return total size of serialized RecordBatch including metadata and padding.
1214
+
1215
+ Parameters
1216
+ ----------
1217
+ batch : RecordBatch
1218
+ The recordbatch for which we want to know the size.
1219
+ """
1220
+ cdef int64_t size
1221
+ with nogil:
1222
+ check_status(GetRecordBatchSize(deref(batch.batch), &size))
1223
+ return size
1224
+
1225
+
1226
+ def write_tensor(Tensor tensor, NativeFile dest):
1227
+ """
1228
+ Write pyarrow.Tensor to pyarrow.NativeFile object at its current position.
1229
+
1230
+ Parameters
1231
+ ----------
1232
+ tensor : pyarrow.Tensor
1233
+ dest : pyarrow.NativeFile
1234
+
1235
+ Returns
1236
+ -------
1237
+ bytes_written : int
1238
+ Total number of bytes written to the file
1239
+ """
1240
+ cdef:
1241
+ int32_t metadata_length
1242
+ int64_t body_length
1243
+
1244
+ handle = dest.get_output_stream()
1245
+
1246
+ with nogil:
1247
+ check_status(
1248
+ WriteTensor(deref(tensor.tp), handle.get(),
1249
+ &metadata_length, &body_length))
1250
+
1251
+ return metadata_length + body_length
1252
+
1253
+
1254
+ cdef NativeFile as_native_file(source):
1255
+ if not isinstance(source, NativeFile):
1256
+ if hasattr(source, 'read'):
1257
+ source = PythonFile(source)
1258
+ else:
1259
+ source = BufferReader(source)
1260
+
1261
+ if not isinstance(source, NativeFile):
1262
+ raise ValueError('Unable to read message from object with type: {0}'
1263
+ .format(type(source)))
1264
+ return source
1265
+
1266
+
1267
+ def read_tensor(source):
1268
+ """Read pyarrow.Tensor from pyarrow.NativeFile object from current
1269
+ position. If the file source supports zero copy (e.g. a memory map), then
1270
+ this operation does not allocate any memory. This function does not assume that
1271
+ the stream is aligned
1272
+
1273
+ Parameters
1274
+ ----------
1275
+ source : pyarrow.NativeFile
1276
+
1277
+ Returns
1278
+ -------
1279
+ tensor : Tensor
1280
+
1281
+ """
1282
+ cdef:
1283
+ shared_ptr[CTensor] sp_tensor
1284
+ CInputStream* c_stream
1285
+ NativeFile nf = as_native_file(source)
1286
+
1287
+ c_stream = nf.get_input_stream().get()
1288
+ with nogil:
1289
+ sp_tensor = GetResultValue(ReadTensor(c_stream))
1290
+ return pyarrow_wrap_tensor(sp_tensor)
1291
+
1292
+
1293
+ def read_message(source):
1294
+ """
1295
+ Read length-prefixed message from file or buffer-like object
1296
+
1297
+ Parameters
1298
+ ----------
1299
+ source : pyarrow.NativeFile, file-like object, or buffer-like object
1300
+
1301
+ Returns
1302
+ -------
1303
+ message : Message
1304
+ """
1305
+ cdef:
1306
+ Message result = Message.__new__(Message)
1307
+ CInputStream* c_stream
1308
+
1309
+ cdef NativeFile nf = as_native_file(source)
1310
+ c_stream = nf.get_input_stream().get()
1311
+
1312
+ with nogil:
1313
+ result.message = move(
1314
+ GetResultValue(ReadMessage(c_stream, c_default_memory_pool())))
1315
+
1316
+ if result.message == nullptr:
1317
+ raise EOFError("End of Arrow stream")
1318
+
1319
+ return result
1320
+
1321
+
1322
+ def read_schema(obj, DictionaryMemo dictionary_memo=None):
1323
+ """
1324
+ Read Schema from message or buffer
1325
+
1326
+ Parameters
1327
+ ----------
1328
+ obj : buffer or Message
1329
+ dictionary_memo : DictionaryMemo, optional
1330
+ Needed to be able to reconstruct dictionary-encoded fields
1331
+ with read_record_batch
1332
+
1333
+ Returns
1334
+ -------
1335
+ schema : Schema
1336
+ """
1337
+ cdef:
1338
+ shared_ptr[CSchema] result
1339
+ shared_ptr[CRandomAccessFile] cpp_file
1340
+ Message message
1341
+ CDictionaryMemo temp_memo
1342
+ CDictionaryMemo* arg_dict_memo
1343
+
1344
+ if dictionary_memo is not None:
1345
+ arg_dict_memo = dictionary_memo.memo
1346
+ else:
1347
+ arg_dict_memo = &temp_memo
1348
+
1349
+ if isinstance(obj, Message):
1350
+ message = obj
1351
+ with nogil:
1352
+ result = GetResultValue(ReadSchema(
1353
+ deref(message.message.get()), arg_dict_memo))
1354
+ else:
1355
+ get_reader(obj, False, &cpp_file)
1356
+ with nogil:
1357
+ result = GetResultValue(ReadSchema(cpp_file.get(), arg_dict_memo))
1358
+
1359
+ return pyarrow_wrap_schema(result)
1360
+
1361
+
1362
+ def read_record_batch(obj, Schema schema,
1363
+ DictionaryMemo dictionary_memo=None):
1364
+ """
1365
+ Read RecordBatch from message, given a known schema. If reading data from a
1366
+ complete IPC stream, use ipc.open_stream instead
1367
+
1368
+ Parameters
1369
+ ----------
1370
+ obj : Message or Buffer-like
1371
+ schema : Schema
1372
+ dictionary_memo : DictionaryMemo, optional
1373
+ If message contains dictionaries, must pass a populated
1374
+ DictionaryMemo
1375
+
1376
+ Returns
1377
+ -------
1378
+ batch : RecordBatch
1379
+ """
1380
+ cdef:
1381
+ shared_ptr[CRecordBatch] result
1382
+ Message message
1383
+ CDictionaryMemo temp_memo
1384
+ CDictionaryMemo* arg_dict_memo
1385
+
1386
+ if isinstance(obj, Message):
1387
+ message = obj
1388
+ else:
1389
+ message = read_message(obj)
1390
+
1391
+ if dictionary_memo is not None:
1392
+ arg_dict_memo = dictionary_memo.memo
1393
+ else:
1394
+ arg_dict_memo = &temp_memo
1395
+
1396
+ with nogil:
1397
+ result = GetResultValue(
1398
+ ReadRecordBatch(deref(message.message.get()),
1399
+ schema.sp_schema,
1400
+ arg_dict_memo,
1401
+ CIpcReadOptions.Defaults()))
1402
+
1403
+ return pyarrow_wrap_batch(result)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/ipc.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # Arrow file and stream reader/writer classes, and other messaging tools
19
+
20
+ import os
21
+
22
+ import pyarrow as pa
23
+
24
+ from pyarrow.lib import (IpcReadOptions, IpcWriteOptions, ReadStats, WriteStats, # noqa
25
+ Message, MessageReader,
26
+ RecordBatchReader, _ReadPandasMixin,
27
+ MetadataVersion,
28
+ read_message, read_record_batch, read_schema,
29
+ read_tensor, write_tensor,
30
+ get_record_batch_size, get_tensor_size)
31
+ import pyarrow.lib as lib
32
+
33
+
34
+ class RecordBatchStreamReader(lib._RecordBatchStreamReader):
35
+ """
36
+ Reader for the Arrow streaming binary format.
37
+
38
+ Parameters
39
+ ----------
40
+ source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
41
+ Either an in-memory buffer, or a readable file object.
42
+ If you want to use memory map use MemoryMappedFile as source.
43
+ options : pyarrow.ipc.IpcReadOptions
44
+ Options for IPC deserialization.
45
+ If None, default values will be used.
46
+ memory_pool : MemoryPool, default None
47
+ If None, default memory pool is used.
48
+ """
49
+
50
+ def __init__(self, source, *, options=None, memory_pool=None):
51
+ options = _ensure_default_ipc_read_options(options)
52
+ self._open(source, options=options, memory_pool=memory_pool)
53
+
54
+
55
+ _ipc_writer_class_doc = """\
56
+ Parameters
57
+ ----------
58
+ sink : str, pyarrow.NativeFile, or file-like Python object
59
+ Either a file path, or a writable file object.
60
+ schema : pyarrow.Schema
61
+ The Arrow schema for data to be written to the file.
62
+ use_legacy_format : bool, default None
63
+ Deprecated in favor of setting options. Cannot be provided with
64
+ options.
65
+
66
+ If None, False will be used unless this default is overridden by
67
+ setting the environment variable ARROW_PRE_0_15_IPC_FORMAT=1
68
+ options : pyarrow.ipc.IpcWriteOptions
69
+ Options for IPC serialization.
70
+
71
+ If None, default values will be used: the legacy format will not
72
+ be used unless overridden by setting the environment variable
73
+ ARROW_PRE_0_15_IPC_FORMAT=1, and the V5 metadata version will be
74
+ used unless overridden by setting the environment variable
75
+ ARROW_PRE_1_0_METADATA_VERSION=1."""
76
+
77
+
78
+ class RecordBatchStreamWriter(lib._RecordBatchStreamWriter):
79
+ __doc__ = """Writer for the Arrow streaming binary format
80
+
81
+ {}""".format(_ipc_writer_class_doc)
82
+
83
+ def __init__(self, sink, schema, *, use_legacy_format=None, options=None):
84
+ options = _get_legacy_format_default(use_legacy_format, options)
85
+ self._open(sink, schema, options=options)
86
+
87
+
88
+ class RecordBatchFileReader(lib._RecordBatchFileReader):
89
+ """
90
+ Class for reading Arrow record batch data from the Arrow binary file format
91
+
92
+ Parameters
93
+ ----------
94
+ source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
95
+ Either an in-memory buffer, or a readable file object.
96
+ If you want to use memory map use MemoryMappedFile as source.
97
+ footer_offset : int, default None
98
+ If the file is embedded in some larger file, this is the byte offset to
99
+ the very end of the file data
100
+ options : pyarrow.ipc.IpcReadOptions
101
+ Options for IPC deserialization.
102
+ If None, default values will be used.
103
+ memory_pool : MemoryPool, default None
104
+ If None, default memory pool is used.
105
+ """
106
+
107
+ def __init__(self, source, footer_offset=None, *, options=None,
108
+ memory_pool=None):
109
+ options = _ensure_default_ipc_read_options(options)
110
+ self._open(source, footer_offset=footer_offset,
111
+ options=options, memory_pool=memory_pool)
112
+
113
+
114
+ class RecordBatchFileWriter(lib._RecordBatchFileWriter):
115
+
116
+ __doc__ = """Writer to create the Arrow binary file format
117
+
118
+ {}""".format(_ipc_writer_class_doc)
119
+
120
+ def __init__(self, sink, schema, *, use_legacy_format=None, options=None):
121
+ options = _get_legacy_format_default(use_legacy_format, options)
122
+ self._open(sink, schema, options=options)
123
+
124
+
125
+ def _get_legacy_format_default(use_legacy_format, options):
126
+ if use_legacy_format is not None and options is not None:
127
+ raise ValueError(
128
+ "Can provide at most one of options and use_legacy_format")
129
+ elif options:
130
+ if not isinstance(options, IpcWriteOptions):
131
+ raise TypeError("expected IpcWriteOptions, got {}"
132
+ .format(type(options)))
133
+ return options
134
+
135
+ metadata_version = MetadataVersion.V5
136
+ if use_legacy_format is None:
137
+ use_legacy_format = \
138
+ bool(int(os.environ.get('ARROW_PRE_0_15_IPC_FORMAT', '0')))
139
+ if bool(int(os.environ.get('ARROW_PRE_1_0_METADATA_VERSION', '0'))):
140
+ metadata_version = MetadataVersion.V4
141
+ return IpcWriteOptions(use_legacy_format=use_legacy_format,
142
+ metadata_version=metadata_version)
143
+
144
+
145
+ def _ensure_default_ipc_read_options(options):
146
+ if options and not isinstance(options, IpcReadOptions):
147
+ raise TypeError(
148
+ "expected IpcReadOptions, got {}".format(type(options))
149
+ )
150
+ return options or IpcReadOptions()
151
+
152
+
153
+ def new_stream(sink, schema, *, use_legacy_format=None, options=None):
154
+ return RecordBatchStreamWriter(sink, schema,
155
+ use_legacy_format=use_legacy_format,
156
+ options=options)
157
+
158
+
159
+ new_stream.__doc__ = """\
160
+ Create an Arrow columnar IPC stream writer instance
161
+
162
+ {}
163
+
164
+ Returns
165
+ -------
166
+ writer : RecordBatchStreamWriter
167
+ A writer for the given sink
168
+ """.format(_ipc_writer_class_doc)
169
+
170
+
171
+ def open_stream(source, *, options=None, memory_pool=None):
172
+ """
173
+ Create reader for Arrow streaming format.
174
+
175
+ Parameters
176
+ ----------
177
+ source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
178
+ Either an in-memory buffer, or a readable file object.
179
+ options : pyarrow.ipc.IpcReadOptions
180
+ Options for IPC deserialization.
181
+ If None, default values will be used.
182
+ memory_pool : MemoryPool, default None
183
+ If None, default memory pool is used.
184
+
185
+ Returns
186
+ -------
187
+ reader : RecordBatchStreamReader
188
+ A reader for the given source
189
+ """
190
+ return RecordBatchStreamReader(source, options=options,
191
+ memory_pool=memory_pool)
192
+
193
+
194
+ def new_file(sink, schema, *, use_legacy_format=None, options=None):
195
+ return RecordBatchFileWriter(sink, schema,
196
+ use_legacy_format=use_legacy_format,
197
+ options=options)
198
+
199
+
200
+ new_file.__doc__ = """\
201
+ Create an Arrow columnar IPC file writer instance
202
+
203
+ {}
204
+
205
+ Returns
206
+ -------
207
+ writer : RecordBatchFileWriter
208
+ A writer for the given sink
209
+ """.format(_ipc_writer_class_doc)
210
+
211
+
212
+ def open_file(source, footer_offset=None, *, options=None, memory_pool=None):
213
+ """
214
+ Create reader for Arrow file format.
215
+
216
+ Parameters
217
+ ----------
218
+ source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
219
+ Either an in-memory buffer, or a readable file object.
220
+ footer_offset : int, default None
221
+ If the file is embedded in some larger file, this is the byte offset to
222
+ the very end of the file data.
223
+ options : pyarrow.ipc.IpcReadOptions
224
+ Options for IPC deserialization.
225
+ If None, default values will be used.
226
+ memory_pool : MemoryPool, default None
227
+ If None, default memory pool is used.
228
+
229
+ Returns
230
+ -------
231
+ reader : RecordBatchFileReader
232
+ A reader for the given source
233
+ """
234
+ return RecordBatchFileReader(
235
+ source, footer_offset=footer_offset,
236
+ options=options, memory_pool=memory_pool)
237
+
238
+
239
+ def serialize_pandas(df, *, nthreads=None, preserve_index=None):
240
+ """
241
+ Serialize a pandas DataFrame into a buffer protocol compatible object.
242
+
243
+ Parameters
244
+ ----------
245
+ df : pandas.DataFrame
246
+ nthreads : int, default None
247
+ Number of threads to use for conversion to Arrow, default all CPUs.
248
+ preserve_index : bool, default None
249
+ The default of None will store the index as a column, except for
250
+ RangeIndex which is stored as metadata only. If True, always
251
+ preserve the pandas index data as a column. If False, no index
252
+ information is saved and the result will have a default RangeIndex.
253
+
254
+ Returns
255
+ -------
256
+ buf : buffer
257
+ An object compatible with the buffer protocol.
258
+ """
259
+ batch = pa.RecordBatch.from_pandas(df, nthreads=nthreads,
260
+ preserve_index=preserve_index)
261
+ sink = pa.BufferOutputStream()
262
+ with pa.RecordBatchStreamWriter(sink, batch.schema) as writer:
263
+ writer.write_batch(batch)
264
+ return sink.getvalue()
265
+
266
+
267
+ def deserialize_pandas(buf, *, use_threads=True):
268
+ """Deserialize a buffer protocol compatible object into a pandas DataFrame.
269
+
270
+ Parameters
271
+ ----------
272
+ buf : buffer
273
+ An object compatible with the buffer protocol.
274
+ use_threads : bool, default True
275
+ Whether to parallelize the conversion using multiple threads.
276
+
277
+ Returns
278
+ -------
279
+ df : pandas.DataFrame
280
+ The buffer deserialized as pandas DataFrame
281
+ """
282
+ buffer_reader = pa.BufferReader(buf)
283
+ with pa.RecordBatchStreamReader(buffer_reader) as reader:
284
+ table = reader.read_all()
285
+ return table.to_pandas(use_threads=use_threads)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/jvm.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+ """
18
+ Functions to interact with Arrow memory allocated by Arrow Java.
19
+
20
+ These functions convert the objects holding the metadata, the actual
21
+ data is not copied at all.
22
+
23
+ This will only work with a JVM running in the same process such as provided
24
+ through jpype. Modules that talk to a remote JVM like py4j will not work as the
25
+ memory addresses reported by them are not reachable in the python process.
26
+ """
27
+
28
+ import pyarrow as pa
29
+
30
+
31
+ class _JvmBufferNanny:
32
+ """
33
+ An object that keeps a org.apache.arrow.memory.ArrowBuf's underlying
34
+ memory alive.
35
+ """
36
+ ref_manager = None
37
+
38
+ def __init__(self, jvm_buf):
39
+ ref_manager = jvm_buf.getReferenceManager()
40
+ # Will raise a java.lang.IllegalArgumentException if the buffer
41
+ # is already freed. It seems that exception cannot easily be
42
+ # caught...
43
+ ref_manager.retain()
44
+ self.ref_manager = ref_manager
45
+
46
+ def __del__(self):
47
+ if self.ref_manager is not None:
48
+ self.ref_manager.release()
49
+
50
+
51
+ def jvm_buffer(jvm_buf):
52
+ """
53
+ Construct an Arrow buffer from org.apache.arrow.memory.ArrowBuf
54
+
55
+ Parameters
56
+ ----------
57
+
58
+ jvm_buf: org.apache.arrow.memory.ArrowBuf
59
+ Arrow Buffer representation on the JVM.
60
+
61
+ Returns
62
+ -------
63
+ pyarrow.Buffer
64
+ Python Buffer that references the JVM memory.
65
+ """
66
+ nanny = _JvmBufferNanny(jvm_buf)
67
+ address = jvm_buf.memoryAddress()
68
+ size = jvm_buf.capacity()
69
+ return pa.foreign_buffer(address, size, base=nanny)
70
+
71
+
72
+ def _from_jvm_int_type(jvm_type):
73
+ """
74
+ Convert a JVM int type to its Python equivalent.
75
+
76
+ Parameters
77
+ ----------
78
+ jvm_type : org.apache.arrow.vector.types.pojo.ArrowType$Int
79
+
80
+ Returns
81
+ -------
82
+ typ : pyarrow.DataType
83
+ """
84
+
85
+ bit_width = jvm_type.getBitWidth()
86
+ if jvm_type.getIsSigned():
87
+ if bit_width == 8:
88
+ return pa.int8()
89
+ elif bit_width == 16:
90
+ return pa.int16()
91
+ elif bit_width == 32:
92
+ return pa.int32()
93
+ elif bit_width == 64:
94
+ return pa.int64()
95
+ else:
96
+ if bit_width == 8:
97
+ return pa.uint8()
98
+ elif bit_width == 16:
99
+ return pa.uint16()
100
+ elif bit_width == 32:
101
+ return pa.uint32()
102
+ elif bit_width == 64:
103
+ return pa.uint64()
104
+
105
+
106
+ def _from_jvm_float_type(jvm_type):
107
+ """
108
+ Convert a JVM float type to its Python equivalent.
109
+
110
+ Parameters
111
+ ----------
112
+ jvm_type: org.apache.arrow.vector.types.pojo.ArrowType$FloatingPoint
113
+
114
+ Returns
115
+ -------
116
+ typ: pyarrow.DataType
117
+ """
118
+ precision = jvm_type.getPrecision().toString()
119
+ if precision == 'HALF':
120
+ return pa.float16()
121
+ elif precision == 'SINGLE':
122
+ return pa.float32()
123
+ elif precision == 'DOUBLE':
124
+ return pa.float64()
125
+
126
+
127
+ def _from_jvm_time_type(jvm_type):
128
+ """
129
+ Convert a JVM time type to its Python equivalent.
130
+
131
+ Parameters
132
+ ----------
133
+ jvm_type: org.apache.arrow.vector.types.pojo.ArrowType$Time
134
+
135
+ Returns
136
+ -------
137
+ typ: pyarrow.DataType
138
+ """
139
+ time_unit = jvm_type.getUnit().toString()
140
+ if time_unit == 'SECOND':
141
+ assert jvm_type.getBitWidth() == 32
142
+ return pa.time32('s')
143
+ elif time_unit == 'MILLISECOND':
144
+ assert jvm_type.getBitWidth() == 32
145
+ return pa.time32('ms')
146
+ elif time_unit == 'MICROSECOND':
147
+ assert jvm_type.getBitWidth() == 64
148
+ return pa.time64('us')
149
+ elif time_unit == 'NANOSECOND':
150
+ assert jvm_type.getBitWidth() == 64
151
+ return pa.time64('ns')
152
+
153
+
154
+ def _from_jvm_timestamp_type(jvm_type):
155
+ """
156
+ Convert a JVM timestamp type to its Python equivalent.
157
+
158
+ Parameters
159
+ ----------
160
+ jvm_type: org.apache.arrow.vector.types.pojo.ArrowType$Timestamp
161
+
162
+ Returns
163
+ -------
164
+ typ: pyarrow.DataType
165
+ """
166
+ time_unit = jvm_type.getUnit().toString()
167
+ timezone = jvm_type.getTimezone()
168
+ if timezone is not None:
169
+ timezone = str(timezone)
170
+ if time_unit == 'SECOND':
171
+ return pa.timestamp('s', tz=timezone)
172
+ elif time_unit == 'MILLISECOND':
173
+ return pa.timestamp('ms', tz=timezone)
174
+ elif time_unit == 'MICROSECOND':
175
+ return pa.timestamp('us', tz=timezone)
176
+ elif time_unit == 'NANOSECOND':
177
+ return pa.timestamp('ns', tz=timezone)
178
+
179
+
180
+ def _from_jvm_date_type(jvm_type):
181
+ """
182
+ Convert a JVM date type to its Python equivalent
183
+
184
+ Parameters
185
+ ----------
186
+ jvm_type: org.apache.arrow.vector.types.pojo.ArrowType$Date
187
+
188
+ Returns
189
+ -------
190
+ typ: pyarrow.DataType
191
+ """
192
+ day_unit = jvm_type.getUnit().toString()
193
+ if day_unit == 'DAY':
194
+ return pa.date32()
195
+ elif day_unit == 'MILLISECOND':
196
+ return pa.date64()
197
+
198
+
199
+ def field(jvm_field):
200
+ """
201
+ Construct a Field from a org.apache.arrow.vector.types.pojo.Field
202
+ instance.
203
+
204
+ Parameters
205
+ ----------
206
+ jvm_field: org.apache.arrow.vector.types.pojo.Field
207
+
208
+ Returns
209
+ -------
210
+ pyarrow.Field
211
+ """
212
+ name = str(jvm_field.getName())
213
+ jvm_type = jvm_field.getType()
214
+
215
+ typ = None
216
+ if not jvm_type.isComplex():
217
+ type_str = jvm_type.getTypeID().toString()
218
+ if type_str == 'Null':
219
+ typ = pa.null()
220
+ elif type_str == 'Int':
221
+ typ = _from_jvm_int_type(jvm_type)
222
+ elif type_str == 'FloatingPoint':
223
+ typ = _from_jvm_float_type(jvm_type)
224
+ elif type_str == 'Utf8':
225
+ typ = pa.string()
226
+ elif type_str == 'Binary':
227
+ typ = pa.binary()
228
+ elif type_str == 'FixedSizeBinary':
229
+ typ = pa.binary(jvm_type.getByteWidth())
230
+ elif type_str == 'Bool':
231
+ typ = pa.bool_()
232
+ elif type_str == 'Time':
233
+ typ = _from_jvm_time_type(jvm_type)
234
+ elif type_str == 'Timestamp':
235
+ typ = _from_jvm_timestamp_type(jvm_type)
236
+ elif type_str == 'Date':
237
+ typ = _from_jvm_date_type(jvm_type)
238
+ elif type_str == 'Decimal':
239
+ typ = pa.decimal128(jvm_type.getPrecision(), jvm_type.getScale())
240
+ else:
241
+ raise NotImplementedError(
242
+ "Unsupported JVM type: {}".format(type_str))
243
+ else:
244
+ # TODO: The following JVM types are not implemented:
245
+ # Struct, List, FixedSizeList, Union, Dictionary
246
+ raise NotImplementedError(
247
+ "JVM field conversion only implemented for primitive types.")
248
+
249
+ nullable = jvm_field.isNullable()
250
+ jvm_metadata = jvm_field.getMetadata()
251
+ if jvm_metadata.isEmpty():
252
+ metadata = None
253
+ else:
254
+ metadata = {str(entry.getKey()): str(entry.getValue())
255
+ for entry in jvm_metadata.entrySet()}
256
+ return pa.field(name, typ, nullable, metadata)
257
+
258
+
259
+ def schema(jvm_schema):
260
+ """
261
+ Construct a Schema from a org.apache.arrow.vector.types.pojo.Schema
262
+ instance.
263
+
264
+ Parameters
265
+ ----------
266
+ jvm_schema: org.apache.arrow.vector.types.pojo.Schema
267
+
268
+ Returns
269
+ -------
270
+ pyarrow.Schema
271
+ """
272
+ fields = jvm_schema.getFields()
273
+ fields = [field(f) for f in fields]
274
+ jvm_metadata = jvm_schema.getCustomMetadata()
275
+ if jvm_metadata.isEmpty():
276
+ metadata = None
277
+ else:
278
+ metadata = {str(entry.getKey()): str(entry.getValue())
279
+ for entry in jvm_metadata.entrySet()}
280
+ return pa.schema(fields, metadata)
281
+
282
+
283
+ def array(jvm_array):
284
+ """
285
+ Construct an (Python) Array from its JVM equivalent.
286
+
287
+ Parameters
288
+ ----------
289
+ jvm_array : org.apache.arrow.vector.ValueVector
290
+
291
+ Returns
292
+ -------
293
+ array : Array
294
+ """
295
+ if jvm_array.getField().getType().isComplex():
296
+ minor_type_str = jvm_array.getMinorType().toString()
297
+ raise NotImplementedError(
298
+ "Cannot convert JVM Arrow array of type {},"
299
+ " complex types not yet implemented.".format(minor_type_str))
300
+ dtype = field(jvm_array.getField()).type
301
+ buffers = [jvm_buffer(buf)
302
+ for buf in list(jvm_array.getBuffers(False))]
303
+
304
+ # If JVM has an empty Vector, buffer list will be empty so create manually
305
+ if len(buffers) == 0:
306
+ return pa.array([], type=dtype)
307
+
308
+ length = jvm_array.getValueCount()
309
+ null_count = jvm_array.getNullCount()
310
+ return pa.Array.from_buffers(dtype, length, buffers, null_count)
311
+
312
+
313
+ def record_batch(jvm_vector_schema_root):
314
+ """
315
+ Construct a (Python) RecordBatch from a JVM VectorSchemaRoot
316
+
317
+ Parameters
318
+ ----------
319
+ jvm_vector_schema_root : org.apache.arrow.vector.VectorSchemaRoot
320
+
321
+ Returns
322
+ -------
323
+ record_batch: pyarrow.RecordBatch
324
+ """
325
+ pa_schema = schema(jvm_vector_schema_root.getSchema())
326
+
327
+ arrays = []
328
+ for name in pa_schema.names:
329
+ arrays.append(array(jvm_vector_schema_root.getVector(name)))
330
+
331
+ return pa.RecordBatch.from_arrays(
332
+ arrays,
333
+ pa_schema.names,
334
+ metadata=pa_schema.metadata
335
+ )
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/lib.pxd ADDED
@@ -0,0 +1,770 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # cython: language_level = 3
19
+
20
+ from cpython cimport PyObject
21
+ from libcpp cimport nullptr, bool as c_bool
22
+ from libcpp.cast cimport dynamic_cast
23
+ from libcpp.memory cimport dynamic_pointer_cast
24
+ from pyarrow.includes.common cimport *
25
+ from pyarrow.includes.libarrow cimport *
26
+ from pyarrow.includes.libarrow_python cimport *
27
+
28
+ # Will be available in Cython 3, not backported
29
+ # ref: https://github.com/cython/cython/issues/3293#issuecomment-1223058101
30
+ cdef extern from "<optional>" namespace "std" nogil:
31
+ cdef cppclass nullopt_t:
32
+ nullopt_t()
33
+
34
+ cdef nullopt_t nullopt
35
+
36
+ cdef cppclass optional[T]:
37
+ ctypedef T value_type
38
+ optional()
39
+ optional(nullopt_t)
40
+ optional(optional&) except +
41
+ optional(T&) except +
42
+ c_bool has_value()
43
+ T& value()
44
+ T& value_or[U](U& default_value)
45
+ void swap(optional&)
46
+ void reset()
47
+ T& emplace(...)
48
+ T& operator*()
49
+ # T* operator->() # Not Supported
50
+ optional& operator=(optional&)
51
+ optional& operator=[U](U&)
52
+ c_bool operator bool()
53
+ c_bool operator!()
54
+ c_bool operator==[U](optional&, U&)
55
+ c_bool operator!=[U](optional&, U&)
56
+ c_bool operator<[U](optional&, U&)
57
+ c_bool operator>[U](optional&, U&)
58
+ c_bool operator<=[U](optional&, U&)
59
+ c_bool operator>=[U](optional&, U&)
60
+
61
+ optional[T] make_optional[T](...) except +
62
+
63
+ cdef extern from "Python.h":
64
+ int PySlice_Check(object)
65
+
66
+
67
+ cdef int check_status(const CStatus& status) except -1 nogil
68
+ cdef object convert_status(const CStatus& status)
69
+
70
+
71
+ cdef class _Weakrefable:
72
+ cdef object __weakref__
73
+
74
+
75
+ cdef class IpcWriteOptions(_Weakrefable):
76
+ cdef:
77
+ CIpcWriteOptions c_options
78
+
79
+
80
+ cdef class IpcReadOptions(_Weakrefable):
81
+ cdef:
82
+ CIpcReadOptions c_options
83
+
84
+
85
+ cdef class Message(_Weakrefable):
86
+ cdef:
87
+ unique_ptr[CMessage] message
88
+
89
+
90
+ cdef class MemoryPool(_Weakrefable):
91
+ cdef:
92
+ CMemoryPool* pool
93
+
94
+ cdef void init(self, CMemoryPool* pool)
95
+
96
+
97
+ cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool)
98
+
99
+
100
+ cdef object box_memory_pool(CMemoryPool* pool)
101
+
102
+
103
+ cdef class DataType(_Weakrefable):
104
+ cdef:
105
+ shared_ptr[CDataType] sp_type
106
+ CDataType* type
107
+ bytes pep3118_format
108
+
109
+ cdef void init(self, const shared_ptr[CDataType]& type) except *
110
+ cpdef Field field(self, i)
111
+
112
+
113
+ cdef class ListType(DataType):
114
+ cdef:
115
+ const CListType* list_type
116
+
117
+
118
+ cdef class LargeListType(DataType):
119
+ cdef:
120
+ const CLargeListType* list_type
121
+
122
+
123
+ cdef class ListViewType(DataType):
124
+ cdef:
125
+ const CListViewType* list_view_type
126
+
127
+
128
+ cdef class LargeListViewType(DataType):
129
+ cdef:
130
+ const CLargeListViewType* list_view_type
131
+
132
+
133
+ cdef class MapType(DataType):
134
+ cdef:
135
+ const CMapType* map_type
136
+
137
+
138
+ cdef class FixedSizeListType(DataType):
139
+ cdef:
140
+ const CFixedSizeListType* list_type
141
+
142
+
143
+ cdef class StructType(DataType):
144
+ cdef:
145
+ const CStructType* struct_type
146
+
147
+ cdef Field field_by_name(self, name)
148
+
149
+
150
+ cdef class DictionaryMemo(_Weakrefable):
151
+ cdef:
152
+ # Even though the CDictionaryMemo instance is private, we allocate
153
+ # it on the heap so as to avoid C++ ABI issues with Python wheels.
154
+ shared_ptr[CDictionaryMemo] sp_memo
155
+ CDictionaryMemo* memo
156
+
157
+
158
+ cdef class DictionaryType(DataType):
159
+ cdef:
160
+ const CDictionaryType* dict_type
161
+
162
+
163
+ cdef class TimestampType(DataType):
164
+ cdef:
165
+ const CTimestampType* ts_type
166
+
167
+
168
+ cdef class Time32Type(DataType):
169
+ cdef:
170
+ const CTime32Type* time_type
171
+
172
+
173
+ cdef class Time64Type(DataType):
174
+ cdef:
175
+ const CTime64Type* time_type
176
+
177
+
178
+ cdef class DurationType(DataType):
179
+ cdef:
180
+ const CDurationType* duration_type
181
+
182
+
183
+ cdef class FixedSizeBinaryType(DataType):
184
+ cdef:
185
+ const CFixedSizeBinaryType* fixed_size_binary_type
186
+
187
+
188
+ cdef class Decimal32Type(FixedSizeBinaryType):
189
+ cdef:
190
+ const CDecimal32Type* decimal32_type
191
+
192
+
193
+ cdef class Decimal64Type(FixedSizeBinaryType):
194
+ cdef:
195
+ const CDecimal64Type* decimal64_type
196
+
197
+
198
+ cdef class Decimal128Type(FixedSizeBinaryType):
199
+ cdef:
200
+ const CDecimal128Type* decimal128_type
201
+
202
+
203
+ cdef class Decimal256Type(FixedSizeBinaryType):
204
+ cdef:
205
+ const CDecimal256Type* decimal256_type
206
+
207
+
208
+ cdef class RunEndEncodedType(DataType):
209
+ cdef:
210
+ const CRunEndEncodedType* run_end_encoded_type
211
+
212
+
213
+ cdef class BaseExtensionType(DataType):
214
+ cdef:
215
+ const CExtensionType* ext_type
216
+
217
+
218
+ cdef class ExtensionType(BaseExtensionType):
219
+ cdef:
220
+ const CPyExtensionType* cpy_ext_type
221
+
222
+
223
+ cdef class FixedShapeTensorType(BaseExtensionType):
224
+ cdef:
225
+ const CFixedShapeTensorType* tensor_ext_type
226
+
227
+ cdef class Bool8Type(BaseExtensionType):
228
+ cdef:
229
+ const CBool8Type* bool8_ext_type
230
+
231
+ cdef class OpaqueType(BaseExtensionType):
232
+ cdef:
233
+ const COpaqueType* opaque_ext_type
234
+
235
+ cdef class UuidType(BaseExtensionType):
236
+ cdef:
237
+ const CUuidType* uuid_ext_type
238
+
239
+ cdef class JsonType(BaseExtensionType):
240
+ cdef:
241
+ const CJsonType* json_ext_type
242
+
243
+
244
+ cdef class PyExtensionType(ExtensionType):
245
+ pass
246
+
247
+
248
+ cdef class _Metadata(_Weakrefable):
249
+ # required because KeyValueMetadata also extends collections.abc.Mapping
250
+ # and the first parent class must be an extension type
251
+ pass
252
+
253
+
254
+ cdef class KeyValueMetadata(_Metadata):
255
+ cdef:
256
+ shared_ptr[const CKeyValueMetadata] wrapped
257
+ const CKeyValueMetadata* metadata
258
+
259
+ cdef void init(self, const shared_ptr[const CKeyValueMetadata]& wrapped)
260
+
261
+ @staticmethod
262
+ cdef wrap(const shared_ptr[const CKeyValueMetadata]& sp)
263
+ cdef inline shared_ptr[const CKeyValueMetadata] unwrap(self) nogil
264
+
265
+
266
+ cdef class Field(_Weakrefable):
267
+ cdef:
268
+ shared_ptr[CField] sp_field
269
+ CField* field
270
+
271
+ cdef readonly:
272
+ DataType type
273
+
274
+ cdef void init(self, const shared_ptr[CField]& field)
275
+
276
+
277
+ cdef class Schema(_Weakrefable):
278
+ cdef:
279
+ shared_ptr[CSchema] sp_schema
280
+ CSchema* schema
281
+
282
+ cdef void init(self, const vector[shared_ptr[CField]]& fields)
283
+ cdef void init_schema(self, const shared_ptr[CSchema]& schema)
284
+
285
+
286
+ cdef class Scalar(_Weakrefable):
287
+ cdef:
288
+ shared_ptr[CScalar] wrapped
289
+
290
+ cdef void init(self, const shared_ptr[CScalar]& wrapped)
291
+
292
+ @staticmethod
293
+ cdef wrap(const shared_ptr[CScalar]& wrapped)
294
+
295
+ cdef inline shared_ptr[CScalar] unwrap(self) nogil
296
+
297
+
298
+ cdef class _PandasConvertible(_Weakrefable):
299
+ pass
300
+
301
+
302
+ cdef class Array(_PandasConvertible):
303
+ cdef:
304
+ shared_ptr[CArray] sp_array
305
+ CArray* ap
306
+
307
+ cdef readonly:
308
+ DataType type
309
+ # To allow Table to propagate metadata to pandas.Series
310
+ object _name
311
+
312
+ cdef void init(self, const shared_ptr[CArray]& sp_array) except *
313
+ cdef getitem(self, int64_t i)
314
+ cdef int64_t length(self)
315
+ cdef void _assert_cpu(self) except *
316
+
317
+
318
+ cdef class Tensor(_Weakrefable):
319
+ cdef:
320
+ shared_ptr[CTensor] sp_tensor
321
+ CTensor* tp
322
+
323
+ cdef readonly:
324
+ DataType type
325
+ bytes _ssize_t_shape
326
+ bytes _ssize_t_strides
327
+
328
+ cdef void init(self, const shared_ptr[CTensor]& sp_tensor)
329
+
330
+
331
+ cdef class SparseCSRMatrix(_Weakrefable):
332
+ cdef:
333
+ shared_ptr[CSparseCSRMatrix] sp_sparse_tensor
334
+ CSparseCSRMatrix* stp
335
+
336
+ cdef readonly:
337
+ DataType type
338
+
339
+ cdef void init(self, const shared_ptr[CSparseCSRMatrix]& sp_sparse_tensor)
340
+
341
+
342
+ cdef class SparseCSCMatrix(_Weakrefable):
343
+ cdef:
344
+ shared_ptr[CSparseCSCMatrix] sp_sparse_tensor
345
+ CSparseCSCMatrix* stp
346
+
347
+ cdef readonly:
348
+ DataType type
349
+
350
+ cdef void init(self, const shared_ptr[CSparseCSCMatrix]& sp_sparse_tensor)
351
+
352
+
353
+ cdef class SparseCOOTensor(_Weakrefable):
354
+ cdef:
355
+ shared_ptr[CSparseCOOTensor] sp_sparse_tensor
356
+ CSparseCOOTensor* stp
357
+
358
+ cdef readonly:
359
+ DataType type
360
+
361
+ cdef void init(self, const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor)
362
+
363
+
364
+ cdef class SparseCSFTensor(_Weakrefable):
365
+ cdef:
366
+ shared_ptr[CSparseCSFTensor] sp_sparse_tensor
367
+ CSparseCSFTensor* stp
368
+
369
+ cdef readonly:
370
+ DataType type
371
+
372
+ cdef void init(self, const shared_ptr[CSparseCSFTensor]& sp_sparse_tensor)
373
+
374
+
375
+ cdef class NullArray(Array):
376
+ pass
377
+
378
+
379
+ cdef class BooleanArray(Array):
380
+ pass
381
+
382
+
383
+ cdef class NumericArray(Array):
384
+ pass
385
+
386
+
387
+ cdef class IntegerArray(NumericArray):
388
+ pass
389
+
390
+
391
+ cdef class FloatingPointArray(NumericArray):
392
+ pass
393
+
394
+
395
+ cdef class Int8Array(IntegerArray):
396
+ pass
397
+
398
+
399
+ cdef class UInt8Array(IntegerArray):
400
+ pass
401
+
402
+
403
+ cdef class Int16Array(IntegerArray):
404
+ pass
405
+
406
+
407
+ cdef class UInt16Array(IntegerArray):
408
+ pass
409
+
410
+
411
+ cdef class Int32Array(IntegerArray):
412
+ pass
413
+
414
+
415
+ cdef class UInt32Array(IntegerArray):
416
+ pass
417
+
418
+
419
+ cdef class Int64Array(IntegerArray):
420
+ pass
421
+
422
+
423
+ cdef class UInt64Array(IntegerArray):
424
+ pass
425
+
426
+
427
+ cdef class HalfFloatArray(FloatingPointArray):
428
+ pass
429
+
430
+
431
+ cdef class FloatArray(FloatingPointArray):
432
+ pass
433
+
434
+
435
+ cdef class DoubleArray(FloatingPointArray):
436
+ pass
437
+
438
+
439
+ cdef class FixedSizeBinaryArray(Array):
440
+ pass
441
+
442
+
443
+ cdef class Decimal32Array(FixedSizeBinaryArray):
444
+ pass
445
+
446
+
447
+ cdef class Decimal64Array(FixedSizeBinaryArray):
448
+ pass
449
+
450
+
451
+ cdef class Decimal128Array(FixedSizeBinaryArray):
452
+ pass
453
+
454
+
455
+ cdef class Decimal256Array(FixedSizeBinaryArray):
456
+ pass
457
+
458
+
459
+ cdef class StructArray(Array):
460
+ pass
461
+
462
+
463
+ cdef class BaseListArray(Array):
464
+ pass
465
+
466
+
467
+ cdef class ListArray(BaseListArray):
468
+ pass
469
+
470
+
471
+ cdef class LargeListArray(BaseListArray):
472
+ pass
473
+
474
+
475
+ cdef class ListViewArray(BaseListArray):
476
+ pass
477
+
478
+
479
+ cdef class LargeListViewArray(BaseListArray):
480
+ pass
481
+
482
+
483
+ cdef class MapArray(ListArray):
484
+ pass
485
+
486
+
487
+ cdef class FixedSizeListArray(BaseListArray):
488
+ pass
489
+
490
+
491
+ cdef class UnionArray(Array):
492
+ pass
493
+
494
+
495
+ cdef class StringArray(Array):
496
+ pass
497
+
498
+
499
+ cdef class BinaryArray(Array):
500
+ pass
501
+
502
+
503
+ cdef class StringViewArray(Array):
504
+ pass
505
+
506
+
507
+ cdef class BinaryViewArray(Array):
508
+ pass
509
+
510
+
511
+ cdef class DictionaryArray(Array):
512
+ cdef:
513
+ object _indices, _dictionary
514
+
515
+
516
+ cdef class ExtensionArray(Array):
517
+ pass
518
+
519
+
520
+ cdef class MonthDayNanoIntervalArray(Array):
521
+ pass
522
+
523
+
524
+ cdef wrap_array_output(PyObject* output)
525
+ cdef wrap_datum(const CDatum& datum)
526
+
527
+
528
+ cdef class ChunkedArray(_PandasConvertible):
529
+ cdef:
530
+ shared_ptr[CChunkedArray] sp_chunked_array
531
+ CChunkedArray* chunked_array
532
+ c_bool _is_cpu
533
+ c_bool _init_is_cpu
534
+
535
+ cdef readonly:
536
+ # To allow Table to propagate metadata to pandas.Series
537
+ object _name
538
+
539
+ cdef void init(self, const shared_ptr[CChunkedArray]& chunked_array)
540
+ cdef getitem(self, int64_t i)
541
+
542
+
543
+ cdef class _Tabular(_PandasConvertible):
544
+ cdef void _assert_cpu(self) except *
545
+
546
+
547
+ cdef class Table(_Tabular):
548
+ cdef:
549
+ shared_ptr[CTable] sp_table
550
+ CTable* table
551
+ c_bool _is_cpu
552
+ c_bool _init_is_cpu
553
+
554
+ cdef void init(self, const shared_ptr[CTable]& table)
555
+
556
+
557
+ cdef class RecordBatch(_Tabular):
558
+ cdef:
559
+ shared_ptr[CRecordBatch] sp_batch
560
+ CRecordBatch* batch
561
+ Schema _schema
562
+
563
+ cdef void init(self, const shared_ptr[CRecordBatch]& table)
564
+
565
+
566
+ cdef class Device(_Weakrefable):
567
+ cdef:
568
+ shared_ptr[CDevice] device
569
+
570
+ cdef void init(self, const shared_ptr[CDevice]& device)
571
+
572
+ @staticmethod
573
+ cdef wrap(const shared_ptr[CDevice]& device)
574
+
575
+ cdef inline shared_ptr[CDevice] unwrap(self) nogil
576
+
577
+
578
+ cdef class MemoryManager(_Weakrefable):
579
+ cdef:
580
+ shared_ptr[CMemoryManager] memory_manager
581
+
582
+ cdef void init(self, const shared_ptr[CMemoryManager]& memory_manager)
583
+
584
+ @staticmethod
585
+ cdef wrap(const shared_ptr[CMemoryManager]& mm)
586
+
587
+ cdef inline shared_ptr[CMemoryManager] unwrap(self) nogil
588
+
589
+
590
+ cdef class Buffer(_Weakrefable):
591
+ cdef:
592
+ shared_ptr[CBuffer] buffer
593
+ Py_ssize_t shape[1]
594
+ Py_ssize_t strides[1]
595
+
596
+ cdef void init(self, const shared_ptr[CBuffer]& buffer)
597
+ cdef getitem(self, int64_t i)
598
+
599
+
600
+ cdef class ResizableBuffer(Buffer):
601
+
602
+ cdef void init_rz(self, const shared_ptr[CResizableBuffer]& buffer)
603
+
604
+
605
+ cdef class NativeFile(_Weakrefable):
606
+ cdef:
607
+ shared_ptr[CInputStream] input_stream
608
+ shared_ptr[CRandomAccessFile] random_access
609
+ shared_ptr[COutputStream] output_stream
610
+ bint is_readable
611
+ bint is_writable
612
+ bint is_seekable
613
+ bint _is_appending
614
+ bint own_file
615
+
616
+ # By implementing these "virtual" functions (all functions in Cython
617
+ # extension classes are technically virtual in the C++ sense) we can expose
618
+ # the arrow::io abstract file interfaces to other components throughout the
619
+ # suite of Arrow C++ libraries
620
+ cdef set_random_access_file(self, shared_ptr[CRandomAccessFile] handle)
621
+ cdef set_input_stream(self, shared_ptr[CInputStream] handle)
622
+ cdef set_output_stream(self, shared_ptr[COutputStream] handle)
623
+
624
+ cdef shared_ptr[CRandomAccessFile] get_random_access_file(self) except *
625
+ cdef shared_ptr[CInputStream] get_input_stream(self) except *
626
+ cdef shared_ptr[COutputStream] get_output_stream(self) except *
627
+
628
+
629
+ cdef class BufferedInputStream(NativeFile):
630
+ pass
631
+
632
+
633
+ cdef class BufferedOutputStream(NativeFile):
634
+ pass
635
+
636
+
637
+ cdef class CompressedInputStream(NativeFile):
638
+ pass
639
+
640
+
641
+ cdef class CompressedOutputStream(NativeFile):
642
+ pass
643
+
644
+
645
+ cdef class _CRecordBatchWriter(_Weakrefable):
646
+ cdef:
647
+ SharedPtrNoGIL[CRecordBatchWriter] writer
648
+
649
+
650
+ cdef class RecordBatchReader(_Weakrefable):
651
+ cdef:
652
+ SharedPtrNoGIL[CRecordBatchReader] reader
653
+
654
+
655
+ cdef class CacheOptions(_Weakrefable):
656
+ cdef:
657
+ CCacheOptions wrapped
658
+
659
+ cdef void init(self, CCacheOptions options)
660
+
661
+ cdef inline CCacheOptions unwrap(self)
662
+
663
+ @staticmethod
664
+ cdef wrap(const CCacheOptions options)
665
+
666
+
667
+ cdef class Codec(_Weakrefable):
668
+ cdef:
669
+ shared_ptr[CCodec] wrapped
670
+
671
+ cdef inline CCodec* unwrap(self) nogil
672
+
673
+
674
+ # This class is only used internally for now
675
+ cdef class StopToken:
676
+ cdef:
677
+ CStopToken stop_token
678
+
679
+ cdef void init(self, CStopToken stop_token)
680
+
681
+
682
+ cdef get_input_stream(object source, c_bool use_memory_map,
683
+ shared_ptr[CInputStream]* reader)
684
+ cdef get_reader(object source, c_bool use_memory_map,
685
+ shared_ptr[CRandomAccessFile]* reader)
686
+ cdef get_writer(object source, shared_ptr[COutputStream]* writer)
687
+ cdef NativeFile get_native_file(object source, c_bool use_memory_map)
688
+
689
+ cdef shared_ptr[CInputStream] native_transcoding_input_stream(
690
+ shared_ptr[CInputStream] stream, src_encoding,
691
+ dest_encoding) except *
692
+
693
+ cdef shared_ptr[function[StreamWrapFunc]] make_streamwrap_func(
694
+ src_encoding, dest_encoding) except *
695
+
696
+ # Default is allow_none=False
697
+ cpdef DataType ensure_type(object type, bint allow_none=*)
698
+
699
+ cdef timeunit_to_string(TimeUnit unit)
700
+ cdef TimeUnit string_to_timeunit(unit) except *
701
+
702
+ # Exceptions may be raised when converting dict values, so need to
703
+ # check exception state on return
704
+ cdef shared_ptr[const CKeyValueMetadata] pyarrow_unwrap_metadata(
705
+ object meta) except *
706
+ cdef object pyarrow_wrap_metadata(
707
+ const shared_ptr[const CKeyValueMetadata]& meta)
708
+
709
+ #
710
+ # Public Cython API for 3rd party code
711
+ #
712
+ # If you add functions to this list, please also update
713
+ # `cpp/src/arrow/python/pyarrow.{h, cc}`
714
+ #
715
+
716
+ # Wrapping C++ -> Python
717
+
718
+ cdef public object pyarrow_wrap_buffer(const shared_ptr[CBuffer]& buf)
719
+ cdef public object pyarrow_wrap_resizable_buffer(
720
+ const shared_ptr[CResizableBuffer]& buf)
721
+
722
+ cdef public object pyarrow_wrap_data_type(const shared_ptr[CDataType]& type)
723
+ cdef public object pyarrow_wrap_field(const shared_ptr[CField]& field)
724
+ cdef public object pyarrow_wrap_schema(const shared_ptr[CSchema]& type)
725
+
726
+ cdef public object pyarrow_wrap_scalar(const shared_ptr[CScalar]& sp_scalar)
727
+
728
+ cdef public object pyarrow_wrap_array(const shared_ptr[CArray]& sp_array)
729
+ cdef public object pyarrow_wrap_chunked_array(
730
+ const shared_ptr[CChunkedArray]& sp_array)
731
+
732
+ cdef public object pyarrow_wrap_sparse_coo_tensor(
733
+ const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor)
734
+ cdef public object pyarrow_wrap_sparse_csc_matrix(
735
+ const shared_ptr[CSparseCSCMatrix]& sp_sparse_tensor)
736
+ cdef public object pyarrow_wrap_sparse_csf_tensor(
737
+ const shared_ptr[CSparseCSFTensor]& sp_sparse_tensor)
738
+ cdef public object pyarrow_wrap_sparse_csr_matrix(
739
+ const shared_ptr[CSparseCSRMatrix]& sp_sparse_tensor)
740
+ cdef public object pyarrow_wrap_tensor(const shared_ptr[CTensor]& sp_tensor)
741
+
742
+ cdef public object pyarrow_wrap_batch(const shared_ptr[CRecordBatch]& cbatch)
743
+ cdef public object pyarrow_wrap_table(const shared_ptr[CTable]& ctable)
744
+
745
+ # Unwrapping Python -> C++
746
+
747
+ cdef public shared_ptr[CBuffer] pyarrow_unwrap_buffer(object buffer)
748
+
749
+ cdef public shared_ptr[CDataType] pyarrow_unwrap_data_type(object data_type)
750
+ cdef public shared_ptr[CField] pyarrow_unwrap_field(object field)
751
+ cdef public shared_ptr[CSchema] pyarrow_unwrap_schema(object schema)
752
+
753
+ cdef public shared_ptr[CScalar] pyarrow_unwrap_scalar(object scalar)
754
+
755
+ cdef public shared_ptr[CArray] pyarrow_unwrap_array(object array)
756
+ cdef public shared_ptr[CChunkedArray] pyarrow_unwrap_chunked_array(
757
+ object array)
758
+
759
+ cdef public shared_ptr[CSparseCOOTensor] pyarrow_unwrap_sparse_coo_tensor(
760
+ object sparse_tensor)
761
+ cdef public shared_ptr[CSparseCSCMatrix] pyarrow_unwrap_sparse_csc_matrix(
762
+ object sparse_tensor)
763
+ cdef public shared_ptr[CSparseCSFTensor] pyarrow_unwrap_sparse_csf_tensor(
764
+ object sparse_tensor)
765
+ cdef public shared_ptr[CSparseCSRMatrix] pyarrow_unwrap_sparse_csr_matrix(
766
+ object sparse_tensor)
767
+ cdef public shared_ptr[CTensor] pyarrow_unwrap_tensor(object tensor)
768
+
769
+ cdef public shared_ptr[CRecordBatch] pyarrow_unwrap_batch(object batch)
770
+ cdef public shared_ptr[CTable] pyarrow_unwrap_table(object table)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/lib_api.h ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Generated by Cython 3.0.12 */
2
+
3
+ #ifndef __PYX_HAVE_API__pyarrow__lib
4
+ #define __PYX_HAVE_API__pyarrow__lib
5
+ #ifdef __MINGW64__
6
+ #define MS_WIN64
7
+ #endif
8
+ #include "Python.h"
9
+ #include "lib.h"
10
+
11
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_box_memory_pool)( arrow::MemoryPool *) = 0; /* Function-pointer slot for a C function exported by pyarrow.lib: starts as 0 and is filled in by import_pyarrow__lib(); the #define on the next code line lets callers use the short name. */
12
+ #define box_memory_pool __pyx_api_f_7pyarrow_3lib_box_memory_pool
13
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer)(std::shared_ptr< arrow::Buffer> const &) = 0; /* pyarrow_wrap_* slots: each takes a std::shared_ptr by const reference and returns a PyObject * */
14
+ #define pyarrow_wrap_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer
15
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_resizable_buffer)(std::shared_ptr< arrow::ResizableBuffer> const &) = 0;
16
+ #define pyarrow_wrap_resizable_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_resizable_buffer
17
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type)(std::shared_ptr< arrow::DataType> const &) = 0;
18
+ #define pyarrow_wrap_data_type __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type
19
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field)(std::shared_ptr< arrow::Field> const &) = 0;
20
+ #define pyarrow_wrap_field __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field
21
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema)(std::shared_ptr< arrow::Schema> const &) = 0;
22
+ #define pyarrow_wrap_schema __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema
23
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar)(std::shared_ptr< arrow::Scalar> const &) = 0;
24
+ #define pyarrow_wrap_scalar __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar
25
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array)(std::shared_ptr< arrow::Array> const &) = 0;
26
+ #define pyarrow_wrap_array __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array
27
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array)(std::shared_ptr< arrow::ChunkedArray> const &) = 0;
28
+ #define pyarrow_wrap_chunked_array __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array
29
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_coo_tensor)(std::shared_ptr< arrow::SparseCOOTensor> const &) = 0;
30
+ #define pyarrow_wrap_sparse_coo_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_coo_tensor
31
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csc_matrix)(std::shared_ptr< arrow::SparseCSCMatrix> const &) = 0;
32
+ #define pyarrow_wrap_sparse_csc_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csc_matrix
33
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csf_tensor)(std::shared_ptr< arrow::SparseCSFTensor> const &) = 0;
34
+ #define pyarrow_wrap_sparse_csf_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csf_tensor
35
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csr_matrix)(std::shared_ptr< arrow::SparseCSRMatrix> const &) = 0;
36
+ #define pyarrow_wrap_sparse_csr_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csr_matrix
37
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor)(std::shared_ptr< arrow::Tensor> const &) = 0;
38
+ #define pyarrow_wrap_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor
39
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch)(std::shared_ptr< arrow::RecordBatch> const &) = 0;
40
+ #define pyarrow_wrap_batch __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch
41
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table)(std::shared_ptr< arrow::Table> const &) = 0;
42
+ #define pyarrow_wrap_table __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table
43
+ static std::shared_ptr< arrow::Buffer> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_buffer)(PyObject *) = 0; /* pyarrow_unwrap_* slots: each takes a PyObject * and returns a std::shared_ptr to the C++ type; every slot is 0 until import_pyarrow__lib() fills it. NOTE(review): the result for an object of the wrong type is not visible here -- confirm. */
44
+ #define pyarrow_unwrap_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_buffer
45
+ static std::shared_ptr< arrow::DataType> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_data_type)(PyObject *) = 0;
46
+ #define pyarrow_unwrap_data_type __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_data_type
47
+ static std::shared_ptr< arrow::Field> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_field)(PyObject *) = 0;
48
+ #define pyarrow_unwrap_field __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_field
49
+ static std::shared_ptr< arrow::Schema> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_schema)(PyObject *) = 0;
50
+ #define pyarrow_unwrap_schema __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_schema
51
+ static std::shared_ptr< arrow::Scalar> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_scalar)(PyObject *) = 0;
52
+ #define pyarrow_unwrap_scalar __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_scalar
53
+ static std::shared_ptr< arrow::Array> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_array)(PyObject *) = 0;
54
+ #define pyarrow_unwrap_array __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_array
55
+ static std::shared_ptr< arrow::ChunkedArray> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_chunked_array)(PyObject *) = 0;
56
+ #define pyarrow_unwrap_chunked_array __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_chunked_array
57
+ static std::shared_ptr< arrow::SparseCOOTensor> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_coo_tensor)(PyObject *) = 0;
58
+ #define pyarrow_unwrap_sparse_coo_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_coo_tensor
59
+ static std::shared_ptr< arrow::SparseCSCMatrix> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csc_matrix)(PyObject *) = 0;
60
+ #define pyarrow_unwrap_sparse_csc_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csc_matrix
61
+ static std::shared_ptr< arrow::SparseCSFTensor> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csf_tensor)(PyObject *) = 0;
62
+ #define pyarrow_unwrap_sparse_csf_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csf_tensor
63
+ static std::shared_ptr< arrow::SparseCSRMatrix> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csr_matrix)(PyObject *) = 0;
64
+ #define pyarrow_unwrap_sparse_csr_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csr_matrix
65
+ static std::shared_ptr< arrow::Tensor> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor)(PyObject *) = 0;
66
+ #define pyarrow_unwrap_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor
67
+ static std::shared_ptr< arrow::RecordBatch> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_batch)(PyObject *) = 0;
68
+ #define pyarrow_unwrap_batch __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_batch
69
+ static std::shared_ptr< arrow::Table> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_table)(PyObject *) = 0;
70
+ #define pyarrow_unwrap_table __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_table
71
+ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status)(arrow::Status const &) = 0; /* Status helper slots: both take an arrow::Status by const reference; check_status returns int, convert_status returns PyObject *. Both are 0 until import_pyarrow__lib() fills them. NOTE(review): the meaning of the int result is not visible here -- confirm. */
72
+ #define pyarrow_internal_check_status __pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status
73
+ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_internal_convert_status)(arrow::Status const &) = 0;
74
+ #define pyarrow_internal_convert_status __pyx_api_f_7pyarrow_3lib_pyarrow_internal_convert_status
75
+ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_buffer)(PyObject *) = 0; /* pyarrow_is_* slots: each takes a PyObject * and returns int; every slot is 0 until import_pyarrow__lib() fills it. Presumably the result is non-zero when the object is of the named pyarrow type -- TODO confirm. */
76
+ #define pyarrow_is_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_is_buffer
77
+ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_data_type)(PyObject *) = 0;
78
+ #define pyarrow_is_data_type __pyx_api_f_7pyarrow_3lib_pyarrow_is_data_type
79
+ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_metadata)(PyObject *) = 0;
80
+ #define pyarrow_is_metadata __pyx_api_f_7pyarrow_3lib_pyarrow_is_metadata
81
+ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_field)(PyObject *) = 0;
82
+ #define pyarrow_is_field __pyx_api_f_7pyarrow_3lib_pyarrow_is_field
83
+ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_schema)(PyObject *) = 0;
84
+ #define pyarrow_is_schema __pyx_api_f_7pyarrow_3lib_pyarrow_is_schema
85
+ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_array)(PyObject *) = 0;
86
+ #define pyarrow_is_array __pyx_api_f_7pyarrow_3lib_pyarrow_is_array
87
+ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_chunked_array)(PyObject *) = 0;
88
+ #define pyarrow_is_chunked_array __pyx_api_f_7pyarrow_3lib_pyarrow_is_chunked_array
89
+ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_scalar)(PyObject *) = 0;
90
+ #define pyarrow_is_scalar __pyx_api_f_7pyarrow_3lib_pyarrow_is_scalar
91
+ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor)(PyObject *) = 0;
92
+ #define pyarrow_is_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor
93
+ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_coo_tensor)(PyObject *) = 0;
94
+ #define pyarrow_is_sparse_coo_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_coo_tensor
95
+ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csr_matrix)(PyObject *) = 0;
96
+ #define pyarrow_is_sparse_csr_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csr_matrix
97
+ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csc_matrix)(PyObject *) = 0;
98
+ #define pyarrow_is_sparse_csc_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csc_matrix
99
+ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csf_tensor)(PyObject *) = 0;
100
+ #define pyarrow_is_sparse_csf_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csf_tensor
101
+ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_table)(PyObject *) = 0;
102
+ #define pyarrow_is_table __pyx_api_f_7pyarrow_3lib_pyarrow_is_table
103
+ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_batch)(PyObject *) = 0;
104
+ #define pyarrow_is_batch __pyx_api_f_7pyarrow_3lib_pyarrow_is_batch
105
+ #ifndef __PYX_HAVE_RT_ImportFunction_3_0_12 /* define the helper below only once per translation unit */
106
+ #define __PYX_HAVE_RT_ImportFunction_3_0_12
107
+ static int __Pyx_ImportFunction_3_0_12(PyObject *module, const char *funcname, void (**f)(void), const char *sig) { /* Looks up `funcname` in module.__pyx_capi__, checks that the entry is a capsule named `sig`, and stores the capsule's pointer in *f. Returns 0 on success and -1 on failure. */
108
+ PyObject *d = 0;
109
+ PyObject *cobj = 0;
110
+ union { /* lets the capsule's void * be read back as a function pointer without a direct cast */
111
+ void (*fp)(void);
112
+ void *p;
113
+ } tmp;
114
+ d = PyObject_GetAttrString(module, (char *)"__pyx_capi__"); /* d is released with Py_DECREF / Py_XDECREF on every exit path below */
115
+ if (!d)
116
+ goto bad;
117
+ cobj = PyDict_GetItemString(d, funcname); /* cobj is never DECREF'd in this function (PyDict_GetItemString returns a borrowed reference) */
118
+ if (!cobj) { /* no entry under funcname: raise ImportError */
119
+ PyErr_Format(PyExc_ImportError,
120
+ "%.200s does not export expected C function %.200s",
121
+ PyModule_GetName(module), funcname);
122
+ goto bad;
123
+ }
124
+ if (!PyCapsule_IsValid(cobj, sig)) { /* entry is not a capsule named exactly `sig`: raise TypeError */
125
+ PyErr_Format(PyExc_TypeError,
126
+ "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
127
+ PyModule_GetName(module), funcname, sig, PyCapsule_GetName(cobj));
128
+ goto bad;
129
+ }
130
+ tmp.p = PyCapsule_GetPointer(cobj, sig);
131
+ *f = tmp.fp; /* the caller's slot is written before the NULL check below */
132
+ if (!(*f)) /* PyCapsule_GetPointer yielded NULL */
133
+ goto bad;
134
+ Py_DECREF(d);
135
+ return 0;
136
+ bad:
137
+ Py_XDECREF(d); /* d is still NULL when the attribute lookup itself failed */
138
+ return -1;
139
+ }
140
+ #endif
141
+
142
+
143
+ static int import_pyarrow__lib(void) { /* Imports the "pyarrow.lib" module and resolves each API function-pointer slot declared in this header. Returns 0 when every slot was resolved and -1 on the first failure. */
144
+ PyObject *module = 0;
145
+ module = PyImport_ImportModule("pyarrow.lib");
146
+ if (!module) goto bad;
147
+ if (__Pyx_ImportFunction_3_0_12(module, "box_memory_pool", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_box_memory_pool, "PyObject *( arrow::MemoryPool *)") < 0) goto bad; /* each call below resolves one slot; the last argument is the signature string the exported capsule must be named with */
148
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer, "PyObject *(std::shared_ptr< arrow::Buffer> const &)") < 0) goto bad;
149
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_resizable_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_resizable_buffer, "PyObject *(std::shared_ptr< arrow::ResizableBuffer> const &)") < 0) goto bad;
150
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type, "PyObject *(std::shared_ptr< arrow::DataType> const &)") < 0) goto bad;
151
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field, "PyObject *(std::shared_ptr< arrow::Field> const &)") < 0) goto bad;
152
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema, "PyObject *(std::shared_ptr< arrow::Schema> const &)") < 0) goto bad;
153
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar, "PyObject *(std::shared_ptr< arrow::Scalar> const &)") < 0) goto bad;
154
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array, "PyObject *(std::shared_ptr< arrow::Array> const &)") < 0) goto bad;
155
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array, "PyObject *(std::shared_ptr< arrow::ChunkedArray> const &)") < 0) goto bad;
156
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_sparse_coo_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_coo_tensor, "PyObject *(std::shared_ptr< arrow::SparseCOOTensor> const &)") < 0) goto bad;
157
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_sparse_csc_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csc_matrix, "PyObject *(std::shared_ptr< arrow::SparseCSCMatrix> const &)") < 0) goto bad;
158
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_sparse_csf_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csf_tensor, "PyObject *(std::shared_ptr< arrow::SparseCSFTensor> const &)") < 0) goto bad;
159
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_sparse_csr_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csr_matrix, "PyObject *(std::shared_ptr< arrow::SparseCSRMatrix> const &)") < 0) goto bad;
160
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor, "PyObject *(std::shared_ptr< arrow::Tensor> const &)") < 0) goto bad;
161
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch, "PyObject *(std::shared_ptr< arrow::RecordBatch> const &)") < 0) goto bad;
162
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_wrap_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table, "PyObject *(std::shared_ptr< arrow::Table> const &)") < 0) goto bad;
163
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_buffer, "std::shared_ptr< arrow::Buffer> (PyObject *)") < 0) goto bad;
164
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_data_type, "std::shared_ptr< arrow::DataType> (PyObject *)") < 0) goto bad;
165
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_field, "std::shared_ptr< arrow::Field> (PyObject *)") < 0) goto bad;
166
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_schema, "std::shared_ptr< arrow::Schema> (PyObject *)") < 0) goto bad;
167
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_scalar, "std::shared_ptr< arrow::Scalar> (PyObject *)") < 0) goto bad;
168
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_array, "std::shared_ptr< arrow::Array> (PyObject *)") < 0) goto bad;
169
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_chunked_array, "std::shared_ptr< arrow::ChunkedArray> (PyObject *)") < 0) goto bad;
170
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_sparse_coo_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_coo_tensor, "std::shared_ptr< arrow::SparseCOOTensor> (PyObject *)") < 0) goto bad;
171
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_sparse_csc_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csc_matrix, "std::shared_ptr< arrow::SparseCSCMatrix> (PyObject *)") < 0) goto bad;
172
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_sparse_csf_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csf_tensor, "std::shared_ptr< arrow::SparseCSFTensor> (PyObject *)") < 0) goto bad;
173
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_sparse_csr_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csr_matrix, "std::shared_ptr< arrow::SparseCSRMatrix> (PyObject *)") < 0) goto bad;
174
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor, "std::shared_ptr< arrow::Tensor> (PyObject *)") < 0) goto bad;
175
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_batch, "std::shared_ptr< arrow::RecordBatch> (PyObject *)") < 0) goto bad;
176
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_unwrap_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_table, "std::shared_ptr< arrow::Table> (PyObject *)") < 0) goto bad;
177
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_internal_check_status", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status, "int (arrow::Status const &)") < 0) goto bad;
178
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_internal_convert_status", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_internal_convert_status, "PyObject *(arrow::Status const &)") < 0) goto bad;
179
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_buffer, "int (PyObject *)") < 0) goto bad;
180
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_data_type, "int (PyObject *)") < 0) goto bad;
181
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_metadata", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_metadata, "int (PyObject *)") < 0) goto bad;
182
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_field, "int (PyObject *)") < 0) goto bad;
183
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_schema, "int (PyObject *)") < 0) goto bad;
184
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_array, "int (PyObject *)") < 0) goto bad;
185
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_chunked_array, "int (PyObject *)") < 0) goto bad;
186
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_scalar, "int (PyObject *)") < 0) goto bad;
187
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor, "int (PyObject *)") < 0) goto bad;
188
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_sparse_coo_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_coo_tensor, "int (PyObject *)") < 0) goto bad;
189
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_sparse_csr_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csr_matrix, "int (PyObject *)") < 0) goto bad;
190
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_sparse_csc_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csc_matrix, "int (PyObject *)") < 0) goto bad;
191
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_sparse_csf_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csf_tensor, "int (PyObject *)") < 0) goto bad;
192
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_table, "int (PyObject *)") < 0) goto bad;
193
+ if (__Pyx_ImportFunction_3_0_12(module, "pyarrow_is_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_batch, "int (PyObject *)") < 0) goto bad;
194
+ Py_DECREF(module); module = 0; /* drop the reference taken by the import; the resolved pointers stay in the static slots */
195
+ return 0;
196
+ bad:
197
+ Py_XDECREF(module); /* module is still NULL when the import itself failed; slots resolved before the failure are not reset */
198
+ return -1;
199
+ }
200
+
201
+ #endif /* !__PYX_HAVE_API__pyarrow__lib */