BryanW committed on Mar 23

Commit

7f9dddc

verified ·

1 Parent(s): 20a2dfd

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/INSTALLER +1 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/LICENSE.txt +13 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/METADATA +250 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/RECORD +131 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/WHEEL +6 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/top_level.txt +1 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/__init__.py +77 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/from_thread.py +527 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/lowlevel.py +161 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/py.typed +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/pytest_plugin.py +191 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_interpreter.py +218 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_process.py +258 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_thread.py +69 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/_compat.py +11 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/parser_inline.py +147 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/port.yaml +48 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/token.py +180 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/INSTALLER +1 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/LICENSE +202 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/METADATA +317 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/NOTICE +13 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/RECORD +18 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/WHEEL +6 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/top_level.txt +1 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/__init__.py +437 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_azurefs.pyx +134 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_compute.pyx +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_compute_docstrings.py +56 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_cuda.pyx +1080 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_dataset_parquet.pyx +1053 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_fs.pxd +91 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_hdfs.pyx +160 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_json.pxd +36 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_json.pyx +310 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_orc.pyx +445 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_parquet.pyx +2266 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_pyarrow_cpp_tests.pxd +33 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_substrait.pyx +481 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/cffi.py +81 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/compute.py +744 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/config.pxi +95 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/cuda.py +25 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/error.pxi +274 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/fs.py +431 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/ipc.pxi +1403 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/ipc.py +285 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/jvm.py +335 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/lib.pxd +770 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/lib_api.h +201 -0

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/INSTALLER ADDED Viewed

	@@ -0,0 +1 @@


1	+ pip

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/LICENSE.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+   Copyright aio-libs contributors.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/METADATA ADDED Viewed

	@@ -0,0 +1,250 @@

+Metadata-Version: 2.2
+Name: aiohttp
+Version: 3.11.13
+Summary: Async http client/server framework (asyncio)
+Home-page: https://github.com/aio-libs/aiohttp
+Maintainer: aiohttp team <team@aiohttp.org>
+Maintainer-email: team@aiohttp.org
+License: Apache-2.0
+Project-URL: Chat: Matrix, https://matrix.to/#/#aio-libs:matrix.org
+Project-URL: Chat: Matrix Space, https://matrix.to/#/#aio-libs-space:matrix.org
+Project-URL: CI: GitHub Actions, https://github.com/aio-libs/aiohttp/actions?query=workflow%3ACI
+Project-URL: Coverage: codecov, https://codecov.io/github/aio-libs/aiohttp
+Project-URL: Docs: Changelog, https://docs.aiohttp.org/en/stable/changes.html
+Project-URL: Docs: RTD, https://docs.aiohttp.org
+Project-URL: GitHub: issues, https://github.com/aio-libs/aiohttp/issues
+Project-URL: GitHub: repo, https://github.com/aio-libs/aiohttp
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Framework :: AsyncIO
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: POSIX
+Classifier: Operating System :: MacOS :: MacOS X
+Classifier: Operating System :: Microsoft :: Windows
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Internet :: WWW/HTTP
+Requires-Python: >=3.9
+Description-Content-Type: text/x-rst
+License-File: LICENSE.txt
+Requires-Dist: aiohappyeyeballs>=2.3.0
+Requires-Dist: aiosignal>=1.1.2
+Requires-Dist: async-timeout<6.0,>=4.0; python_version < "3.11"
+Requires-Dist: attrs>=17.3.0
+Requires-Dist: frozenlist>=1.1.1
+Requires-Dist: multidict<7.0,>=4.5
+Requires-Dist: propcache>=0.2.0
+Requires-Dist: yarl<2.0,>=1.17.0
+Provides-Extra: speedups
+Requires-Dist: aiodns>=3.2.0; (sys_platform == "linux" or sys_platform == "darwin") and extra == "speedups"
+Requires-Dist: Brotli; platform_python_implementation == "CPython" and extra == "speedups"
+Requires-Dist: brotlicffi; platform_python_implementation != "CPython" and extra == "speedups"
+==================================
+Async http client/server framework
+==================================
+.. image:: https://raw.githubusercontent.com/aio-libs/aiohttp/master/docs/aiohttp-plain.svg
+   :height: 64px
+   :width: 64px
+   :alt: aiohttp logo
+|
+.. image:: https://github.com/aio-libs/aiohttp/workflows/CI/badge.svg
+   :target: https://github.com/aio-libs/aiohttp/actions?query=workflow%3ACI
+   :alt: GitHub Actions status for master branch
+.. image:: https://codecov.io/gh/aio-libs/aiohttp/branch/master/graph/badge.svg
+   :target: https://codecov.io/gh/aio-libs/aiohttp
+   :alt: codecov.io status for master branch
+.. image:: https://img.shields.io/endpoint?url=https://codspeed.io/badge.json
+   :target: https://codspeed.io/aio-libs/aiohttp
+   :alt: Codspeed.io status for aiohttp
+.. image:: https://badge.fury.io/py/aiohttp.svg
+   :target: https://pypi.org/project/aiohttp
+   :alt: Latest PyPI package version
+.. image:: https://readthedocs.org/projects/aiohttp/badge/?version=latest
+   :target: https://docs.aiohttp.org/
+   :alt: Latest Read The Docs
+.. image:: https://img.shields.io/matrix/aio-libs:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat
+   :target: https://matrix.to/#/%23aio-libs:matrix.org
+   :alt: Matrix Room — #aio-libs:matrix.org
+.. image:: https://img.shields.io/matrix/aio-libs-space:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs-space%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat
+   :target: https://matrix.to/#/%23aio-libs-space:matrix.org
+   :alt: Matrix Space — #aio-libs-space:matrix.org
+Key Features
+============
+- Supports both client and server side of HTTP protocol.
+- Supports both client and server Web-Sockets out-of-the-box and avoids
+  Callback Hell.
+- Provides Web-server with middleware and pluggable routing.
+Getting started
+===============
+Client
+------
+To get something from the web:
+.. code-block:: python
+  import aiohttp
+  import asyncio
+  async def main():
+      async with aiohttp.ClientSession() as session:
+          async with session.get('http://python.org') as response:
+              print("Status:", response.status)
+              print("Content-type:", response.headers['content-type'])
+              html = await response.text()
+              print("Body:", html[:15], "...")
+  asyncio.run(main())
+This prints:
+.. code-block::
+    Status: 200
+    Content-type: text/html; charset=utf-8
+    Body: <!doctype html> ...
+Coming from `requests <https://requests.readthedocs.io/>`_? Read `why we need so many lines <https://aiohttp.readthedocs.io/en/latest/http_request_lifecycle.html>`_.
+Server
+------
+An example using a simple server:
+.. code-block:: python
+    # examples/server_simple.py
+    from aiohttp import web
+    async def handle(request):
+        name = request.match_info.get('name', "Anonymous")
+        text = "Hello, " + name
+        return web.Response(text=text)
+    async def wshandle(request):
+        ws = web.WebSocketResponse()
+        await ws.prepare(request)
+        async for msg in ws:
+            if msg.type == web.WSMsgType.text:
+                await ws.send_str("Hello, {}".format(msg.data))
+            elif msg.type == web.WSMsgType.binary:
+                await ws.send_bytes(msg.data)
+            elif msg.type == web.WSMsgType.close:
+                break
+        return ws
+    app = web.Application()
+    app.add_routes([web.get('/', handle),
+                    web.get('/echo', wshandle),
+                    web.get('/{name}', handle)])
+    if __name__ == '__main__':
+        web.run_app(app)
+Documentation
+=============
+https://aiohttp.readthedocs.io/
+Demos
+=====
+https://github.com/aio-libs/aiohttp-demos
+External links
+==============
+* `Third party libraries
+  <http://aiohttp.readthedocs.io/en/latest/third_party.html>`_
+* `Built with aiohttp
+  <http://aiohttp.readthedocs.io/en/latest/built_with.html>`_
+* `Powered by aiohttp
+  <http://aiohttp.readthedocs.io/en/latest/powered_by.html>`_
+Feel free to make a Pull Request for adding your link to these pages!
+Communication channels
+======================
+*aio-libs Discussions*: https://github.com/aio-libs/aiohttp/discussions
+*Matrix*: `#aio-libs:matrix.org <https://matrix.to/#/#aio-libs:matrix.org>`_
+We support `Stack Overflow
+<https://stackoverflow.com/questions/tagged/aiohttp>`_.
+Please add the *aiohttp* tag to your question there.
+Requirements
+============
+- attrs_
+- multidict_
+- yarl_
+- frozenlist_
+Optionally you may install the aiodns_ library (highly recommended for the sake of speed).
+.. _aiodns: https://pypi.python.org/pypi/aiodns
+.. _attrs: https://github.com/python-attrs/attrs
+.. _multidict: https://pypi.python.org/pypi/multidict
+.. _frozenlist: https://pypi.org/project/frozenlist/
+.. _yarl: https://pypi.python.org/pypi/yarl
+.. _async-timeout: https://pypi.python.org/pypi/async_timeout
+License
+=======
+``aiohttp`` is offered under the Apache 2 license.
+Keepsafe
+========
+The aiohttp community would like to thank Keepsafe
+(https://www.getkeepsafe.com) for its support in the early days of
+the project.
+Source code
+===========
+The latest developer version is available in a GitHub repository:
+https://github.com/aio-libs/aiohttp
+Benchmarks
+==========
+If you are interested in efficiency, the AsyncIO community maintains a
+list of benchmarks on the official wiki:
+https://github.com/python/asyncio/wiki/Benchmarks

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/RECORD ADDED Viewed

	@@ -0,0 +1,131 @@

+aiohttp-3.11.13.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+aiohttp-3.11.13.dist-info/LICENSE.txt,sha256=n4DQ2311WpQdtFchcsJw7L2PCCuiFd3QlZhZQu2Uqes,588
+aiohttp-3.11.13.dist-info/METADATA,sha256=V_vS5w25_e4iRV4NQ4mjjJeaSpzg0IaWqru7XzJqJLc,7712
+aiohttp-3.11.13.dist-info/RECORD,,
+aiohttp-3.11.13.dist-info/WHEEL,sha256=siqMuoWpRueIZ87ijidBxnOwHeSOOcxNwYCs-pC4Yv0,151
+aiohttp-3.11.13.dist-info/top_level.txt,sha256=iv-JIaacmTl-hSho3QmphcKnbRRYx1st47yjz_178Ro,8
+aiohttp/.hash/_cparser.pxd.hash,sha256=hYa9Vje-oMs2eh_7MfCPOh2QW_1x1yCjcZuc7AmwLd0,121
+aiohttp/.hash/_find_header.pxd.hash,sha256=_mbpD6vM-CVCKq3ulUvsOAz5Wdo88wrDzfpOsMQaMNA,125
+aiohttp/.hash/_http_parser.pyx.hash,sha256=GBgZjCNbtZApPhf9-gHpS5Z2WMIzM-vgp5VSZIEvZfk,125
+aiohttp/.hash/_http_writer.pyx.hash,sha256=-UgSF82qclpxjP0og_gcFEsstXRKF9e3Ou4wziAyDvI,125
+aiohttp/.hash/hdrs.py.hash,sha256=v6IaKbsxjsdQxBzhb5AjP0x_9G3rUe84D7avf7AI4cs,116
+aiohttp/__init__.py,sha256=612FENJ9aLTJsbeZNR9LF_PAJ4nLUY4NgyqlkDnO70c,7840
+aiohttp/__pycache__/__init__.cpython-312.pyc,,
+aiohttp/__pycache__/abc.cpython-312.pyc,,
+aiohttp/__pycache__/base_protocol.cpython-312.pyc,,
+aiohttp/__pycache__/client.cpython-312.pyc,,
+aiohttp/__pycache__/client_exceptions.cpython-312.pyc,,
+aiohttp/__pycache__/client_proto.cpython-312.pyc,,
+aiohttp/__pycache__/client_reqrep.cpython-312.pyc,,
+aiohttp/__pycache__/client_ws.cpython-312.pyc,,
+aiohttp/__pycache__/compression_utils.cpython-312.pyc,,
+aiohttp/__pycache__/connector.cpython-312.pyc,,
+aiohttp/__pycache__/cookiejar.cpython-312.pyc,,
+aiohttp/__pycache__/formdata.cpython-312.pyc,,
+aiohttp/__pycache__/hdrs.cpython-312.pyc,,
+aiohttp/__pycache__/helpers.cpython-312.pyc,,
+aiohttp/__pycache__/http.cpython-312.pyc,,
+aiohttp/__pycache__/http_exceptions.cpython-312.pyc,,
+aiohttp/__pycache__/http_parser.cpython-312.pyc,,
+aiohttp/__pycache__/http_websocket.cpython-312.pyc,,
+aiohttp/__pycache__/http_writer.cpython-312.pyc,,
+aiohttp/__pycache__/log.cpython-312.pyc,,
+aiohttp/__pycache__/multipart.cpython-312.pyc,,
+aiohttp/__pycache__/payload.cpython-312.pyc,,
+aiohttp/__pycache__/payload_streamer.cpython-312.pyc,,
+aiohttp/__pycache__/pytest_plugin.cpython-312.pyc,,
+aiohttp/__pycache__/resolver.cpython-312.pyc,,
+aiohttp/__pycache__/streams.cpython-312.pyc,,
+aiohttp/__pycache__/tcp_helpers.cpython-312.pyc,,
+aiohttp/__pycache__/test_utils.cpython-312.pyc,,
+aiohttp/__pycache__/tracing.cpython-312.pyc,,
+aiohttp/__pycache__/typedefs.cpython-312.pyc,,
+aiohttp/__pycache__/web.cpython-312.pyc,,
+aiohttp/__pycache__/web_app.cpython-312.pyc,,
+aiohttp/__pycache__/web_exceptions.cpython-312.pyc,,
+aiohttp/__pycache__/web_fileresponse.cpython-312.pyc,,
+aiohttp/__pycache__/web_log.cpython-312.pyc,,
+aiohttp/__pycache__/web_middlewares.cpython-312.pyc,,
+aiohttp/__pycache__/web_protocol.cpython-312.pyc,,
+aiohttp/__pycache__/web_request.cpython-312.pyc,,
+aiohttp/__pycache__/web_response.cpython-312.pyc,,
+aiohttp/__pycache__/web_routedef.cpython-312.pyc,,
+aiohttp/__pycache__/web_runner.cpython-312.pyc,,
+aiohttp/__pycache__/web_server.cpython-312.pyc,,
+aiohttp/__pycache__/web_urldispatcher.cpython-312.pyc,,
+aiohttp/__pycache__/web_ws.cpython-312.pyc,,
+aiohttp/__pycache__/worker.cpython-312.pyc,,
+aiohttp/_cparser.pxd,sha256=8jGIg-VJ9p3llwCakUYDsPGxA4HiZe9dmK9Jmtlz-5g,4318
+aiohttp/_find_header.pxd,sha256=0GfwFCPN2zxEKTO1_MA5sYq2UfzsG8kcV3aTqvwlz3g,68
+aiohttp/_headers.pxi,sha256=n701k28dVPjwRnx5j6LpJhLTfj7dqu2vJt7f0O60Oyg,2007
+aiohttp/_http_parser.cpython-312-x86_64-linux-gnu.so,sha256=kZJwKEDTHDTxMWYND9KqRALWr8BaLSyyTBmQ6aRYuRA,2813904
+aiohttp/_http_parser.pyx,sha256=wQdADj5LizwC_7nFGr8nIlk6GpoaQeQ0359H0HMKGuM,28241
+aiohttp/_http_writer.cpython-312-x86_64-linux-gnu.so,sha256=6h2_x6dsZeFsvy_Iurvq9c-RfNLg-heJJAablu4uSSk,492232
+aiohttp/_http_writer.pyx,sha256=fiCck_EVgRiTX7VtAoV2AldjuesJMFPev4TWd9Fx8jo,4597
+aiohttp/_websocket/.hash/mask.pxd.hash,sha256=Y0zBddk_ck3pi9-BFzMcpkcvCKvwvZ4GTtZFb9u1nxQ,128
+aiohttp/_websocket/.hash/mask.pyx.hash,sha256=90owpXYM8_kIma4KUcOxhWSk-Uv4NVMBoCYeFM1B3d0,128
+aiohttp/_websocket/.hash/reader_c.pxd.hash,sha256=EoZjkF_tAFEbGvV0oRY2GZOSuAfWFWFjMhXgq6mQExo,132
+aiohttp/_websocket/__init__.py,sha256=Mar3R9_vBN_Ea4lsW7iTAVXD7OKswKPGqF5xgSyt77k,44
+aiohttp/_websocket/__pycache__/__init__.cpython-312.pyc,,
+aiohttp/_websocket/__pycache__/helpers.cpython-312.pyc,,
+aiohttp/_websocket/__pycache__/models.cpython-312.pyc,,
+aiohttp/_websocket/__pycache__/reader.cpython-312.pyc,,
+aiohttp/_websocket/__pycache__/reader_c.cpython-312.pyc,,
+aiohttp/_websocket/__pycache__/reader_py.cpython-312.pyc,,
+aiohttp/_websocket/__pycache__/writer.cpython-312.pyc,,
+aiohttp/_websocket/helpers.py,sha256=P-XLv8IUaihKzDenVUqfKU5DJbWE5HvG8uhvUZK8Ic4,5038
+aiohttp/_websocket/mask.cpython-312-x86_64-linux-gnu.so,sha256=rdCiOTakoDIcEYBPFRf7ncQUgXmhosFnNMxUYqJn8uA,265432
+aiohttp/_websocket/mask.pxd,sha256=sBmZ1Amym9kW4Ge8lj1fLZ7mPPya4LzLdpkQExQXv5M,112
+aiohttp/_websocket/mask.pyx,sha256=BHjOtV0O0w7xp9p0LNADRJvGmgfPn9sGeJvSs0fL__4,1397
+aiohttp/_websocket/models.py,sha256=XAzjs_8JYszWXIgZ6R3ZRrF-tX9Q_6LiD49WRYojopM,2121
+aiohttp/_websocket/reader.py,sha256=eC4qS0c5sOeQ2ebAHLaBpIaTVFaSKX79pY2xvh3Pqyw,1030
+aiohttp/_websocket/reader_c.cpython-312-x86_64-linux-gnu.so,sha256=wt49Wc5GzT9rRbxIN-8pgo3mQdTthCSVaoQR4NI6OTQ,1871856
+aiohttp/_websocket/reader_c.pxd,sha256=9rMWCpAC1jng7_gtqLjRlqQv9q7UkOn63tIQfq2k8Gc,2444
+aiohttp/_websocket/reader_c.py,sha256=anZsBKZWlL8SO8gArsZMDstH37qBuZOvJA7jtj0Z95M,17975
+aiohttp/_websocket/reader_py.py,sha256=anZsBKZWlL8SO8gArsZMDstH37qBuZOvJA7jtj0Z95M,17975
+aiohttp/_websocket/writer.py,sha256=T3P36iMrzVPPC2XeScserHMD5vd9an6yizWzqDUkRZ0,7077
+aiohttp/abc.py,sha256=JLMOxrKLGTDaPRLfraY1pl-xka53YiHhAH9yaF9QRXQ,6512
+aiohttp/base_protocol.py,sha256=Tp8cxUPQvv9kUPk3w6lAzk6d2MAzV3scwI_3Go3C47c,3025
+aiohttp/client.py,sha256=isdfGlM4O5ILr4F4gBABlybxo4MQ1tNaMm7zjMcrfrM,54309
+aiohttp/client_exceptions.py,sha256=uyKbxI2peZhKl7lELBMx3UeusNkfpemPWpGFq0r6JeM,11367
+aiohttp/client_proto.py,sha256=dV7u9floGWG-_xtD2fLUYqiANG6VsJtq0HMlTjf1g-g,10015
+aiohttp/client_reqrep.py,sha256=VAgh0NxP2HvYWx6nX1Pr8FINc1m-W8-5q2zKeZV68n8,43925
+aiohttp/client_ws.py,sha256=1CIjIXwyzOMIYw6AjUES4-qUwbyVHW1seJKQfg_Rta8,15109
+aiohttp/compression_utils.py,sha256=0J3EAOR-0HehlYIudJXRu_Kr6hrYCY0IfuJ1px9MhQs,5681
+aiohttp/connector.py,sha256=ZAXixLOyIl6zQEnetPLYKkbjP2BoHFEzGeEyvRBoGqI,60734
+aiohttp/cookiejar.py,sha256=PYR1K1mkLa24Hm6c9UEJnAitccNzz97CbsJyQ2ULAlU,17615
+aiohttp/formdata.py,sha256=CUJnCWDNHFcXSYZ_TupaT6rHkY-Q7ghssvWzaYBPIo0,6552
+aiohttp/hdrs.py,sha256=2rj5MyA-6yRdYPhW5UKkW4iNWhEAlGIOSBH5D4FmKNE,5111
+aiohttp/helpers.py,sha256=KqPQECeiJ_EhA93k7-5ZoVdZH0sk_4n0tCoM_E-iMnE,29091
+aiohttp/http.py,sha256=8o8j8xH70OWjnfTWA9V44NR785QPxEPrUtzMXiAVpwc,1842
+aiohttp/http_exceptions.py,sha256=RYmBycJvvPerKkgXXm8v145I1N-fbsgSpcsbNIC-gdE,2961
+aiohttp/http_parser.py,sha256=UqerYPJzA1MqLmeG1jURhTNO1YhwUASK3QVcNEz0me8,36851
+aiohttp/http_websocket.py,sha256=8VXFKw6KQUEmPg48GtRMB37v0gTK7A0inoxXuDxMZEc,842
+aiohttp/http_writer.py,sha256=pRIyfOmL3cZmdWDWBBJ2cZEwEJzLWzlPPAJInaPLThI,7595
+aiohttp/log.py,sha256=BbNKx9e3VMIm0xYjZI0IcBBoS7wjdeIeSaiJE7-qK2g,325
+aiohttp/multipart.py,sha256=SABIvo3vhXzG4bLDZ0C4V3yG_86vAb-3Zb9Li7BVmI8,36944
+aiohttp/payload.py,sha256=rCA9JJI_RMCik_7qNIaC1Bh21aXhABGYK2tsYeaHRQ4,15793
+aiohttp/payload_streamer.py,sha256=ZzEYyfzcjGWkVkK3XR2pBthSCSIykYvY3Wr5cGQ2eTc,2211
+aiohttp/py.typed,sha256=sow9soTwP9T_gEAQSVh7Gb8855h04Nwmhs2We-JRgZM,7
+aiohttp/pytest_plugin.py,sha256=AfJ6VIWzsp5KgpXRREsX3yqGUZrJyfb7zzcMqzWxz7I,12768
+aiohttp/resolver.py,sha256=sJ8-LYCtl_g9f6gn_5X2NFQ9FQ72Q2Mr4_rLxo9NVeI,6375
+aiohttp/streams.py,sha256=U-qTkuAqIfpJChuKEy-vYn8nQ_Z1MVcW0WO2DHiJz_o,22329
+aiohttp/tcp_helpers.py,sha256=BSadqVWaBpMFDRWnhaaR941N9MiDZ7bdTrxgCb0CW-M,961
+aiohttp/test_utils.py,sha256=r7kBasmZtC3tQY5OmyMaIl1B9P8Bnnq1oM3npVcAPKs,22811
+aiohttp/tracing.py,sha256=66XQwtdR5DHv8p953eeNL0l8o6iHDaNwH9bBaybHXD4,15137
+aiohttp/typedefs.py,sha256=wUlqwe9Mw9W8jT3HsYJcYk00qP3EMPz3nTkYXmeNN48,1657
+aiohttp/web.py,sha256=As5nqGQy4QXWMXSaOsh0JudSVVJVIt_nr3n0b8CaMb0,18422
+aiohttp/web_app.py,sha256=Zre0QHM9JAp4d7jrj5NRxlPnfTrKLNuA42Rdsh8Q2TI,19554
+aiohttp/web_exceptions.py,sha256=7nIuiwhZ39vJJ9KrWqArA5QcWbUdqkz2CLwEpJapeN8,10360
+aiohttp/web_fileresponse.py,sha256=FRsS0p9r1KU5y8ceG0QXBYnrL6xggjbxcXSmI6qIR4k,16504
+aiohttp/web_log.py,sha256=rX5D7xLOX2B6BMdiZ-chme_KfJfW5IXEoFwLfkfkajs,7865
+aiohttp/web_middlewares.py,sha256=sFI0AgeNjdyAjuz92QtMIpngmJSOxrqe2Jfbs4BNUu0,4165
+aiohttp/web_protocol.py,sha256=0MYjcaQishUyJxJ4lsH4IfHef4nIvHDf-DSZwI1Con4,25539
+aiohttp/web_request.py,sha256=j_SSX9s-d3ZeNyqUTpFIaPUaNdSqHwb7yfc0ufL8xFA,29750
+aiohttp/web_response.py,sha256=65aliDETi7rZ8P76ksuHQI0ZTu1cKpclCSailNu105M,28696
+aiohttp/web_routedef.py,sha256=VT1GAx6BrawoDh5RwBwBu5wSABSqgWwAe74AUCyZAEo,6110
+aiohttp/web_runner.py,sha256=v1G1nKiOOQgFnTSR4IMc6I9ReEFDMaHtMLvO_roDM-A,11786
+aiohttp/web_server.py,sha256=-9WDKUAiR9ll-rSdwXSqG6YjaoW79d1R4y0BGSqgUMA,2888
+aiohttp/web_urldispatcher.py,sha256=TIMxFmhLjERseG0xcZv2Ef9Xuo_GTBRqBqeMkCgL0K8,43825
+aiohttp/web_ws.py,sha256=Gr-UWgau41P-OoJUb3WJvaNYiDESXzrHmIw1Cqonupc,22612
+aiohttp/worker.py,sha256=0lvxRNMjGM47ddlQWtci53ri9YN42Su1Vdd_Z7zMMH0,8040

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/WHEEL ADDED Viewed

	@@ -0,0 +1,6 @@

+Wheel-Version: 1.0
+Generator: setuptools (75.8.0)
+Root-Is-Purelib: false
+Tag: cp312-cp312-manylinux_2_17_x86_64
+Tag: cp312-cp312-manylinux2014_x86_64

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiohttp-3.11.13.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ aiohttp

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/__init__.py ADDED Viewed

	@@ -0,0 +1,77 @@

+from __future__ import annotations
+from ._core._eventloop import current_time as current_time
+from ._core._eventloop import get_all_backends as get_all_backends
+from ._core._eventloop import get_cancelled_exc_class as get_cancelled_exc_class
+from ._core._eventloop import run as run
+from ._core._eventloop import sleep as sleep
+from ._core._eventloop import sleep_forever as sleep_forever
+from ._core._eventloop import sleep_until as sleep_until
+from ._core._exceptions import BrokenResourceError as BrokenResourceError
+from ._core._exceptions import BrokenWorkerIntepreter as BrokenWorkerIntepreter
+from ._core._exceptions import BrokenWorkerProcess as BrokenWorkerProcess
+from ._core._exceptions import BusyResourceError as BusyResourceError
+from ._core._exceptions import ClosedResourceError as ClosedResourceError
+from ._core._exceptions import DelimiterNotFound as DelimiterNotFound
+from ._core._exceptions import EndOfStream as EndOfStream
+from ._core._exceptions import IncompleteRead as IncompleteRead
+from ._core._exceptions import TypedAttributeLookupError as TypedAttributeLookupError
+from ._core._exceptions import WouldBlock as WouldBlock
+from ._core._fileio import AsyncFile as AsyncFile
+from ._core._fileio import Path as Path
+from ._core._fileio import open_file as open_file
+from ._core._fileio import wrap_file as wrap_file
+from ._core._resources import aclose_forcefully as aclose_forcefully
+from ._core._signals import open_signal_receiver as open_signal_receiver
+from ._core._sockets import connect_tcp as connect_tcp
+from ._core._sockets import connect_unix as connect_unix
+from ._core._sockets import create_connected_udp_socket as create_connected_udp_socket
+from ._core._sockets import (
+    create_connected_unix_datagram_socket as create_connected_unix_datagram_socket,
+)
+from ._core._sockets import create_tcp_listener as create_tcp_listener
+from ._core._sockets import create_udp_socket as create_udp_socket
+from ._core._sockets import create_unix_datagram_socket as create_unix_datagram_socket
+from ._core._sockets import create_unix_listener as create_unix_listener
+from ._core._sockets import getaddrinfo as getaddrinfo
+from ._core._sockets import getnameinfo as getnameinfo
+from ._core._sockets import wait_readable as wait_readable
+from ._core._sockets import wait_socket_readable as wait_socket_readable
+from ._core._sockets import wait_socket_writable as wait_socket_writable
+from ._core._sockets import wait_writable as wait_writable
+from ._core._streams import create_memory_object_stream as create_memory_object_stream
+from ._core._subprocesses import open_process as open_process
+from ._core._subprocesses import run_process as run_process
+from ._core._synchronization import CapacityLimiter as CapacityLimiter
+from ._core._synchronization import (
+    CapacityLimiterStatistics as CapacityLimiterStatistics,
+)
+from ._core._synchronization import Condition as Condition
+from ._core._synchronization import ConditionStatistics as ConditionStatistics
+from ._core._synchronization import Event as Event
+from ._core._synchronization import EventStatistics as EventStatistics
+from ._core._synchronization import Lock as Lock
+from ._core._synchronization import LockStatistics as LockStatistics
+from ._core._synchronization import ResourceGuard as ResourceGuard
+from ._core._synchronization import Semaphore as Semaphore
+from ._core._synchronization import SemaphoreStatistics as SemaphoreStatistics
+from ._core._tasks import TASK_STATUS_IGNORED as TASK_STATUS_IGNORED
+from ._core._tasks import CancelScope as CancelScope
+from ._core._tasks import create_task_group as create_task_group
+from ._core._tasks import current_effective_deadline as current_effective_deadline
+from ._core._tasks import fail_after as fail_after
+from ._core._tasks import move_on_after as move_on_after
+from ._core._testing import TaskInfo as TaskInfo
+from ._core._testing import get_current_task as get_current_task
+from ._core._testing import get_running_tasks as get_running_tasks
+from ._core._testing import wait_all_tasks_blocked as wait_all_tasks_blocked
+from ._core._typedattr import TypedAttributeProvider as TypedAttributeProvider
+from ._core._typedattr import TypedAttributeSet as TypedAttributeSet
+from ._core._typedattr import typed_attribute as typed_attribute
+# Re-export imports so they look like they live directly in this package:
+# every module-level value whose ``__module__`` starts with "anyio." gets that
+# attribute rewritten to this package's ``__name__``.
+# ``list(...)`` takes a snapshot first, because binding the loop variable adds
+# a new name to the very namespace being iterated.
+for __value in list(locals().values()):
+    if getattr(__value, "__module__", "").startswith("anyio."):
+        __value.__module__ = __name__
+# Drop the loop variable so it does not linger in the package namespace.
+del __value

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/from_thread.py ADDED Viewed

	@@ -0,0 +1,527 @@

+from __future__ import annotations
+import sys
+from collections.abc import Awaitable, Callable, Generator
+from concurrent.futures import Future
+from contextlib import (
+    AbstractAsyncContextManager,
+    AbstractContextManager,
+    contextmanager,
+)
+from dataclasses import dataclass, field
+from inspect import isawaitable
+from threading import Lock, Thread, get_ident
+from types import TracebackType
+from typing import (
+    Any,
+    Generic,
+    TypeVar,
+    cast,
+    overload,
+)
+from ._core import _eventloop
+from ._core._eventloop import get_async_backend, get_cancelled_exc_class, threadlocals
+from ._core._synchronization import Event
+from ._core._tasks import CancelScope, create_task_group
+from .abc import AsyncBackend
+from .abc._tasks import TaskStatus
+if sys.version_info >= (3, 11):
+    from typing import TypeVarTuple, Unpack
+else:
+    from typing_extensions import TypeVarTuple, Unpack
+T_Retval = TypeVar("T_Retval")
+T_co = TypeVar("T_co", covariant=True)
+PosArgsT = TypeVarTuple("PosArgsT")
+def run(
+    func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval]], *args: Unpack[PosArgsT]
+) -> T_Retval:
+    """
+    Call a coroutine function from a worker thread.
+    :param func: a coroutine function
+    :param args: positional arguments for the callable
+    :return: the return value of the coroutine function
+    """
+    try:
+        async_backend = threadlocals.current_async_backend
+        token = threadlocals.current_token
+    except AttributeError:
+        raise RuntimeError(
+            "This function can only be run from an AnyIO worker thread"
+        ) from None
+    return async_backend.run_async_from_thread(func, args, token=token)
+def run_sync(
+    func: Callable[[Unpack[PosArgsT]], T_Retval], *args: Unpack[PosArgsT]
+) -> T_Retval:
+    """
+    Call a function in the event loop thread from a worker thread.
+    :param func: a callable
+    :param args: positional arguments for the callable
+    :return: the return value of the callable
+    """
+    try:
+        async_backend = threadlocals.current_async_backend
+        token = threadlocals.current_token
+    except AttributeError:
+        raise RuntimeError(
+            "This function can only be run from an AnyIO worker thread"
+        ) from None
+    return async_backend.run_sync_from_thread(func, args, token=token)
+class _BlockingAsyncContextManager(Generic[T_co], AbstractContextManager):
+    """
+    Synchronous context manager that drives an async context manager through a
+    portal.
+    ``__enter__`` schedules :meth:`run_async_cm` with the portal's
+    ``start_task_soon`` and blocks on the future that receives the
+    ``__aenter__`` outcome. ``__exit__`` stores the exception triple, sets the
+    exit event via ``portal.call`` and returns the result of the task's future.
+    """
+    # Receives the value returned (or the exception raised) by ``__aenter__``
+    _enter_future: Future[T_co]
+    # Whatever ``start_task_soon`` returns for ``run_async_cm``
+    _exit_future: Future[bool | None]
+    # Set from ``__exit__``; ``run_async_cm`` waits on it before ``__aexit__``
+    _exit_event: Event
+    # Class-level default: ``__aexit__`` receives (None, None, None) when
+    # ``__exit__`` has not stored anything on the instance yet
+    _exit_exc_info: tuple[
+        type[BaseException] | None, BaseException | None, TracebackType | None
+    ] = (None, None, None)
+    def __init__(
+        self, async_cm: AbstractAsyncContextManager[T_co], portal: BlockingPortal
+    ):
+        """
+        Store the wrapped async context manager and the portal used to run it.
+        :param async_cm: the async context manager to enter and exit
+        :param portal: object providing ``start_task_soon`` and ``call``
+        """
+        self._async_cm = async_cm
+        self._portal = portal
+    async def run_async_cm(self) -> bool | None:
+        """
+        Enter the wrapped context manager, wait for the exit event, then exit it.
+        The outcome of ``__aenter__`` is published on ``_enter_future``; an
+        exception from it is also re-raised here.
+        :return: the value returned by the wrapped ``__aexit__``
+        """
+        try:
+            # The event is created here, before ``__aenter__`` is awaited
+            self._exit_event = Event()
+            value = await self._async_cm.__aenter__()
+        except BaseException as exc:
+            self._enter_future.set_exception(exc)
+            raise
+        else:
+            self._enter_future.set_result(value)
+        try:
+            # Wait for the sync context manager to exit.
+            # This next statement can raise `get_cancelled_exc_class()` if
+            # something went wrong in a task group in this async context
+            # manager.
+            await self._exit_event.wait()
+        finally:
+            # In case of cancellation, it could be that we end up here before
+            # `_BlockingAsyncContextManager.__exit__` is called, and an
+            # `_exit_exc_info` has been set.
+            result = await self._async_cm.__aexit__(*self._exit_exc_info)
+            # NOTE(review): returning from ``finally`` discards any exception
+            # propagating out of the ``try`` above; presumably intentional,
+            # confirm before changing.
+            return result
+    def __enter__(self) -> T_co:
+        """
+        Schedule :meth:`run_async_cm` through the portal and block until the
+        enter future completes.
+        :return: the result stored in ``_enter_future``
+        """
+        self._enter_future = Future()
+        self._exit_future = self._portal.start_task_soon(self.run_async_cm)
+        return self._enter_future.result()
+    def __exit__(
+        self,
+        __exc_type: type[BaseException] | None,
+        __exc_value: BaseException | None,
+        __traceback: TracebackType | None,
+    ) -> bool | None:
+        """
+        Record the exception triple, set the exit event through the portal and
+        block on the exit future.
+        :return: the result stored in ``_exit_future``
+        """
+        # Stored before the event is set so ``run_async_cm`` passes it to
+        # ``__aexit__``
+        self._exit_exc_info = __exc_type, __exc_value, __traceback
+        self._portal.call(self._exit_event.set)
+        return self._exit_future.result()
+class _BlockingPortalTaskStatus(TaskStatus):
+    def __init__(self, future: Future):
+        self._future = future
+    def started(self, value: object = None) -> None:
+        self._future.set_result(value)
+class BlockingPortal:
+    """An object that lets external threads run code in an asynchronous event loop."""
+    # Instantiation is delegated to the current async backend. The base class leaves
+    # ``_spawn_task_from_thread`` unimplemented, so the backend is presumably expected
+    # to return a subclass that overrides it.
+    def __new__(cls) -> BlockingPortal:
+        return get_async_backend().create_blocking_portal()
+    def __init__(self) -> None:
+        # The constructing thread is recorded as the event loop thread; ``stop()``
+        # resets this to ``None`` to mark the portal as no longer running.
+        self._event_loop_thread_id: int | None = get_ident()
+        self._stop_event = Event()
+        self._task_group = create_task_group()
+        self._cancelled_exc_class = get_cancelled_exc_class()
+    async def __aenter__(self) -> BlockingPortal:
+        await self._task_group.__aenter__()
+        return self
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> bool | None:
+        # Stop accepting new calls first, then exit the portal's task group
+        await self.stop()
+        return await self._task_group.__aexit__(exc_type, exc_val, exc_tb)
+    def _check_running(self) -> None:
+        """
+        Ensure the portal can be used from the calling thread.
+        :raises RuntimeError: if the portal has been stopped, or if the caller is the
+            event loop thread itself
+        """
+        if self._event_loop_thread_id is None:
+            raise RuntimeError("This portal is not running")
+        if self._event_loop_thread_id == get_ident():
+            raise RuntimeError(
+                "This method cannot be called from the event loop thread"
+            )
+    async def sleep_until_stopped(self) -> None:
+        """Sleep until :meth:`stop` is called."""
+        await self._stop_event.wait()
+    async def stop(self, cancel_remaining: bool = False) -> None:
+        """
+        Signal the portal to shut down.
+        This marks the portal as no longer accepting new calls and exits from
+        :meth:`sleep_until_stopped`.
+        :param cancel_remaining: ``True`` to cancel all the remaining tasks, ``False``
+            to let them finish before returning
+        """
+        self._event_loop_thread_id = None
+        self._stop_event.set()
+        if cancel_remaining:
+            self._task_group.cancel_scope.cancel()
+    async def _call_func(
+        self,
+        func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval] | T_Retval],
+        args: tuple[Unpack[PosArgsT]],
+        kwargs: dict[str, Any],
+        future: Future[T_Retval],
+    ) -> None:
+        """
+        Call ``func`` and transfer its outcome (result, exception or cancellation)
+        to ``future``.
+        If ``func`` returns an awaitable, it is awaited inside a cancel scope that is
+        cancelled when ``future`` gets cancelled.
+        """
+        def callback(f: Future[T_Retval]) -> None:
+            # Done-callback of the future: only act if the future was cancelled while
+            # the portal is still running and the callback is executing outside the
+            # event loop thread; the scope is then cancelled through the portal.
+            if f.cancelled() and self._event_loop_thread_id not in (
+                None,
+                get_ident(),
+            ):
+                self.call(scope.cancel)
+        try:
+            retval_or_awaitable = func(*args, **kwargs)
+            if isawaitable(retval_or_awaitable):
+                with CancelScope() as scope:
+                    # The future may already have been cancelled before the task got
+                    # to run; otherwise watch for a later cancellation.
+                    if future.cancelled():
+                        scope.cancel()
+                    else:
+                        future.add_done_callback(callback)
+                    retval = await retval_or_awaitable
+            else:
+                retval = retval_or_awaitable
+        except self._cancelled_exc_class:
+            # Reflect the task's cancellation in the future
+            future.cancel()
+            future.set_running_or_notify_cancel()
+        except BaseException as exc:
+            if not future.cancelled():
+                future.set_exception(exc)
+            # Let base exceptions fall through
+            if not isinstance(exc, Exception):
+                raise
+        else:
+            if not future.cancelled():
+                future.set_result(retval)
+        finally:
+            # Drop the local reference to the cancel scope (presumably to avoid
+            # keeping it alive through the ``callback`` closure)
+            scope = None  # type: ignore[assignment]
+    def _spawn_task_from_thread(
+        self,
+        func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval] | T_Retval],
+        args: tuple[Unpack[PosArgsT]],
+        kwargs: dict[str, Any],
+        name: object,
+        future: Future[T_Retval],
+    ) -> None:
+        """
+        Spawn a new task using the given callable.
+        Implementors must ensure that the future is resolved when the task finishes.
+        :param func: a callable
+        :param args: positional arguments to be passed to the callable
+        :param kwargs: keyword arguments to be passed to the callable
+        :param name: name of the task (will be coerced to a string if not ``None``)
+        :param future: a future that will resolve to the return value of the callable,
+            or the exception raised during its execution
+        """
+        raise NotImplementedError
+    @overload
+    def call(
+        self,
+        func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval]],
+        *args: Unpack[PosArgsT],
+    ) -> T_Retval: ...
+    @overload
+    def call(
+        self, func: Callable[[Unpack[PosArgsT]], T_Retval], *args: Unpack[PosArgsT]
+    ) -> T_Retval: ...
+    def call(
+        self,
+        func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval] | T_Retval],
+        *args: Unpack[PosArgsT],
+    ) -> T_Retval:
+        """
+        Call the given function in the event loop thread.
+        If the callable returns a coroutine object, it is awaited on.
+        :param func: any callable
+        :raises RuntimeError: if the portal is not running or if this method is called
+            from within the event loop thread
+        """
+        # Blocks the calling thread until the spawned task's future resolves
+        return cast(T_Retval, self.start_task_soon(func, *args).result())
+    @overload
+    def start_task_soon(
+        self,
+        func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval]],
+        *args: Unpack[PosArgsT],
+        name: object = None,
+    ) -> Future[T_Retval]: ...
+    @overload
+    def start_task_soon(
+        self,
+        func: Callable[[Unpack[PosArgsT]], T_Retval],
+        *args: Unpack[PosArgsT],
+        name: object = None,
+    ) -> Future[T_Retval]: ...
+    def start_task_soon(
+        self,
+        func: Callable[[Unpack[PosArgsT]], Awaitable[T_Retval] | T_Retval],
+        *args: Unpack[PosArgsT],
+        name: object = None,
+    ) -> Future[T_Retval]:
+        """
+        Start a task in the portal's task group.
+        The task will be run inside a cancel scope which can be cancelled by cancelling
+        the returned future.
+        :param func: the target function
+        :param args: positional arguments passed to ``func``
+        :param name: name of the task (will be coerced to a string if not ``None``)
+        :return: a future that resolves with the return value of the callable if the
+            task completes successfully, or with the exception raised in the task
+        :raises RuntimeError: if the portal is not running or if this method is called
+            from within the event loop thread
+        :rtype: concurrent.futures.Future[T_Retval]
+        .. versionadded:: 3.0
+        """
+        self._check_running()
+        f: Future[T_Retval] = Future()
+        self._spawn_task_from_thread(func, args, {}, name, f)
+        return f
+    def start_task(
+        self,
+        func: Callable[..., Awaitable[T_Retval]],
+        *args: object,
+        name: object = None,
+    ) -> tuple[Future[T_Retval], Any]:
+        """
+        Start a task in the portal's task group and wait until it signals for readiness.
+        This method works the same way as :meth:`.abc.TaskGroup.start`.
+        :param func: the target function
+        :param args: positional arguments passed to ``func``
+        :param name: name of the task (will be coerced to a string if not ``None``)
+        :return: a tuple of (future, task_status_value) where the ``task_status_value``
+            is the value passed to ``task_status.started()`` from within the target
+            function
+        :rtype: tuple[concurrent.futures.Future[T_Retval], Any]
+        .. versionadded:: 3.0
+        """
+        def task_done(future: Future[T_Retval]) -> None:
+            # If the task finished without ever calling task_status.started(), make
+            # sure the thread blocked on task_status_future is released: mirror the
+            # task's cancellation or exception, or report the missing started() call.
+            if not task_status_future.done():
+                if future.cancelled():
+                    task_status_future.cancel()
+                elif future.exception():
+                    task_status_future.set_exception(future.exception())
+                else:
+                    exc = RuntimeError(
+                        "Task exited without calling task_status.started()"
+                    )
+                    task_status_future.set_exception(exc)
+        self._check_running()
+        task_status_future: Future = Future()
+        task_status = _BlockingPortalTaskStatus(task_status_future)
+        f: Future = Future()
+        f.add_done_callback(task_done)
+        # The task status object is passed to ``func`` as the ``task_status`` keyword
+        self._spawn_task_from_thread(func, args, {"task_status": task_status}, name, f)
+        return f, task_status_future.result()
+    def wrap_async_context_manager(
+        self, cm: AbstractAsyncContextManager[T_co]
+    ) -> AbstractContextManager[T_co]:
+        """
+        Wrap an async context manager as a synchronous context manager via this portal.
+        Spawns a task that will call both ``__aenter__()`` and ``__aexit__()``, stopping
+        in the middle until the synchronous context manager exits.
+        :param cm: an asynchronous context manager
+        :return: a synchronous context manager
+        .. versionadded:: 2.1
+        """
+        return _BlockingAsyncContextManager(cm, self)
+@dataclass
+class BlockingPortalProvider:
+    """
+    A manager for a blocking portal. Used as a context manager. The first thread to
+    enter this context manager causes a blocking portal to be started with the specific
+    parameters, and the last thread to exit causes the portal to be shut down. Thus,
+    there will be exactly one blocking portal running in this context as long as at
+    least one thread has entered this context manager.
+    The parameters are the same as for :func:`~anyio.run`.
+    :param backend: name of the backend
+    :param backend_options: backend options
+    .. versionadded:: 4.4
+    """
+    backend: str = "asyncio"
+    backend_options: dict[str, Any] | None = None
+    _lock: Lock = field(init=False, default_factory=Lock)
+    _leases: int = field(init=False, default=0)
+    _portal: BlockingPortal = field(init=False)
+    _portal_cm: AbstractContextManager[BlockingPortal] | None = field(
+        init=False, default=None
+    )
+    def __enter__(self) -> BlockingPortal:
+        with self._lock:
+            if self._portal_cm is None:
+                self._portal_cm = start_blocking_portal(
+                    self.backend, self.backend_options
+                )
+                self._portal = self._portal_cm.__enter__()
+            self._leases += 1
+            return self._portal
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        portal_cm: AbstractContextManager[BlockingPortal] | None = None
+        with self._lock:
+            assert self._portal_cm
+            assert self._leases > 0
+            self._leases -= 1
+            if not self._leases:
+                portal_cm = self._portal_cm
+                self._portal_cm = None
+                del self._portal
+        if portal_cm:
+            portal_cm.__exit__(None, None, None)
+@contextmanager
+def start_blocking_portal(
+    backend: str = "asyncio", backend_options: dict[str, Any] | None = None
+) -> Generator[BlockingPortal, Any, None]:
+    """
+    Start a new event loop in a new thread and run a blocking portal in its main task.
+    The parameters are the same as for :func:`~anyio.run`.
+    :param backend: name of the backend
+    :param backend_options: backend options
+    :return: a context manager that yields a blocking portal
+    .. versionchanged:: 3.0
+        Usage as a context manager is now required.
+    """
+    async def run_portal() -> None:
+        # Main task of the new event loop: publish the portal to the waiting thread
+        # through ``future`` and keep the loop alive until the portal is stopped.
+        async with BlockingPortal() as portal_:
+            future.set_result(portal_)
+            await portal_.sleep_until_stopped()
+    def run_blocking_portal() -> None:
+        # Thread target: run the event loop unless the future was already cancelled.
+        if future.set_running_or_notify_cancel():
+            try:
+                _eventloop.run(
+                    run_portal, backend=backend, backend_options=backend_options
+                )
+            except BaseException as exc:
+                # Only forward the failure if the portal was never handed out
+                if not future.done():
+                    future.set_exception(exc)
+    future: Future[BlockingPortal] = Future()
+    thread = Thread(target=run_blocking_portal, daemon=True)
+    thread.start()
+    try:
+        cancel_remaining_tasks = False
+        # Blocks until the portal is running, or re-raises the startup failure
+        portal = future.result()
+        try:
+            yield portal
+        except BaseException:
+            # The ``with`` body raised: cancel whatever is still running in the portal
+            cancel_remaining_tasks = True
+            raise
+        finally:
+            try:
+                portal.call(portal.stop, cancel_remaining_tasks)
+            except RuntimeError:
+                # ``portal.call`` raises RuntimeError when the portal is no longer
+                # running; nothing is left to stop in that case
+                pass
+    finally:
+        # Always wait for the event loop thread to finish
+        thread.join()
+def check_cancelled() -> None:
+    """
+    Check if the cancel scope of the host task's running the current worker thread has
+    been cancelled.
+    If the host task's current cancel scope has indeed been cancelled, the
+    backend-specific cancellation exception will be raised.
+    :raises RuntimeError: if the current thread was not spawned by
+        :func:`.to_thread.run_sync`
+    """
+    try:
+        async_backend: AsyncBackend = threadlocals.current_async_backend
+    except AttributeError:
+        raise RuntimeError(
+            "This function can only be run from an AnyIO worker thread"
+        ) from None
+    async_backend.check_cancelled()

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/lowlevel.py ADDED Viewed

	@@ -0,0 +1,161 @@

+from __future__ import annotations
+import enum
+from dataclasses import dataclass
+from typing import Any, Generic, Literal, TypeVar, overload
+from weakref import WeakKeyDictionary
+from ._core._eventloop import get_async_backend
+T = TypeVar("T")
+D = TypeVar("D")
+async def checkpoint() -> None:
+    """
+    Check for cancellation and allow the scheduler to switch to another task.
+    Equivalent to (but more efficient than)::
+        await checkpoint_if_cancelled()
+        await cancel_shielded_checkpoint()
+    .. versionadded:: 3.0
+    """
+    await get_async_backend().checkpoint()
+async def checkpoint_if_cancelled() -> None:
+    """
+    Enter a checkpoint if the enclosing cancel scope has been cancelled.
+    This does not allow the scheduler to switch to a different task.
+    .. versionadded:: 3.0
+    """
+    await get_async_backend().checkpoint_if_cancelled()
+async def cancel_shielded_checkpoint() -> None:
+    """
+    Allow the scheduler to switch to another task but without checking for cancellation.
+    Equivalent to (but potentially more efficient than)::
+        with CancelScope(shield=True):
+            await checkpoint()
+    .. versionadded:: 3.0
+    """
+    await get_async_backend().cancel_shielded_checkpoint()
+def current_token() -> object:
+    """
+    Return a backend specific token object that can be used to get back to the event
+    loop.
+    """
+    return get_async_backend().current_token()
+# Storage for RunVar values: keyed by the token returned by current_token(); each
+# value maps run variable names to their current values. Entries are held through
+# weak references to the key.
+_run_vars: WeakKeyDictionary[Any, dict[str, Any]] = WeakKeyDictionary()
+# NOTE(review): not referenced in the visible part of this module -- presumably a
+# registry of _TokenWrapper objects keyed by backend token; confirm before relying
+# on it.
+_token_wrappers: dict[Any, _TokenWrapper] = {}
+@dataclass(frozen=True)
+class _TokenWrapper:
+    # Immutable holder for a single token object. "__weakref__" is listed in
+    # __slots__ so that instances can still be weakly referenced.
+    __slots__ = "_token", "__weakref__"
+    # The wrapped token (presumably a backend token that cannot itself be weakly
+    # referenced -- TODO confirm against the callers)
+    _token: object
+class _NoValueSet(enum.Enum):
+    # Single-member enum used as a "no value" sentinel, so that the sentinel can be
+    # expressed in annotations as Literal[_NoValueSet.NO_VALUE_SET].
+    NO_VALUE_SET = enum.auto()
+class RunvarToken(Generic[T]):
+    __slots__ = "_var", "_value", "_redeemed"
+    def __init__(self, var: RunVar[T], value: T | Literal[_NoValueSet.NO_VALUE_SET]):
+        self._var = var
+        self._value: T | Literal[_NoValueSet.NO_VALUE_SET] = value
+        self._redeemed = False
+class RunVar(Generic[T]):
+    """
+    Like a :class:`~contextvars.ContextVar`, except scoped to the running event loop.
+    """
+    __slots__ = "_name", "_default"
+    NO_VALUE_SET: Literal[_NoValueSet.NO_VALUE_SET] = _NoValueSet.NO_VALUE_SET
+    _token_wrappers: set[_TokenWrapper] = set()
+    def __init__(
+        self, name: str, default: T | Literal[_NoValueSet.NO_VALUE_SET] = NO_VALUE_SET
+    ):
+        self._name = name
+        self._default = default
+    @property
+    def _current_vars(self) -> dict[str, T]:
+        token = current_token()
+        try:
+            return _run_vars[token]
+        except KeyError:
+            run_vars = _run_vars[token] = {}
+            return run_vars
+    @overload
+    def get(self, default: D) -> T | D: ...
+    @overload
+    def get(self) -> T: ...
+    def get(
+        self, default: D | Literal[_NoValueSet.NO_VALUE_SET] = NO_VALUE_SET
+    ) -> T | D:
+        try:
+            return self._current_vars[self._name]
+        except KeyError:
+            if default is not RunVar.NO_VALUE_SET:
+                return default
+            elif self._default is not RunVar.NO_VALUE_SET:
+                return self._default
+        raise LookupError(
+            f'Run variable "{self._name}" has no value and no default set'
+        )
+    def set(self, value: T) -> RunvarToken[T]:
+        current_vars = self._current_vars
+        token = RunvarToken(self, current_vars.get(self._name, RunVar.NO_VALUE_SET))
+        current_vars[self._name] = value
+        return token
+    def reset(self, token: RunvarToken[T]) -> None:
+        if token._var is not self:
+            raise ValueError("This token does not belong to this RunVar")
+        if token._redeemed:
+            raise ValueError("This token has already been used")
+        if token._value is _NoValueSet.NO_VALUE_SET:
+            try:
+                del self._current_vars[self._name]
+            except KeyError:
+                pass
+        else:
+            self._current_vars[self._name] = token._value
+        token._redeemed = True
+    def __repr__(self) -> str:
+        return f"<RunVar name={self._name!r}>"

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/py.typed ADDED Viewed

File without changes

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/pytest_plugin.py ADDED Viewed

	@@ -0,0 +1,191 @@

+from __future__ import annotations
+import sys
+from collections.abc import Generator, Iterator
+from contextlib import ExitStack, contextmanager
+from inspect import isasyncgenfunction, iscoroutinefunction, ismethod
+from typing import Any, cast
+import pytest
+import sniffio
+from _pytest.fixtures import SubRequest
+from _pytest.outcomes import Exit
+from ._core._eventloop import get_all_backends, get_async_backend
+from ._core._exceptions import iterate_exceptions
+from .abc import TestRunner
+if sys.version_info < (3, 11):
+    from exceptiongroup import ExceptionGroup
+# Module-wide runner state managed by get_runner():
+# the currently active test runner (None when no runner is open),
+_current_runner: TestRunner | None = None
+# the ExitStack that owns the runner and its cleanup callbacks,
+_runner_stack: ExitStack | None = None
+# and the number of get_runner() contexts that are currently open.
+_runner_leases = 0
+def extract_backend_and_options(backend: object) -> tuple[str, dict[str, Any]]:
+    if isinstance(backend, str):
+        return backend, {}
+    elif isinstance(backend, tuple) and len(backend) == 2:
+        if isinstance(backend[0], str) and isinstance(backend[1], dict):
+            return cast(tuple[str, dict[str, Any]], backend)
+    raise TypeError("anyio_backend must be either a string or tuple of (string, dict)")
+@contextmanager
+def get_runner(
+    backend_name: str, backend_options: dict[str, Any]
+) -> Iterator[TestRunner]:
+    global _current_runner, _runner_leases, _runner_stack
+    if _current_runner is None:
+        asynclib = get_async_backend(backend_name)
+        _runner_stack = ExitStack()
+        if sniffio.current_async_library_cvar.get(None) is None:
+            # Since we're in control of the event loop, we can cache the name of the
+            # async library
+            token = sniffio.current_async_library_cvar.set(backend_name)
+            _runner_stack.callback(sniffio.current_async_library_cvar.reset, token)
+        backend_options = backend_options or {}
+        _current_runner = _runner_stack.enter_context(
+            asynclib.create_test_runner(backend_options)
+        )
+    _runner_leases += 1
+    try:
+        yield _current_runner
+    finally:
+        _runner_leases -= 1
+        if not _runner_leases:
+            assert _runner_stack is not None
+            _runner_stack.close()
+            _runner_stack = _current_runner = None
+def pytest_configure(config: Any) -> None:
+    config.addinivalue_line(
+        "markers",
+        "anyio: mark the (coroutine function) test to be run "
+        "asynchronously via anyio.",
+    )
+@pytest.hookimpl(hookwrapper=True)
+def pytest_fixture_setup(fixturedef: Any, request: Any) -> Generator[Any]:
+    """
+    Run coroutine and async generator fixtures through the anyio test runner.
+    For such fixtures in requests that involve the ``anyio_backend`` fixture, the
+    fixture function is temporarily replaced with ``wrapper`` while the fixture is
+    being set up; the original function and argument names are restored afterwards.
+    """
+    def wrapper(
+        *args: Any, anyio_backend: Any, request: SubRequest, **kwargs: Any
+    ) -> Any:
+        # Rebind any fixture methods to the request instance
+        if (
+            request.instance
+            and ismethod(func)
+            and type(func.__self__) is type(request.instance)
+        ):
+            local_func = func.__func__.__get__(request.instance)
+        else:
+            local_func = func
+        backend_name, backend_options = extract_backend_and_options(anyio_backend)
+        # ``anyio_backend`` and ``request`` are only passed on to the fixture if it
+        # declared them itself (they may have been added to argnames below)
+        if has_backend_arg:
+            kwargs["anyio_backend"] = anyio_backend
+        if has_request_arg:
+            kwargs["request"] = request
+        with get_runner(backend_name, backend_options) as runner:
+            if isasyncgenfunction(local_func):
+                yield from runner.run_asyncgen_fixture(local_func, kwargs)
+            else:
+                yield runner.run_fixture(local_func, kwargs)
+    # Only apply this to coroutine functions and async generator functions in requests
+    # that involve the anyio_backend fixture
+    func = fixturedef.func
+    if isasyncgenfunction(func) or iscoroutinefunction(func):
+        if "anyio_backend" in request.fixturenames:
+            fixturedef.func = wrapper
+            original_argname = fixturedef.argnames
+            # Make sure the wrapper receives both values, remembering whether the
+            # original fixture asked for them
+            if not (has_backend_arg := "anyio_backend" in fixturedef.argnames):
+                fixturedef.argnames += ("anyio_backend",)
+            if not (has_request_arg := "request" in fixturedef.argnames):
+                fixturedef.argnames += ("request",)
+            try:
+                return (yield)
+            finally:
+                # Undo the temporary patching of the fixture definition
+                fixturedef.func = func
+                fixturedef.argnames = original_argname
+    return (yield)
+@pytest.hookimpl(tryfirst=True)
+def pytest_pycollect_makeitem(collector: Any, name: Any, obj: Any) -> None:
+    if collector.istestfunction(obj, name):
+        inner_func = obj.hypothesis.inner_test if hasattr(obj, "hypothesis") else obj
+        if iscoroutinefunction(inner_func):
+            marker = collector.get_closest_marker("anyio")
+            own_markers = getattr(obj, "pytestmark", ())
+            if marker or any(marker.name == "anyio" for marker in own_markers):
+                pytest.mark.usefixtures("anyio_backend")(obj)
+@pytest.hookimpl(tryfirst=True)
+def pytest_pyfunc_call(pyfuncitem: Any) -> bool | None:
+    def run_with_hypothesis(**kwargs: Any) -> None:
+        with get_runner(backend_name, backend_options) as runner:
+            runner.run_test(original_func, kwargs)
+    backend = pyfuncitem.funcargs.get("anyio_backend")
+    if backend:
+        backend_name, backend_options = extract_backend_and_options(backend)
+        if hasattr(pyfuncitem.obj, "hypothesis"):
+            # Wrap the inner test function unless it's already wrapped
+            original_func = pyfuncitem.obj.hypothesis.inner_test
+            if original_func.__qualname__ != run_with_hypothesis.__qualname__:
+                if iscoroutinefunction(original_func):
+                    pyfuncitem.obj.hypothesis.inner_test = run_with_hypothesis
+            return None
+        if iscoroutinefunction(pyfuncitem.obj):
+            funcargs = pyfuncitem.funcargs
+            testargs = {arg: funcargs[arg] for arg in pyfuncitem._fixtureinfo.argnames}
+            with get_runner(backend_name, backend_options) as runner:
+                try:
+                    runner.run_test(pyfuncitem.obj, testargs)
+                except ExceptionGroup as excgrp:
+                    for exc in iterate_exceptions(excgrp):
+                        if isinstance(exc, (Exit, KeyboardInterrupt, SystemExit)):
+                            raise exc from excgrp
+                    raise
+            return True
+    return None
+@pytest.fixture(scope="module", params=get_all_backends())
+def anyio_backend(request: Any) -> Any:
+    return request.param
+@pytest.fixture
+def anyio_backend_name(anyio_backend: Any) -> str:
+    if isinstance(anyio_backend, str):
+        return anyio_backend
+    else:
+        return anyio_backend[0]
+@pytest.fixture
+def anyio_backend_options(anyio_backend: Any) -> dict[str, Any]:
+    if isinstance(anyio_backend, str):
+        return {}
+    else:
+        return anyio_backend[1]

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_interpreter.py ADDED Viewed

	@@ -0,0 +1,218 @@

+from __future__ import annotations
+import atexit
+import os
+import pickle
+import sys
+from collections import deque
+from collections.abc import Callable
+from textwrap import dedent
+from typing import Any, Final, TypeVar
+from . import current_time, to_thread
+from ._core._exceptions import BrokenWorkerIntepreter
+from ._core._synchronization import CapacityLimiter
+from .lowlevel import RunVar
+if sys.version_info >= (3, 11):
+    from typing import TypeVarTuple, Unpack
+else:
+    from typing_extensions import TypeVarTuple, Unpack
+# Passed as the last argument of the ``queues.create()`` / ``queues.put()`` calls
+# made by Worker
+UNBOUND: Final = 2  # I have no clue how this works, but it was used in the stdlib
+# Queue item format markers: Worker pickles a payload itself when using FMT_PICKLED
+# and unpickles results that come back tagged with it
+FMT_UNPICKLED: Final = 0
+FMT_PICKLED: Final = 1
+# Fallback size of the default limiter when os.cpu_count() returns nothing usable
+DEFAULT_CPU_COUNT: Final = 8  # this is just an arbitrarily selected value
+MAX_WORKER_IDLE_TIME = (
+    30  # seconds a subinterpreter can be idle before becoming eligible for pruning
+)
+T_Retval = TypeVar("T_Retval")
+PosArgsT = TypeVarTuple("PosArgsT")
+# Pool of idle workers for the current event loop (note that the run variable is
+# registered under the name "_available_workers")
+_idle_workers = RunVar[deque["Worker"]]("_available_workers")
+# Default capacity limiter, created lazily by current_default_interpreter_limiter()
+_default_interpreter_limiter = RunVar[CapacityLimiter]("_default_interpreter_limiter")
+class Worker:
+    """
+    A lazily created subinterpreter paired with a queue used to exchange one call
+    payload and its result at a time.
+    """
+    # Code executed inside the subinterpreter for every call. It takes one pickled
+    # ``(func, args)`` item from the queue, calls the function and puts back a
+    # ``(retval, is_exception)`` pair, pickling ``retval`` only if the queue rejects
+    # it with NotShareableError. ``queue_id``, ``FMT_*`` and ``UNBOUND`` are injected
+    # into the subinterpreter's ``__main__`` by initialize().
+    _run_func = compile(
+        dedent("""
+        import _interpqueues as queues
+        import _interpreters as interpreters
+        from pickle import loads, dumps, HIGHEST_PROTOCOL
+        item = queues.get(queue_id)[0]
+        try:
+            func, args = loads(item)
+            retval = func(*args)
+        except BaseException as exc:
+            is_exception = True
+            retval = exc
+        else:
+            is_exception = False
+        try:
+            queues.put(queue_id, (retval, is_exception), FMT_UNPICKLED, UNBOUND)
+        except interpreters.NotShareableError:
+            retval = dumps(retval, HIGHEST_PROTOCOL)
+            queues.put(queue_id, (retval, is_exception), FMT_PICKLED, UNBOUND)
+        """),
+        "<string>",
+        "exec",
+    )
+    # Time the worker was last returned to the idle pool (set by run_sync())
+    last_used: float = 0
+    # Whether initialize() has created the interpreter and the queue
+    _initialized: bool = False
+    _interpreter_id: int
+    _queue_id: int
+    def initialize(self) -> None:
+        """Create the subinterpreter and its queue, and publish the call constants."""
+        import _interpqueues as queues
+        import _interpreters as interpreters
+        self._interpreter_id = interpreters.create()
+        self._queue_id = queues.create(2, FMT_UNPICKLED, UNBOUND)  # type: ignore[call-arg]
+        self._initialized = True
+        # Names made available to ``_run_func`` inside the subinterpreter
+        interpreters.set___main___attrs(
+            self._interpreter_id,
+            {
+                "queue_id": self._queue_id,
+                "FMT_PICKLED": FMT_PICKLED,
+                "FMT_UNPICKLED": FMT_UNPICKLED,
+                "UNBOUND": UNBOUND,
+            },
+        )
+    def destroy(self) -> None:
+        """Destroy the subinterpreter and its queue, if they were ever created."""
+        import _interpqueues as queues
+        import _interpreters as interpreters
+        if self._initialized:
+            interpreters.destroy(self._interpreter_id)
+            queues.destroy(self._queue_id)
+    def _call(
+        self,
+        func: Callable[..., T_Retval],
+        args: tuple[Any],
+    ) -> tuple[Any, bool]:
+        """
+        Run ``func(*args)`` in the subinterpreter (blocking).
+        :return: a tuple of (return value or raised exception, is_exception)
+        :raises BrokenWorkerIntepreter: if executing the runner code itself fails
+        """
+        import _interpqueues as queues
+        import _interpreters as interpreters
+        # The interpreter is created on first use
+        if not self._initialized:
+            self.initialize()
+        payload = pickle.dumps((func, args), pickle.HIGHEST_PROTOCOL)
+        queues.put(self._queue_id, payload, FMT_PICKLED, UNBOUND)  # type: ignore[call-arg]
+        res: Any
+        is_exception: bool
+        # A truthy return value from exec() is treated as failure information
+        if exc_info := interpreters.exec(self._interpreter_id, self._run_func):  # type: ignore[func-returns-value,arg-type]
+            raise BrokenWorkerIntepreter(exc_info)
+        (res, is_exception), fmt = queues.get(self._queue_id)[:2]
+        if fmt == FMT_PICKLED:
+            res = pickle.loads(res)
+        return res, is_exception
+    async def call(
+        self,
+        func: Callable[..., T_Retval],
+        args: tuple[Any],
+        limiter: CapacityLimiter,
+    ) -> T_Retval:
+        """
+        Run ``func(*args)`` in the subinterpreter from a worker thread.
+        :param limiter: capacity limiter passed on to ``to_thread.run_sync()``
+        :return: the return value of ``func``
+        :raises BaseException: whatever exception ``func`` raised
+        """
+        result, is_exception = await to_thread.run_sync(
+            self._call,
+            func,
+            args,
+            limiter=limiter,
+        )
+        if is_exception:
+            raise result
+        return result
+def _stop_workers(workers: deque[Worker]) -> None:
+    for worker in workers:
+        worker.destroy()
+    workers.clear()
+async def run_sync(
+    func: Callable[[Unpack[PosArgsT]], T_Retval],
+    *args: Unpack[PosArgsT],
+    limiter: CapacityLimiter | None = None,
+) -> T_Retval:
+    """
+    Call the given function with the given arguments in a subinterpreter.
+    If the ``cancellable`` option is enabled and the task waiting for its completion is
+    cancelled, the call will still run its course but its return value (or any raised
+    exception) will be ignored.
+    .. warning:: This feature is **experimental**. The upstream interpreter API has not
+        yet been finalized or thoroughly tested, so don't rely on this for anything
+        mission critical.
+    :param func: a callable
+    :param args: positional arguments for the callable
+    :param limiter: capacity limiter to use to limit the total amount of subinterpreters
+        running (if omitted, the default limiter is used)
+    :return: the result of the call
+    :raises BrokenWorkerIntepreter: if there's an internal error in a subinterpreter
+    """
+    if sys.version_info <= (3, 13):
+        raise RuntimeError("subinterpreters require at least Python 3.13")
+    if limiter is None:
+        limiter = current_default_interpreter_limiter()
+    try:
+        idle_workers = _idle_workers.get()
+    except LookupError:
+        idle_workers = deque()
+        _idle_workers.set(idle_workers)
+        atexit.register(_stop_workers, idle_workers)
+    async with limiter:
+        try:
+            worker = idle_workers.pop()
+        except IndexError:
+            worker = Worker()
+    try:
+        return await worker.call(func, args, limiter)
+    finally:
+        # Prune workers that have been idle for too long
+        now = current_time()
+        while idle_workers:
+            if now - idle_workers[0].last_used <= MAX_WORKER_IDLE_TIME:
+                break
+            await to_thread.run_sync(idle_workers.popleft().destroy, limiter=limiter)
+        worker.last_used = current_time()
+        idle_workers.append(worker)
+def current_default_interpreter_limiter() -> CapacityLimiter:
+    """
+    Return the capacity limiter that is used by default to limit the number of
+    concurrently running subinterpreters.
+    Defaults to the number of CPU cores.
+    :return: a capacity limiter object
+    """
+    try:
+        return _default_interpreter_limiter.get()
+    except LookupError:
+        limiter = CapacityLimiter(os.cpu_count() or DEFAULT_CPU_COUNT)
+        _default_interpreter_limiter.set(limiter)
+        return limiter

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_process.py ADDED Viewed

	@@ -0,0 +1,258 @@

+from __future__ import annotations
+import os
+import pickle
+import subprocess
+import sys
+from collections import deque
+from collections.abc import Callable
+from importlib.util import module_from_spec, spec_from_file_location
+from typing import TypeVar, cast
+from ._core._eventloop import current_time, get_async_backend, get_cancelled_exc_class
+from ._core._exceptions import BrokenWorkerProcess
+from ._core._subprocesses import open_process
+from ._core._synchronization import CapacityLimiter
+from ._core._tasks import CancelScope, fail_after
+from .abc import ByteReceiveStream, ByteSendStream, Process
+from .lowlevel import RunVar, checkpoint_if_cancelled
+from .streams.buffered import BufferedByteReceiveStream
+if sys.version_info >= (3, 11):
+    from typing import TypeVarTuple, Unpack
+else:
+    from typing_extensions import TypeVarTuple, Unpack
+WORKER_MAX_IDLE_TIME = 300  # 5 minutes
+T_Retval = TypeVar("T_Retval")
+PosArgsT = TypeVarTuple("PosArgsT")
+_process_pool_workers: RunVar[set[Process]] = RunVar("_process_pool_workers")
+_process_pool_idle_workers: RunVar[deque[tuple[Process, float]]] = RunVar(
+    "_process_pool_idle_workers"
+)
+_default_process_limiter: RunVar[CapacityLimiter] = RunVar("_default_process_limiter")
+async def run_sync(  # type: ignore[return]
+    func: Callable[[Unpack[PosArgsT]], T_Retval],
+    *args: Unpack[PosArgsT],
+    cancellable: bool = False,
+    limiter: CapacityLimiter | None = None,
+) -> T_Retval:
+    """
+    Call the given function with the given arguments in a worker process.
+    If the ``cancellable`` option is enabled and the task waiting for its completion is
+    cancelled, the worker process running it will be abruptly terminated using SIGKILL
+    (or ``terminateProcess()`` on Windows).
+    The function and its arguments are pickled before being sent to the worker, and the
+    worker's reply is unpickled on this side.
+    :param func: a callable
+    :param args: positional arguments for the callable
+    :param cancellable: ``True`` to allow cancellation of the operation while it's
+        running
+    :param limiter: capacity limiter to use to limit the total amount of processes
+        running (if omitted, the default limiter is used)
+    :return: an awaitable that yields the return value of the function.
+    :raises BrokenWorkerProcess: if the worker fails during initialization, sends an
+        unexpected response, or the exchange with it fails for any reason other than
+        cancellation
+    """
+    async def send_raw_command(pickled_cmd: bytes) -> object:
+        """
+        Send one pickled command to the worker and return its unpickled reply.
+        The reply consists of a header line holding a status (``RETURN`` or
+        ``EXCEPTION``) and a payload length, followed by that many bytes of pickled
+        data. An ``EXCEPTION`` payload is re-raised here.
+        """
+        try:
+            await stdin.send(pickled_cmd)
+            # Header line, e.g. b"RETURN 123" (the 50 is presumably the maximum number
+            # of bytes to scan for the newline -- TODO confirm)
+            response = await buffered.receive_until(b"\n", 50)
+            status, length = response.split(b" ")
+            if status not in (b"RETURN", b"EXCEPTION"):
+                raise RuntimeError(
+                    f"Worker process returned unexpected response: {response!r}"
+                )
+            pickled_response = await buffered.receive_exactly(int(length))
+        except BaseException as exc:
+            # On any failure, including cancellation, drop the worker from the pool
+            # and kill it
+            workers.discard(process)
+            try:
+                process.kill()
+                # Shielded so that closing the process is not interrupted by
+                # cancellation
+                with CancelScope(shield=True):
+                    await process.aclose()
+            except ProcessLookupError:
+                # Presumably the process was already gone
+                pass
+            if isinstance(exc, get_cancelled_exc_class()):
+                raise
+            else:
+                raise BrokenWorkerProcess from exc
+        retval = pickle.loads(pickled_response)
+        if status == b"EXCEPTION":
+            assert isinstance(retval, BaseException)
+            raise retval
+        else:
+            return retval
+    # First pickle the request before trying to reserve a worker process
+    await checkpoint_if_cancelled()
+    request = pickle.dumps(("run", func, args), protocol=pickle.HIGHEST_PROTOCOL)
+    # If this is the first run in this event loop thread, set up the necessary variables
+    try:
+        workers = _process_pool_workers.get()
+        idle_workers = _process_pool_idle_workers.get()
+    except LookupError:
+        workers = set()
+        idle_workers = deque()
+        _process_pool_workers.set(workers)
+        _process_pool_idle_workers.set(idle_workers)
+        get_async_backend().setup_process_pool_exit_at_shutdown(workers)
+    # The limiter is held from worker selection until the call has completed
+    async with limiter or current_default_process_limiter():
+        # Pop processes from the pool (starting from the most recently used) until we
+        # find one that hasn't exited yet
+        process: Process
+        while idle_workers:
+            process, idle_since = idle_workers.pop()
+            if process.returncode is None:
+                stdin = cast(ByteSendStream, process.stdin)
+                buffered = BufferedByteReceiveStream(
+                    cast(ByteReceiveStream, process.stdout)
+                )
+                # Prune any other workers that have been idle for WORKER_MAX_IDLE_TIME
+                # seconds or longer
+                now = current_time()
+                killed_processes: list[Process] = []
+                while idle_workers:
+                    # Entries are appended on release, so the leftmost one has been
+                    # idle the longest
+                    if now - idle_workers[0][1] < WORKER_MAX_IDLE_TIME:
+                        break
+                    process_to_kill, idle_since = idle_workers.popleft()
+                    process_to_kill.kill()
+                    workers.remove(process_to_kill)
+                    killed_processes.append(process_to_kill)
+                with CancelScope(shield=True):
+                    for killed_process in killed_processes:
+                        await killed_process.aclose()
+                break
+            # This idle worker has already exited; forget it and try the next one
+            workers.remove(process)
+        else:
+            # No live idle worker was found: start a new one running this module
+            command = [sys.executable, "-u", "-m", __name__]
+            process = await open_process(
+                command, stdin=subprocess.PIPE, stdout=subprocess.PIPE
+            )
+            try:
+                stdin = cast(ByteSendStream, process.stdin)
+                buffered = BufferedByteReceiveStream(
+                    cast(ByteReceiveStream, process.stdout)
+                )
+                # Wait for the worker's READY handshake, bounded by fail_after(20)
+                with fail_after(20):
+                    message = await buffered.receive(6)
+                if message != b"READY\n":
+                    raise BrokenWorkerProcess(
+                        f"Worker process returned unexpected response: {message!r}"
+                    )
+                # Hand the worker this process's sys.path and main module path
+                main_module_path = getattr(sys.modules["__main__"], "__file__", None)
+                pickled = pickle.dumps(
+                    ("init", sys.path, main_module_path),
+                    protocol=pickle.HIGHEST_PROTOCOL,
+                )
+                await send_raw_command(pickled)
+            except (BrokenWorkerProcess, get_cancelled_exc_class()):
+                raise
+            except BaseException as exc:
+                process.kill()
+                raise BrokenWorkerProcess(
+                    "Error during worker process initialization"
+                ) from exc
+            workers.add(process)
+        # Unless ``cancellable`` is set, the call is shielded from cancellation
+        with CancelScope(shield=not cancellable):
+            try:
+                return cast(T_Retval, await send_raw_command(request))
+            finally:
+                # Return the worker to the idle pool unless send_raw_command has
+                # discarded it
+                if process in workers:
+                    idle_workers.append((process, current_time()))
+def current_default_process_limiter() -> CapacityLimiter:
+    """
+    Return the capacity limiter that is used by default to limit the number of worker
+    processes.
+    :return: a capacity limiter object
+    """
+    try:
+        return _default_process_limiter.get()
+    except LookupError:
+        limiter = CapacityLimiter(os.cpu_count() or 2)
+        _default_process_limiter.set(limiter)
+        return limiter
+def process_worker() -> None:
+    """
+    Serve pickled commands read from the original stdin until EOF.
+    After announcing ``READY``, each command is answered on the original stdout with a
+    ``<STATUS> <length>`` header line followed by the pickled return value or exception.
+    Supported commands are ``"run"`` (call a function with arguments) and ``"init"``
+    (install the parent's ``sys.path`` and load its main module).
+    """
+    # Redirect standard streams to os.devnull so that user code won't interfere with the
+    # parent-worker communication
+    stdin = sys.stdin
+    stdout = sys.stdout
+    sys.stdin = open(os.devnull)
+    sys.stdout = open(os.devnull, "w")
+    # Handshake: tell the parent that the worker is ready to accept commands
+    stdout.buffer.write(b"READY\n")
+    while True:
+        retval = exception = None
+        try:
+            command, *args = pickle.load(stdin.buffer)
+        except EOFError:
+            # The input stream has ended, so there is nothing more to serve
+            return
+        except BaseException as exc:
+            exception = exc
+        else:
+            if command == "run":
+                func, args = args
+                try:
+                    retval = func(*args)
+                except BaseException as exc:
+                    exception = exc
+            elif command == "init":
+                main_module_path: str | None
+                sys.path, main_module_path = args
+                del sys.modules["__main__"]
+                if main_module_path and os.path.isfile(main_module_path):
+                    # Load the parent's main module but as __mp_main__ instead of
+                    # __main__ (like multiprocessing does) to avoid infinite recursion
+                    try:
+                        spec = spec_from_file_location("__mp_main__", main_module_path)
+                        if spec and spec.loader:
+                            main = module_from_spec(spec)
+                            spec.loader.exec_module(main)
+                            sys.modules["__main__"] = main
+                    except BaseException as exc:
+                        exception = exc
+        try:
+            if exception is not None:
+                status = b"EXCEPTION"
+                pickled = pickle.dumps(exception, pickle.HIGHEST_PROTOCOL)
+            else:
+                status = b"RETURN"
+                pickled = pickle.dumps(retval, pickle.HIGHEST_PROTOCOL)
+        except BaseException as exc:
+            # The result (or exception) could not be pickled; report that failure
+            # instead
+            exception = exc
+            status = b"EXCEPTION"
+            pickled = pickle.dumps(exc, pickle.HIGHEST_PROTOCOL)
+        # Reply: header line with status and payload size, then the payload itself
+        stdout.buffer.write(b"%s %d\n" % (status, len(pickled)))
+        stdout.buffer.write(pickled)
+        # Respect SIGTERM
+        if isinstance(exception, SystemExit):
+            raise exception
+# Run the worker loop when this module is executed as a script
+if __name__ == "__main__":
+    process_worker()

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/anyio/to_thread.py ADDED Viewed

	@@ -0,0 +1,69 @@

+from __future__ import annotations
+import sys
+from collections.abc import Callable
+from typing import TypeVar
+from warnings import warn
+from ._core._eventloop import get_async_backend
+from .abc import CapacityLimiter
+if sys.version_info >= (3, 11):
+    from typing import TypeVarTuple, Unpack
+else:
+    from typing_extensions import TypeVarTuple, Unpack
+T_Retval = TypeVar("T_Retval")
+PosArgsT = TypeVarTuple("PosArgsT")
+async def run_sync(
+    func: Callable[[Unpack[PosArgsT]], T_Retval],
+    *args: Unpack[PosArgsT],
+    abandon_on_cancel: bool = False,
+    cancellable: bool | None = None,
+    limiter: CapacityLimiter | None = None,
+) -> T_Retval:
+    """
+    Call the given function with the given arguments in a worker thread.
+    If the ``abandon_on_cancel`` option is enabled and the task waiting for its
+    completion is cancelled, the thread will still run its course but its return value
+    (or any raised exception) will be ignored.
+    Passing the deprecated ``cancellable`` argument emits a ``DeprecationWarning``.
+    :param func: a callable
+    :param args: positional arguments for the callable
+    :param abandon_on_cancel: ``True`` to abandon the thread (leaving it to run
+        unchecked on own) if the host task is cancelled, ``False`` to ignore
+        cancellations in the host task until the operation has completed in the worker
+        thread
+    :param cancellable: deprecated alias of ``abandon_on_cancel``; will override
+        ``abandon_on_cancel`` if both parameters are passed
+    :param limiter: capacity limiter to use to limit the total amount of threads running
+        (if omitted, the default limiter is used)
+    :return: an awaitable that yields the return value of the function.
+    """
+    if cancellable is not None:
+        # The deprecated alias takes precedence over ``abandon_on_cancel``
+        abandon_on_cancel = cancellable
+        warn(
+            "The `cancellable=` keyword argument to `anyio.to_thread.run_sync` is "
+            "deprecated since AnyIO 4.1.0; use `abandon_on_cancel=` instead",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+    # The actual work is delegated to the backend returned by get_async_backend()
+    return await get_async_backend().run_sync_in_worker_thread(
+        func, args, abandon_on_cancel=abandon_on_cancel, limiter=limiter
+    )
+def current_default_thread_limiter() -> CapacityLimiter:
+    """
+    Return the capacity limiter that is used by default to limit the number of
+    concurrent threads.
+    :return: a capacity limiter object
+    """
+    return get_async_backend().current_default_thread_limiter()

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/_compat.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from __future__ import annotations
+from collections.abc import Mapping
+import sys
+from typing import Any
+DATACLASS_KWARGS: Mapping[str, Any]
+if sys.version_info >= (3, 10):
+    DATACLASS_KWARGS = {"slots": True}
+else:
+    DATACLASS_KWARGS = {}

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/parser_inline.py ADDED Viewed

	@@ -0,0 +1,147 @@

+"""Tokenizes paragraph content.
+"""
+from __future__ import annotations
+from typing import TYPE_CHECKING, Callable
+from . import rules_inline
+from .ruler import Ruler
+from .rules_inline.state_inline import StateInline
+from .token import Token
+from .utils import EnvType
+if TYPE_CHECKING:
+    from markdown_it import MarkdownIt
+# Parser rules
+RuleFuncInlineType = Callable[[StateInline, bool], bool]
+"""(state: StateInline, silent: bool) -> matched: bool)
+`silent` disables token generation, useful for lookahead.
+"""
+# (name, rule) pairs, listed in the order in which ``ParserInline`` pushes them
+# onto its main ruler
+_rules: list[tuple[str, RuleFuncInlineType]] = [
+    ("text", rules_inline.text),
+    ("linkify", rules_inline.linkify),
+    ("newline", rules_inline.newline),
+    ("escape", rules_inline.escape),
+    ("backticks", rules_inline.backtick),
+    ("strikethrough", rules_inline.strikethrough.tokenize),
+    ("emphasis", rules_inline.emphasis.tokenize),
+    ("link", rules_inline.link),
+    ("image", rules_inline.image),
+    ("autolink", rules_inline.autolink),
+    ("html_inline", rules_inline.html_inline),
+    ("entity", rules_inline.entity),
+]
+# Note `rule2` ruleset was created specifically for emphasis/strikethrough
+# post-processing and may be changed in the future.
+#
+# Don't use this for anything except pairs (plugins working with `balance_pairs`).
+#
+RuleFuncInline2Type = Callable[[StateInline], None]
+# (name, rule) pairs for the post-processing ruler (``ParserInline.ruler2``)
+_rules2: list[tuple[str, RuleFuncInline2Type]] = [
+    ("balance_pairs", rules_inline.link_pairs),
+    ("strikethrough", rules_inline.strikethrough.postProcess),
+    ("emphasis", rules_inline.emphasis.postProcess),
+    # rules for pairs separate '**' into its own text tokens, which may be left unused,
+    # rule below merges unused segments back with the rest of the text
+    ("fragments_join", rules_inline.fragments_join),
+]
+class ParserInline:
+    def __init__(self) -> None:
+        self.ruler = Ruler[RuleFuncInlineType]()
+        for name, rule in _rules:
+            self.ruler.push(name, rule)
+        # Second ruler used for post-processing (e.g. in emphasis-like rules)
+        self.ruler2 = Ruler[RuleFuncInline2Type]()
+        for name, rule2 in _rules2:
+            self.ruler2.push(name, rule2)
+    def skipToken(self, state: StateInline) -> None:
+        """Skip single token by running all rules in validation mode;
+        returns `True` if any rule reported success
+        """
+        ok = False
+        pos = state.pos
+        rules = self.ruler.getRules("")
+        maxNesting = state.md.options["maxNesting"]
+        cache = state.cache
+        if pos in cache:
+            state.pos = cache[pos]
+            return
+        if state.level < maxNesting:
+            for rule in rules:
+                #  Increment state.level and decrement it later to limit recursion.
+                # It's harmless to do here, because no tokens are created.
+                # But ideally, we'd need a separate private state variable for this purpose.
+                state.level += 1
+                ok = rule(state, True)
+                state.level -= 1
+                if ok:
+                    break
+        else:
+            # Too much nesting, just skip until the end of the paragraph.
+            #
+            # NOTE: this will cause links to behave incorrectly in the following case,
+            #       when an amount of `[` is exactly equal to `maxNesting + 1`:
+            #
+            #       [[[[[[[[[[[[[[[[[[[[[foo]()
+            #
+            # TODO: remove this workaround when CM standard will allow nested links
+            #       (we can replace it by preventing links from being parsed in
+            #       validation mode)
+            #
+            state.pos = state.posMax
+        if not ok:
+            state.pos += 1
+        cache[pos] = state.pos
+    def tokenize(self, state: StateInline) -> None:
+        """Generate tokens for input range."""
+        ok = False
+        rules = self.ruler.getRules("")
+        end = state.posMax
+        maxNesting = state.md.options["maxNesting"]
+        while state.pos < end:
+            # Try all possible rules.
+            # On success, rule should:
+            #
+            # - update `state.pos`
+            # - update `state.tokens`
+            # - return true
+            if state.level < maxNesting:
+                for rule in rules:
+                    ok = rule(state, False)
+                    if ok:
+                        break
+            if ok:
+                if state.pos >= end:
+                    break
+                continue
+            state.pending += state.src[state.pos]
+            state.pos += 1
+        if state.pending:
+            state.pushPending()
+    def parse(
+        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
+    ) -> list[Token]:
+        """Process input string and push inline tokens into `tokens`"""
+        state = StateInline(src, md, env, tokens)
+        self.tokenize(state)
+        rules2 = self.ruler2.getRules("")
+        for rule in rules2:
+            rule(state)
+        return state.tokens

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/port.yaml ADDED Viewed

	@@ -0,0 +1,48 @@

+- package: markdown-it/markdown-it
+  version: 13.0.1
+  commit: e843acc9edad115cbf8cf85e676443f01658be08
+  date: May 3, 2022
+  notes:
+    - Rename variables that use python built-in names, e.g.
+      - `max` -> `maximum`
+      - `len` -> `length`
+      - `str` -> `string`
+    - |
+      Convert JS `for` loops to `while` loops.
+      This is generally the main difference between the two codebases,
+      because in Python you can't write e.g. `for (i=1; i<x; i++) {}`
+    - |
+      `env` is a plain Python dictionary, and so does not have attribute access to keys,
+      unlike JavaScript objects.
+      `options` have attribute access only to core markdownit configuration options
+    - |
+      `Token.attrs` is a dictionary, instead of a list of lists.
+      Upstream the list format is only used to guarantee order: https://github.com/markdown-it/markdown-it/issues/142,
+      but in Python 3.7+ order of dictionaries is guaranteed.
+      One should anyhow use the `attrGet`, `attrSet`, `attrPush` and `attrJoin` methods
+      to manipulate `Token.attrs`, which have an identical signature to those upstream.
+    - Use python version of `charCodeAt`
+    - |
+      Use `str` units instead of `int`s to represent Unicode codepoints.
+      This provides a significant performance boost
+    - |
+      In markdown_it/rules_block/reference.py,
+      record line range in state.env["references"] and add state.env["duplicate_refs"]
+      This is to allow renderers to report on issues regarding references
+    - |
+      The `MarkdownIt.__init__` signature is slightly different for updating options,
+      since you must always specify the config first, e.g.
+      use `MarkdownIt("commonmark", {"html": False})` instead of `MarkdownIt({"html": False})`
+    - The default configuration preset for `MarkdownIt` is "commonmark" not "default"
+    - Allow custom renderer to be passed to `MarkdownIt`
+    - |
+      change render method signatures
+      `func(tokens, idx, options, env, slf)` to
+      `func(self, tokens, idx, options, env)`
+    - |
+      Extensions add render methods by format
+      `MarkdownIt.add_render_rule(name, function, fmt="html")`,
+      rather than `MarkdownIt.renderer.rules[name] = function`
+      and renderers should declare a class property `__output__ = "html"`.
+      This allows for extensibility to more than just HTML renderers
+    - inline tokens in tables are assigned a map (this is helpful for propagation to children)

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/markdown_it/token.py ADDED Viewed

	@@ -0,0 +1,180 @@

+from __future__ import annotations
+from collections.abc import Callable, MutableMapping
+import dataclasses as dc
+from typing import Any, Literal
+import warnings
+from markdown_it._compat import DATACLASS_KWARGS
+def convert_attrs(value: Any) -> Any:
+    """Convert Token.attrs set as ``None`` or ``[[key, value], ...]`` to a dict.
+    This improves compatibility with upstream markdown-it.
+    """
+    if not value:
+        return {}
+    if isinstance(value, list):
+        return dict(value)
+    return value
+@dc.dataclass(**DATACLASS_KWARGS)
+class Token:
+    type: str
+    """Type of the token (string, e.g. "paragraph_open")"""
+    tag: str
+    """HTML tag name, e.g. 'p'"""
+    nesting: Literal[-1, 0, 1]
+    """Level change (number in {-1, 0, 1} set), where:
+    -  `1` means the tag is opening
+    -  `0` means the tag is self-closing
+    - `-1` means the tag is closing
+    """
+    attrs: dict[str, str | int | float] = dc.field(default_factory=dict)
+    """HTML attributes.
+    Note this differs from the upstream "list of lists" format,
+    although than an instance can still be initialised with this format.
+    """
+    map: list[int] | None = None
+    """Source map info. Format: `[ line_begin, line_end ]`"""
+    level: int = 0
+    """Nesting level, the same as `state.level`"""
+    children: list[Token] | None = None
+    """Array of child nodes (inline and img tokens)."""
+    content: str = ""
+    """Inner content, in the case of a self-closing tag (code, html, fence, etc.),"""
+    markup: str = ""
+    """'*' or '_' for emphasis, fence string for fence, etc."""
+    info: str = ""
+    """Additional information:
+    - Info string for "fence" tokens
+    - The value "auto" for autolink "link_open" and "link_close" tokens
+    - The string value of the item marker for ordered-list "list_item_open" tokens
+    """
+    meta: dict[Any, Any] = dc.field(default_factory=dict)
+    """A place for plugins to store any arbitrary data"""
+    block: bool = False
+    """True for block-level tokens, false for inline tokens.
+    Used in renderer to calculate line breaks
+    """
+    hidden: bool = False
+    """If true, ignore this element when rendering.
+    Used for tight lists to hide paragraphs.
+    """
+    def __post_init__(self) -> None:
+        self.attrs = convert_attrs(self.attrs)
+    def attrIndex(self, name: str) -> int:
+        warnings.warn(  # noqa: B028
+            "Token.attrIndex should not be used, since Token.attrs is a dictionary",
+            UserWarning,
+        )
+        if name not in self.attrs:
+            return -1
+        return list(self.attrs.keys()).index(name)
+    def attrItems(self) -> list[tuple[str, str | int | float]]:
+        """Get (key, value) list of attrs."""
+        return list(self.attrs.items())
+    def attrPush(self, attrData: tuple[str, str | int | float]) -> None:
+        """Add `[ name, value ]` attribute to list. Init attrs if necessary."""
+        name, value = attrData
+        self.attrSet(name, value)
+    def attrSet(self, name: str, value: str | int | float) -> None:
+        """Set `name` attribute to `value`. Override old value if exists."""
+        self.attrs[name] = value
+    def attrGet(self, name: str) -> None | str | int | float:
+        """Get the value of attribute `name`, or null if it does not exist."""
+        return self.attrs.get(name, None)
+    def attrJoin(self, name: str, value: str) -> None:
+        """Join value to existing attribute via space.
+        Or create new attribute if not exists.
+        Useful to operate with token classes.
+        """
+        if name in self.attrs:
+            current = self.attrs[name]
+            if not isinstance(current, str):
+                raise TypeError(
+                    f"existing attr 'name' is not a str: {self.attrs[name]}"
+                )
+            self.attrs[name] = f"{current} {value}"
+        else:
+            self.attrs[name] = value
+    def copy(self, **changes: Any) -> Token:
+        """Return a shallow copy of the instance."""
+        return dc.replace(self, **changes)
+    def as_dict(
+        self,
+        *,
+        children: bool = True,
+        as_upstream: bool = True,
+        meta_serializer: Callable[[dict[Any, Any]], Any] | None = None,
+        filter: Callable[[str, Any], bool] | None = None,
+        dict_factory: Callable[..., MutableMapping[str, Any]] = dict,
+    ) -> MutableMapping[str, Any]:
+        """Return the token as a dictionary.
+        :param children: Also convert children to dicts
+        :param as_upstream: Ensure the output dictionary is equal to that created by markdown-it
+            For example, attrs are converted to null or lists
+        :param meta_serializer: hook for serializing ``Token.meta``
+        :param filter: A callable whose return code determines whether an
+            attribute or element is included (``True``) or dropped (``False``).
+            Is called with the (key, value) pair.
+        :param dict_factory: A callable to produce dictionaries from.
+            For example, to produce ordered dictionaries instead of normal Python
+            dictionaries, pass in ``collections.OrderedDict``.
+        """
+        mapping = dict_factory((f.name, getattr(self, f.name)) for f in dc.fields(self))
+        if filter:
+            mapping = dict_factory((k, v) for k, v in mapping.items() if filter(k, v))
+        if as_upstream and "attrs" in mapping:
+            mapping["attrs"] = (
+                None
+                if not mapping["attrs"]
+                else [[k, v] for k, v in mapping["attrs"].items()]
+            )
+        if meta_serializer and "meta" in mapping:
+            mapping["meta"] = meta_serializer(mapping["meta"])
+        if children and mapping.get("children", None):
+            mapping["children"] = [
+                child.as_dict(
+                    children=children,
+                    filter=filter,
+                    dict_factory=dict_factory,
+                    as_upstream=as_upstream,
+                    meta_serializer=meta_serializer,
+                )
+                for child in mapping["children"]
+            ]
+        return mapping
+    @classmethod
+    def from_dict(cls, dct: MutableMapping[str, Any]) -> Token:
+        """Convert a dict to a Token."""
+        token = cls(**dct)
+        if token.children:
+            token.children = [cls.from_dict(c) for c in token.children]  # type: ignore[arg-type]
+        return token

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/INSTALLER ADDED Viewed

	@@ -0,0 +1 @@


1	+ pip

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/LICENSE ADDED Viewed

	@@ -0,0 +1,202 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/METADATA ADDED Viewed

	@@ -0,0 +1,317 @@

+Metadata-Version: 2.2
+Name: propcache
+Version: 0.3.0
+Summary: Accelerated property cache
+Home-page: https://github.com/aio-libs/propcache
+Author: Andrew Svetlov
+Author-email: andrew.svetlov@gmail.com
+Maintainer: aiohttp team <team@aiohttp.org>
+Maintainer-email: team@aiohttp.org
+License: Apache-2.0
+Project-URL: Chat: Matrix, https://matrix.to/#/#aio-libs:matrix.org
+Project-URL: Chat: Matrix Space, https://matrix.to/#/#aio-libs-space:matrix.org
+Project-URL: CI: GitHub Workflows, https://github.com/aio-libs/propcache/actions?query=branch:master
+Project-URL: Code of Conduct, https://github.com/aio-libs/.github/blob/master/CODE_OF_CONDUCT.md
+Project-URL: Coverage: codecov, https://codecov.io/github/aio-libs/propcache
+Project-URL: Docs: Changelog, https://propcache.readthedocs.io/en/latest/changes/
+Project-URL: Docs: RTD, https://propcache.readthedocs.io
+Project-URL: GitHub: issues, https://github.com/aio-libs/propcache/issues
+Project-URL: GitHub: repo, https://github.com/aio-libs/propcache
+Keywords: cython,cext,propcache
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Cython
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Internet :: WWW/HTTP
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >=3.9
+Description-Content-Type: text/x-rst
+License-File: LICENSE
+License-File: NOTICE
+propcache
+=========
+The module provides a fast implementation of cached properties for Python 3.9+.
+.. image:: https://github.com/aio-libs/propcache/actions/workflows/ci-cd.yml/badge.svg
+  :target: https://github.com/aio-libs/propcache/actions?query=workflow%3ACI
+  :align: right
+.. image:: https://codecov.io/gh/aio-libs/propcache/branch/master/graph/badge.svg
+  :target: https://codecov.io/gh/aio-libs/propcache
+.. image:: https://badge.fury.io/py/propcache.svg
+    :target: https://badge.fury.io/py/propcache
+.. image:: https://readthedocs.org/projects/propcache/badge/?version=latest
+    :target: https://propcache.readthedocs.io
+.. image:: https://img.shields.io/pypi/pyversions/propcache.svg
+    :target: https://pypi.python.org/pypi/propcache
+.. image:: https://img.shields.io/matrix/aio-libs:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat
+   :target: https://matrix.to/#/%23aio-libs:matrix.org
+   :alt: Matrix Room — #aio-libs:matrix.org
+.. image:: https://img.shields.io/matrix/aio-libs-space:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs-space%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat
+   :target: https://matrix.to/#/%23aio-libs-space:matrix.org
+   :alt: Matrix Space — #aio-libs-space:matrix.org
+Introduction
+------------
+The API is designed to be nearly identical to the built-in ``functools.cached_property`` class,
+except for the additional ``under_cached_property`` class which uses ``self._cache``
+instead of ``self.__dict__`` to store the cached values and prevents ``__set__`` from being called.
+For full documentation please read https://propcache.readthedocs.io.
+Installation
+------------
+::
+   $ pip install propcache
+The library is Python 3 only!
+PyPI contains binary wheels for Linux, Windows and MacOS.  If you want to install
+``propcache`` on another operating system where wheels are not provided,
+the tarball will be used to compile the library from
+the source code. It requires a C compiler and Python headers installed.
+To skip the compilation you must explicitly opt-in by using a PEP 517
+configuration setting ``pure-python``, or setting the ``PROPCACHE_NO_EXTENSIONS``
+environment variable to a non-empty value, e.g.:
+.. code-block:: console
+   $ pip install propcache --config-settings=pure-python=false
+Please note that the pure-Python (uncompiled) version is much slower. However,
+PyPy always uses a pure-Python implementation, and, as such, it is unaffected
+by this variable.
+API documentation
+------------------
+The documentation is located at https://propcache.readthedocs.io.
+Source code
+-----------
+The project is hosted on GitHub_.
+Please file an issue on the `bug tracker
+<https://github.com/aio-libs/propcache/issues>`_ if you have found a bug
+or have some suggestion in order to improve the library.
+Discussion list
+---------------
+*aio-libs* google group: https://groups.google.com/forum/#!forum/aio-libs
+Feel free to post your questions and ideas here.
+Authors and License
+-------------------
+The ``propcache`` package is derived from ``yarl`` which is written by Andrew Svetlov.
+It's *Apache 2* licensed and freely available.
+.. _GitHub: https://github.com/aio-libs/propcache
+=========
+Changelog
+=========
+..
+    You should *NOT* be adding new change log entries to this file, this
+    file is managed by towncrier. You *may* edit previous change logs to
+    fix problems like typo corrections or such.
+    To add a new change log entry, please see
+    https://pip.pypa.io/en/latest/development/#adding-a-news-entry
+    we named the news folder "changes".
+    WARNING: Don't drop the next directive!
+.. towncrier release notes start
+0.3.0
+=====
+*(2025-02-20)*
+Features
+--------
+- Implemented support for the free-threaded build of CPython 3.13 -- by `@lysnikolaou <https://github.com/sponsors/lysnikolaou>`__.
+  *Related issues and pull requests on GitHub:*
+  `#84 <https://github.com/aio-libs/propcache/issues/84>`__.
+Packaging updates and notes for downstreams
+-------------------------------------------
+- Started building wheels for the free-threaded build of CPython 3.13 -- by `@lysnikolaou <https://github.com/sponsors/lysnikolaou>`__.
+  *Related issues and pull requests on GitHub:*
+  `#84 <https://github.com/aio-libs/propcache/issues/84>`__.
+Contributor-facing changes
+--------------------------
+- GitHub Actions CI/CD is now configured to manage caching pip-ecosystem
+  dependencies using `re-actors/cache-python-deps`_ -- an action by
+  `@webknjaz <https://github.com/sponsors/webknjaz>`__ that takes into account ABI stability and the exact
+  version of Python runtime.
+  .. _`re-actors/cache-python-deps`:
+     https://github.com/marketplace/actions/cache-python-deps
+  *Related issues and pull requests on GitHub:*
+  `#93 <https://github.com/aio-libs/propcache/issues/93>`__.
+----
+0.2.1
+=====
+*(2024-12-01)*
+Bug fixes
+---------
+- Stopped implicitly allowing the use of Cython pre-release versions when
+  building the distribution package -- by `@ajsanchezsanz <https://github.com/sponsors/ajsanchezsanz>`__ and
+  `@markgreene74 <https://github.com/sponsors/markgreene74>`__.
+  *Related commits on GitHub:*
+  `64df0a6 <https://github.com/aio-libs/propcache/commit/64df0a6>`__.
+- Fixed ``wrapped`` and ``func`` not being accessible in the Cython versions of ``propcache.api.cached_property`` and ``propcache.api.under_cached_property`` decorators -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
+  *Related issues and pull requests on GitHub:*
+  `#72 <https://github.com/aio-libs/propcache/issues/72>`__.
+Removals and backward incompatible breaking changes
+---------------------------------------------------
+- Removed support for Python 3.8 as it has reached end of life -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
+  *Related issues and pull requests on GitHub:*
+  `#57 <https://github.com/aio-libs/propcache/issues/57>`__.
+Packaging updates and notes for downstreams
+-------------------------------------------
+- Stopped implicitly allowing the use of Cython pre-release versions when
+  building the distribution package -- by `@ajsanchezsanz <https://github.com/sponsors/ajsanchezsanz>`__ and
+  `@markgreene74 <https://github.com/sponsors/markgreene74>`__.
+  *Related commits on GitHub:*
+  `64df0a6 <https://github.com/aio-libs/propcache/commit/64df0a6>`__.
+----
+0.2.0
+=====
+*(2024-10-07)*
+Bug fixes
+---------
+- Fixed loading the C-extensions on Python 3.8 -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
+  *Related issues and pull requests on GitHub:*
+  `#26 <https://github.com/aio-libs/propcache/issues/26>`__.
+Features
+--------
+- Improved typing for the ``propcache.api.under_cached_property`` decorator -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
+  *Related issues and pull requests on GitHub:*
+  `#38 <https://github.com/aio-libs/propcache/issues/38>`__.
+Improved documentation
+----------------------
+- Added API documentation for the ``propcache.api.cached_property`` and ``propcache.api.under_cached_property`` decorators -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
+  *Related issues and pull requests on GitHub:*
+  `#16 <https://github.com/aio-libs/propcache/issues/16>`__.
+Packaging updates and notes for downstreams
+-------------------------------------------
+- Moved ``propcache.api.under_cached_property`` and ``propcache.api.cached_property`` to `propcache.api` -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
+  Both decorators remain importable from the top-level package, however importing from `propcache.api` is now the recommended way to use them.
+  *Related issues and pull requests on GitHub:*
+  `#19 <https://github.com/aio-libs/propcache/issues/19>`__, `#24 <https://github.com/aio-libs/propcache/issues/24>`__, `#32 <https://github.com/aio-libs/propcache/issues/32>`__.
+- Converted project to use a src layout -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
+  *Related issues and pull requests on GitHub:*
+  `#22 <https://github.com/aio-libs/propcache/issues/22>`__, `#29 <https://github.com/aio-libs/propcache/issues/29>`__, `#37 <https://github.com/aio-libs/propcache/issues/37>`__.
+----
+0.1.0
+=====
+*(2024-10-03)*
+Features
+--------
+- Added ``armv7l`` wheels -- by `@bdraco <https://github.com/sponsors/bdraco>`__.
+  *Related issues and pull requests on GitHub:*
+  `#5 <https://github.com/aio-libs/propcache/issues/5>`__.
+----
+0.0.0
+=====
+*(2024-10-02)*
+- Initial release.

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/NOTICE ADDED Viewed

	@@ -0,0 +1,13 @@

+   Copyright 2016-2021, Andrew Svetlov and aio-libs team
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/RECORD ADDED Viewed

	@@ -0,0 +1,18 @@

+propcache-0.3.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+propcache-0.3.0.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+propcache-0.3.0.dist-info/METADATA,sha256=EVZE30m1bHCrcoZ2RLtvfNkdXyHIInAIG39SNZEjLyE,10357
+propcache-0.3.0.dist-info/NOTICE,sha256=VtasbIEFwKUTBMIdsGDjYa-ajqCvmnXCOcKLXRNpODg,609
+propcache-0.3.0.dist-info/RECORD,,
+propcache-0.3.0.dist-info/WHEEL,sha256=siqMuoWpRueIZ87ijidBxnOwHeSOOcxNwYCs-pC4Yv0,151
+propcache-0.3.0.dist-info/top_level.txt,sha256=pVF_GbqSAITPMiX27kfU3QP9-ufhRvkADmudDxWdF3w,10
+propcache/__init__.py,sha256=82yOKjJMHwsj2IpsIfiuDumvBEOckaz2HB823YDJH4Y,965
+propcache/__pycache__/__init__.cpython-312.pyc,,
+propcache/__pycache__/_helpers.cpython-312.pyc,,
+propcache/__pycache__/_helpers_py.cpython-312.pyc,,
+propcache/__pycache__/api.cpython-312.pyc,,
+propcache/_helpers.py,sha256=8CnlWmfTM6RDbMvNDXwL-VMHWiwIUjG8nbeqmvRsbh8,1579
+propcache/_helpers_c.cpython-312-x86_64-linux-gnu.so,sha256=mqnzbvcdqj2KOhx9xGMMjatXLlEuB69xfnsCF5O9G8I,844512
+propcache/_helpers_c.pyx,sha256=9UqfhVrbbkiZDGtEPFEOfT7qghPjAkNtJpgI1JYUPao,2518
+propcache/_helpers_py.py,sha256=jnK6W43iETLcW-A1WMroGUKnElzX8Drw2UQfbEqLlI8,1637
+propcache/api.py,sha256=wvgB-ypkkI5uf72VVYl2NFGc_TnzUQA2CxC7dTlL5ak,179
+propcache/py.typed,sha256=ay5OMO475PlcZ_Fbun9maHW7Y6MBTk0UXL4ztHx3Iug,14

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/WHEEL ADDED Viewed

	@@ -0,0 +1,6 @@

+Wheel-Version: 1.0
+Generator: setuptools (75.8.0)
+Root-Is-Purelib: false
+Tag: cp312-cp312-manylinux_2_17_x86_64
+Tag: cp312-cp312-manylinux2014_x86_64

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/propcache-0.3.0.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ propcache

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/__init__.py ADDED Viewed

	@@ -0,0 +1,437 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# flake8: noqa
+"""
+PyArrow is the python implementation of Apache Arrow.
+Apache Arrow is a cross-language development platform for in-memory data.
+It specifies a standardized language-independent columnar memory format for
+flat and hierarchical data, organized for efficient analytic operations on
+modern hardware. It also provides computational libraries and zero-copy
+streaming messaging and interprocess communication.
+For more information see the official page at https://arrow.apache.org
+"""
+import gc as _gc
+import importlib as _importlib
+import os as _os
+import platform as _platform
+import sys as _sys
+import warnings as _warnings
+# Resolve ``__version__``: use the generated version module when it can be
+# imported; otherwise derive the version from git tags via setuptools_scm,
+# and fall back to None if that path raises ImportError as well.
+try:
+    from ._generated_version import version as __version__
+except ImportError:
+    # Package is not installed, parse git tag at runtime
+    try:
+        import setuptools_scm
+        # Code duplicated from setup.py to avoid a dependency on each other
+        def parse_git(root, **kwargs):
+            """
+            Parse function for setuptools_scm that ignores tags for non-C++
+            subprojects, e.g. apache-arrow-js-XXX tags.
+
+            Overrides ``describe_command`` in ``kwargs`` and delegates to
+            ``setuptools_scm.git.parse``, returning its result.
+            """
+            from setuptools_scm.git import parse
+            # Restrict ``git describe`` to tags matching the pattern below.
+            kwargs['describe_command'] = \
+                "git describe --dirty --tags --long --match 'apache-arrow-[0-9]*.*'"
+            return parse(root, **kwargs)
+        __version__ = setuptools_scm.get_version('../',
+                                                 parse=parse_git)
+    except ImportError:
+        __version__ = None
+# ARROW-8684: Disable GC while initializing Cython extension module,
+# to workaround Cython bug in https://github.com/cython/cython/issues/3603
+# The previous GC state is recorded first so that collection is re-enabled
+# afterwards only if it was enabled before the import.
+_gc_enabled = _gc.isenabled()
+_gc.disable()
+import pyarrow.lib as _lib
+if _gc_enabled:
+    _gc.enable()
+from pyarrow.lib import (BuildInfo, RuntimeInfo, set_timezone_db_path,
+                         MonthDayNano, VersionInfo, cpp_build_info,
+                         cpp_version, cpp_version_info, runtime_info,
+                         cpu_count, set_cpu_count, enable_signal_handlers,
+                         io_thread_count, set_io_thread_count)
+def show_versions():
+    """
+    Print various version information, to help with error reporting.
+
+    Writes a header followed by one line per field of ``cpp_build_info``:
+    package kind, library version, compiler id and version, compiler flags,
+    git revision, git description and build type. Returns nothing.
+    """
+    # Left-align the label in 26 columns and the value in at least 8.
+    def print_entry(label, value):
+        print(f"{label: <26}: {value: <8}")
+    print("pyarrow version info\n--------------------")
+    # An empty package_kind is reported as "not indicated".
+    print_entry("Package kind", cpp_build_info.package_kind
+                if len(cpp_build_info.package_kind) > 0
+                else "not indicated")
+    print_entry("Arrow C++ library version", cpp_build_info.version)
+    print_entry("Arrow C++ compiler",
+                f"{cpp_build_info.compiler_id} {cpp_build_info.compiler_version}")
+    print_entry("Arrow C++ compiler flags", cpp_build_info.compiler_flags)
+    print_entry("Arrow C++ git revision", cpp_build_info.git_id)
+    print_entry("Arrow C++ git description", cpp_build_info.git_description)
+    print_entry("Arrow C++ build type", cpp_build_info.build_type)
+def _module_is_available(module):
+    """
+    Return True if the submodule ``pyarrow.<module>`` can be imported.
+
+    Parameters
+    ----------
+    module : str
+        Submodule name relative to the ``pyarrow`` package, e.g. "csv".
+
+    Returns
+    -------
+    bool
+        False if the import raises ImportError, True otherwise.
+    """
+    try:
+        _importlib.import_module(f'pyarrow.{module}')
+    except ImportError:
+        return False
+    else:
+        return True
+def _filesystem_is_available(fs):
+    """
+    Return True if ``pyarrow.fs`` imports and has an attribute named ``fs``.
+
+    Parameters
+    ----------
+    fs : str
+        Attribute name to look up on ``pyarrow.fs``, e.g. "S3FileSystem".
+
+    Returns
+    -------
+    bool
+        False if importing ``pyarrow.fs`` raises ImportError, or if the
+        attribute lookup raises ImportError or AttributeError; True otherwise.
+    """
+    try:
+        import pyarrow.fs
+    except ImportError:
+        return False
+    try:
+        # NOTE(review): ImportError is caught here too; presumably attribute
+        # access on pyarrow.fs can trigger an import -- confirm.
+        getattr(pyarrow.fs, fs)
+    except (ImportError, AttributeError):
+        return False
+    else:
+        return True
+def show_info():
+    """
+    Print detailed version and platform information, for error reporting
+
+    Calls ``show_versions()`` first, then prints sections for the platform,
+    the default memory pool, optional pyarrow modules, filesystems and
+    compression codecs. Unavailable optional items are shown as "-".
+    Returns nothing.
+    """
+    show_versions()
+    # Indented by two spaces; label left-aligned in 20 columns.
+    def print_entry(label, value):
+        print(f"  {label: <20}: {value: <8}")
+    print("\nPlatform:")
+    print_entry("OS / Arch", f"{_platform.system()} {_platform.machine()}")
+    print_entry("SIMD Level", runtime_info().simd_level)
+    print_entry("Detected SIMD Level", runtime_info().detected_simd_level)
+    # default_memory_pool, supported_memory_backends and Codec are imported
+    # from pyarrow.lib further down in this module and are looked up when
+    # this function is called.
+    pool = default_memory_pool()
+    print("\nMemory:")
+    print_entry("Default backend", pool.backend_name)
+    print_entry("Bytes allocated", f"{pool.bytes_allocated()} bytes")
+    print_entry("Max memory", f"{pool.max_memory()} bytes")
+    print_entry("Supported Backends", ', '.join(supported_memory_backends()))
+    print("\nOptional modules:")
+    modules = ["csv", "cuda", "dataset", "feather", "flight", "fs", "gandiva", "json",
+               "orc", "parquet"]
+    # A module counts as enabled when ``pyarrow.<module>`` imports cleanly.
+    for module in modules:
+        status = "Enabled" if _module_is_available(module) else "-"
+        print(f"  {module: <20}: {status: <8}")
+    print("\nFilesystems:")
+    filesystems = ["AzureFileSystem", "GcsFileSystem",
+                   "HadoopFileSystem", "S3FileSystem"]
+    # A filesystem counts as enabled when ``pyarrow.fs`` exposes its name.
+    for fs in filesystems:
+        status = "Enabled" if _filesystem_is_available(fs) else "-"
+        print(f"  {fs: <20}: {status: <8}")
+    print("\nCompression Codecs:")
+    codecs = ["brotli", "bz2", "gzip", "lz4_frame", "lz4", "snappy", "zstd"]
+    for codec in codecs:
+        status = "Enabled" if Codec.is_available(codec) else "-"
+        print(f"  {codec: <20}: {status: <8}")
+from pyarrow.lib import (null, bool_,
+                         int8, int16, int32, int64,
+                         uint8, uint16, uint32, uint64,
+                         time32, time64, timestamp, date32, date64, duration,
+                         month_day_nano_interval,
+                         float16, float32, float64,
+                         binary, string, utf8, binary_view, string_view,
+                         large_binary, large_string, large_utf8,
+                         decimal32, decimal64, decimal128, decimal256,
+                         list_, large_list, list_view, large_list_view,
+                         map_, struct,
+                         union, sparse_union, dense_union,
+                         dictionary,
+                         run_end_encoded,
+                         bool8, fixed_shape_tensor, json_, opaque, uuid,
+                         field,
+                         type_for_alias,
+                         DataType, DictionaryType, StructType,
+                         ListType, LargeListType, FixedSizeListType,
+                         ListViewType, LargeListViewType,
+                         MapType, UnionType, SparseUnionType, DenseUnionType,
+                         TimestampType, Time32Type, Time64Type, DurationType,
+                         FixedSizeBinaryType,
+                         Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type,
+                         BaseExtensionType, ExtensionType,
+                         RunEndEncodedType, Bool8Type, FixedShapeTensorType,
+                         JsonType, OpaqueType, UuidType,
+                         PyExtensionType, UnknownExtensionType,
+                         register_extension_type, unregister_extension_type,
+                         DictionaryMemo,
+                         KeyValueMetadata,
+                         Field,
+                         Schema,
+                         schema,
+                         unify_schemas,
+                         Array, Tensor,
+                         array, chunked_array, record_batch, nulls, repeat,
+                         SparseCOOTensor, SparseCSRMatrix, SparseCSCMatrix,
+                         SparseCSFTensor,
+                         infer_type, from_numpy_dtype,
+                         NullArray,
+                         NumericArray, IntegerArray, FloatingPointArray,
+                         BooleanArray,
+                         Int8Array, UInt8Array,
+                         Int16Array, UInt16Array,
+                         Int32Array, UInt32Array,
+                         Int64Array, UInt64Array,
+                         HalfFloatArray, FloatArray, DoubleArray,
+                         ListArray, LargeListArray, FixedSizeListArray,
+                         ListViewArray, LargeListViewArray,
+                         MapArray, UnionArray,
+                         BinaryArray, StringArray,
+                         LargeBinaryArray, LargeStringArray,
+                         BinaryViewArray, StringViewArray,
+                         FixedSizeBinaryArray,
+                         DictionaryArray,
+                         Date32Array, Date64Array, TimestampArray,
+                         Time32Array, Time64Array, DurationArray,
+                         MonthDayNanoIntervalArray,
+                         Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
+                         StructArray, ExtensionArray,
+                         RunEndEncodedArray, Bool8Array, FixedShapeTensorArray,
+                         JsonArray, OpaqueArray, UuidArray,
+                         scalar, NA, _NULL as NULL, Scalar,
+                         NullScalar, BooleanScalar,
+                         Int8Scalar, Int16Scalar, Int32Scalar, Int64Scalar,
+                         UInt8Scalar, UInt16Scalar, UInt32Scalar, UInt64Scalar,
+                         HalfFloatScalar, FloatScalar, DoubleScalar,
+                         Decimal32Scalar, Decimal64Scalar, Decimal128Scalar, Decimal256Scalar,
+                         ListScalar, LargeListScalar, FixedSizeListScalar,
+                         ListViewScalar, LargeListViewScalar,
+                         Date32Scalar, Date64Scalar,
+                         Time32Scalar, Time64Scalar,
+                         TimestampScalar, DurationScalar,
+                         MonthDayNanoIntervalScalar,
+                         BinaryScalar, LargeBinaryScalar, BinaryViewScalar,
+                         StringScalar, LargeStringScalar, StringViewScalar,
+                         FixedSizeBinaryScalar, DictionaryScalar,
+                         MapScalar, StructScalar, UnionScalar,
+                         RunEndEncodedScalar, Bool8Scalar, ExtensionScalar,
+                         FixedShapeTensorScalar, JsonScalar, OpaqueScalar, UuidScalar)
+# Buffers, allocation
+from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager,
+                         default_cpu_memory_manager)
+from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
+                         Codec, compress, decompress, allocate_buffer)
+from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool,
+                         total_allocated_bytes, set_memory_pool,
+                         default_memory_pool, system_memory_pool,
+                         jemalloc_memory_pool, mimalloc_memory_pool,
+                         logging_memory_pool, proxy_memory_pool,
+                         log_memory_allocations, jemalloc_set_decay_ms,
+                         supported_memory_backends)
+# I/O
+from pyarrow.lib import (NativeFile, PythonFile,
+                         BufferedInputStream, BufferedOutputStream, CacheOptions,
+                         CompressedInputStream, CompressedOutputStream,
+                         TransformInputStream, transcoding_input_stream,
+                         FixedSizeBufferWriter,
+                         BufferReader, BufferOutputStream,
+                         OSFile, MemoryMappedFile, memory_map,
+                         create_memory_map, MockOutputStream,
+                         input_stream, output_stream,
+                         have_libhdfs)
+from pyarrow.lib import (ChunkedArray, RecordBatch, Table, table,
+                         concat_arrays, concat_tables, TableGroupBy,
+                         RecordBatchReader, concat_batches)
+# Exceptions
+from pyarrow.lib import (ArrowCancelled,
+                         ArrowCapacityError,
+                         ArrowException,
+                         ArrowKeyError,
+                         ArrowIndexError,
+                         ArrowInvalid,
+                         ArrowIOError,
+                         ArrowMemoryError,
+                         ArrowNotImplementedError,
+                         ArrowTypeError,
+                         ArrowSerializationError)
+from pyarrow.ipc import serialize_pandas, deserialize_pandas
+import pyarrow.ipc as ipc
+import pyarrow.types as types
+# ----------------------------------------------------------------------
+# Deprecations
+from pyarrow.util import _deprecate_api, _deprecate_class
+# TODO: Deprecate these somehow in the pyarrow namespace
+from pyarrow.ipc import (Message, MessageReader, MetadataVersion,
+                         RecordBatchFileReader, RecordBatchFileWriter,
+                         RecordBatchStreamReader, RecordBatchStreamWriter)
+# ----------------------------------------------------------------------
+# Returning absolute path to the pyarrow include directory (if bundled, e.g. in
+# wheels)
+def get_include():
+    """
+    Return absolute path to directory containing Arrow C++ include
+    headers. Similar to numpy.get_include
+    """
+    return _os.path.join(_os.path.dirname(__file__), 'include')
+def _get_pkg_config_executable():
+    return _os.environ.get('PKG_CONFIG', 'pkg-config')
+def _has_pkg_config(pkgname):
+    import subprocess
+    try:
+        return subprocess.call([_get_pkg_config_executable(),
+                                '--exists', pkgname]) == 0
+    except FileNotFoundError:
+        return False
+def _read_pkg_config_variable(pkgname, cli_args):
+    import subprocess
+    cmd = [_get_pkg_config_executable(), pkgname] + cli_args
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE)
+    out, err = proc.communicate()
+    if proc.returncode != 0:
+        raise RuntimeError("pkg-config failed: " + err.decode('utf8'))
+    return out.rstrip().decode('utf8')
+def get_libraries():
+    """
+    Return list of library names to include in the `libraries` argument for C
+    or Cython extensions using pyarrow
+    """
+    return ['arrow_python', 'arrow']
+def create_library_symlinks():
+    """
+    With Linux and macOS wheels, the bundled shared libraries have an embedded
+    ABI version like libarrow.so.17 or libarrow.17.dylib and so linking to them
+    with -larrow won't work unless we create symlinks at locations like
+    site-packages/pyarrow/libarrow.so. This unfortunate workaround addresses
+    prior problems we had with shipping two copies of the shared libraries to
+    permit third party projects like turbodbc to build their C++ extensions
+    against the pyarrow wheels.
+    This function must only be invoked once and only when the shared libraries
+    are bundled with the Python package, which should only apply to wheel-based
+    installs. It requires write access to the site-packages/pyarrow directory
+    and so depending on your system may need to be run with root.
+    """
+    import glob
+    if _sys.platform == 'win32':
+        return
+    package_cwd = _os.path.dirname(__file__)
+    if _sys.platform == 'linux':
+        bundled_libs = glob.glob(_os.path.join(package_cwd, '*.so.*'))
+        def get_symlink_path(hard_path):
+            return hard_path.rsplit('.', 1)[0]
+    else:
+        bundled_libs = glob.glob(_os.path.join(package_cwd, '*.*.dylib'))
+        def get_symlink_path(hard_path):
+            return '.'.join((hard_path.rsplit('.', 2)[0], 'dylib'))
+    for lib_hard_path in bundled_libs:
+        symlink_path = get_symlink_path(lib_hard_path)
+        if _os.path.exists(symlink_path):
+            continue
+        try:
+            _os.symlink(lib_hard_path, symlink_path)
+        except PermissionError:
+            print("Tried creating symlink {}. If you need to link to "
+                  "bundled shared libraries, run "
+                  "pyarrow.create_library_symlinks() as root")
+def get_library_dirs():
+    """
+    Return lists of directories likely to contain Arrow C++ libraries for
+    linking C or Cython extensions using pyarrow
+    """
+    package_cwd = _os.path.dirname(__file__)
+    library_dirs = [package_cwd]
+    def append_library_dir(library_dir):
+        if library_dir not in library_dirs:
+            library_dirs.append(library_dir)
+    # Search library paths via pkg-config. This is necessary if the user
+    # installed libarrow and the other shared libraries manually and they
+    # are not shipped inside the pyarrow package (see also ARROW-2976).
+    pkg_config_executable = _os.environ.get('PKG_CONFIG') or 'pkg-config'
+    for pkgname in ["arrow", "arrow_python"]:
+        if _has_pkg_config(pkgname):
+            library_dir = _read_pkg_config_variable(pkgname,
+                                                    ["--libs-only-L"])
+            # pkg-config output could be empty if Arrow is installed
+            # as a system package.
+            if library_dir:
+                if not library_dir.startswith("-L"):
+                    raise ValueError(
+                        "pkg-config --libs-only-L returned unexpected "
+                        "value {!r}".format(library_dir))
+                append_library_dir(library_dir[2:])
+    if _sys.platform == 'win32':
+        # TODO(wesm): Is this necessary, or does setuptools within a conda
+        # installation add Library\lib to the linker path for MSVC?
+        python_base_install = _os.path.dirname(_sys.executable)
+        library_dir = _os.path.join(python_base_install, 'Library', 'lib')
+        if _os.path.exists(_os.path.join(library_dir, 'arrow.lib')):
+            append_library_dir(library_dir)
+    # ARROW-4074: Allow for ARROW_HOME to be set to some other directory
+    if _os.environ.get('ARROW_HOME'):
+        append_library_dir(_os.path.join(_os.environ['ARROW_HOME'], 'lib'))
+    else:
+        # Python wheels bundle the Arrow libraries in the pyarrow directory.
+        append_library_dir(_os.path.dirname(_os.path.abspath(__file__)))
+    return library_dirs

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_azurefs.pyx ADDED Viewed

	@@ -0,0 +1,134 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# cython: language_level = 3
+from cython cimport binding
+from pyarrow.lib import frombytes, tobytes
+from pyarrow.includes.libarrow_fs cimport *
+from pyarrow._fs cimport FileSystem
+cdef class AzureFileSystem(FileSystem):
+    """
+    Azure Blob Storage backed FileSystem implementation
+    This implementation supports flat namespace and hierarchical namespace (HNS) a.k.a.
+    Data Lake Gen2 storage accounts. HNS will be automatically detected and HNS specific
+    features will be used when they provide a performance advantage. Azurite emulator is
+    also supported. Note: `/` is the only supported delimiter.
+    The storage account is considered the root of the filesystem. When enabled, containers
+    will be created or deleted during relevant directory operations. Obviously, this also
+    requires authentication with the additional permissions.
+    By default `DefaultAzureCredential <https://github.com/Azure/azure-sdk-for-cpp/blob/main/sdk/identity/azure-identity/README.md#defaultazurecredential>`__
+    is used for authentication. This means it will try several types of authentication
+    and go with the first one that works. If any authentication parameters are provided when
+    initialising the FileSystem, they will be used instead of the default credential.
+    Parameters
+    ----------
+    account_name : str
+        Azure Blob Storage account name. This is the globally unique identifier for the
+        storage account.
+    account_key : str, default None
+        Account key of the storage account. Pass None to use default credential.
+    blob_storage_authority : str, default None
+        hostname[:port] of the Blob Service. Defaults to `.blob.core.windows.net`. Useful
+        for connecting to a local emulator, like Azurite.
+    dfs_storage_authority : str, default None
+        hostname[:port] of the Data Lake Gen 2 Service. Defaults to
+        `.dfs.core.windows.net`. Useful for connecting to a local emulator, like Azurite.
+    blob_storage_scheme : str, default None
+        Either `http` or `https`. Defaults to `https`. Useful for connecting to a local
+        emulator, like Azurite.
+    dfs_storage_scheme : str, default None
+        Either `http` or `https`. Defaults to `https`. Useful for connecting to a local
+        emulator, like Azurite.
+    Examples
+    --------
+    >>> from pyarrow import fs
+    >>> azure_fs = fs.AzureFileSystem(account_name='myaccount')
+    >>> azurite_fs = fs.AzureFileSystem(
+    ...     account_name='devstoreaccount1',
+    ...     account_key='Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==',
+    ...     blob_storage_authority='127.0.0.1:10000',
+    ...     dfs_storage_authority='127.0.0.1:10000',
+    ...     blob_storage_scheme='http',
+    ...     dfs_storage_scheme='http',
+    ... )
+    For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`.
+    """
+    cdef:
+        # Raw pointer to the wrapped C++ filesystem; assigned in init().
+        CAzureFileSystem* azurefs
+        # Copy of the account key given to __init__, kept so that
+        # __reduce__ can hand it back to the constructor.
+        c_string account_key
+    def __init__(self, account_name, *, account_key=None, blob_storage_authority=None,
+                 dfs_storage_authority=None, blob_storage_scheme=None,
+                 dfs_storage_scheme=None):
+        cdef:
+            CAzureOptions options
+            shared_ptr[CAzureFileSystem] wrapped
+        options.account_name = tobytes(account_name)
+        # Optional settings are only written when a truthy value is given;
+        # None and '' both leave the corresponding option field untouched.
+        if blob_storage_authority:
+            options.blob_storage_authority = tobytes(blob_storage_authority)
+        if dfs_storage_authority:
+            options.dfs_storage_authority = tobytes(dfs_storage_authority)
+        if blob_storage_scheme:
+            options.blob_storage_scheme = tobytes(blob_storage_scheme)
+        if dfs_storage_scheme:
+            options.dfs_storage_scheme = tobytes(dfs_storage_scheme)
+        # A truthy account key selects account-key authentication; anything
+        # else (None or an empty string) falls back to the default credential.
+        if account_key:
+            options.ConfigureAccountKeyCredential(tobytes(account_key))
+            self.account_key = tobytes(account_key)
+        else:
+            options.ConfigureDefaultCredential()
+        # The C++ factory call runs with the GIL released.
+        with nogil:
+            wrapped = GetResultValue(CAzureFileSystem.Make(options))
+        self.init(<shared_ptr[CFileSystem]> wrapped)
+    cdef init(self, const shared_ptr[CFileSystem]& wrapped):
+        # Let the base class store the shared pointer, then keep a typed raw
+        # pointer to the same object for Azure-specific calls.
+        FileSystem.init(self, wrapped)
+        self.azurefs = <CAzureFileSystem*> wrapped.get()
+    @staticmethod
+    @binding(True)  # Required for cython < 3
+    def _reconstruct(kwargs):
+        # __reduce__ doesn't allow passing named arguments directly to the
+        # reconstructor, hence this wrapper.
+        return AzureFileSystem(**kwargs)
+    def __reduce__(self):
+        # Pickle support: rebuild through _reconstruct with keyword arguments
+        # read back from the C++ options, plus the account key stored on this
+        # instance.
+        cdef CAzureOptions opts = self.azurefs.options()
+        return (
+            AzureFileSystem._reconstruct, (dict(
+                account_name=frombytes(opts.account_name),
+                account_key=frombytes(self.account_key),
+                blob_storage_authority=frombytes(opts.blob_storage_authority),
+                dfs_storage_authority=frombytes(opts.dfs_storage_authority),
+                blob_storage_scheme=frombytes(opts.blob_storage_scheme),
+                dfs_storage_scheme=frombytes(opts.dfs_storage_scheme)
+            ),))

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_compute.pyx ADDED Viewed

The diff for this file is too large to render. See raw diff

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_compute_docstrings.py ADDED Viewed

	@@ -0,0 +1,56 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Custom documentation additions for compute functions.
+"""
+function_doc_additions = {}
+function_doc_additions["filter"] = """
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> arr = pa.array(["a", "b", "c", None, "e"])
+    >>> mask = pa.array([True, False, None, False, True])
+    >>> arr.filter(mask)
+    <pyarrow.lib.StringArray object at ...>
+    [
+      "a",
+      "e"
+    ]
+    >>> arr.filter(mask, null_selection_behavior='emit_null')
+    <pyarrow.lib.StringArray object at ...>
+    [
+      "a",
+      null,
+      "e"
+    ]
+    """
+function_doc_additions["mode"] = """
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>> arr = pa.array([1, 1, 2, 2, 3, 2, 2, 2])
+    >>> modes = pc.mode(arr, 2)
+    >>> modes[0]
+    <pyarrow.StructScalar: [('mode', 2), ('count', 5)]>
+    >>> modes[1]
+    <pyarrow.StructScalar: [('mode', 1), ('count', 2)]>
+    """

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_cuda.pyx ADDED Viewed

	@@ -0,0 +1,1080 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from pyarrow.lib cimport *
+from pyarrow.includes.libarrow_cuda cimport *
+from pyarrow.lib import allocate_buffer, as_buffer, ArrowTypeError
+from pyarrow.util import get_contiguous_span
+cimport cpython as cp
+cdef class Context(_Weakrefable):
+    """
+    CUDA driver context.
+    """
+    def __init__(self, *args, **kwargs):
+        """
+        Create a CUDA driver context for a particular device.
+        If a CUDA context handle is passed, it is wrapped, otherwise
+        a default CUDA context for the given device is requested.
+        Parameters
+        ----------
+        device_number : int (default 0)
+          Specify the GPU device for which the CUDA driver context is
+          requested.
+        handle : int, optional
+          Specify CUDA handle for a shared context that has been created
+          by another library.
+        """
+        # This method exposed because autodoc doesn't pick __cinit__
+    def __cinit__(self, int device_number=0, uintptr_t handle=0):
+        cdef CCudaDeviceManager* manager
+        manager = GetResultValue(CCudaDeviceManager.Instance())
+        cdef int n = manager.num_devices()
+        # Reject device numbers outside [0, n) before asking for a context.
+        if device_number >= n or device_number < 0:
+            self.context.reset()
+            raise ValueError('device_number argument must be '
+                             'non-negative less than %s' % (n))
+        # handle == 0 means "no foreign context given": request the context
+        # for the device; otherwise wrap the context behind the handle.
+        if handle == 0:
+            self.context = GetResultValue(manager.GetContext(device_number))
+        else:
+            self.context = GetResultValue(manager.GetSharedContext(
+                device_number, <void*>handle))
+        self.device_number = device_number
+    @staticmethod
+    def from_numba(context=None):
+        """
+        Create a Context instance from a Numba CUDA context.
+        Parameters
+        ----------
+        context : {numba.cuda.cudadrv.driver.Context, None}
+          A Numba CUDA context instance.
+          If None, the current Numba context is used.
+        Returns
+        -------
+        shared_context : pyarrow.cuda.Context
+          Context instance.
+        """
+        if context is None:
+            import numba.cuda
+            context = numba.cuda.current_context()
+        return Context(device_number=context.device.id,
+                       handle=context.handle.value)
+    def to_numba(self):
+        """
+        Convert Context to a Numba CUDA context.
+        Returns
+        -------
+        context : numba.cuda.cudadrv.driver.Context
+          Numba CUDA context instance.
+        """
+        import ctypes
+        import numba.cuda
+        device = numba.cuda.gpus[self.device_number]
+        handle = ctypes.c_void_p(self.handle)
+        context = numba.cuda.cudadrv.driver.Context(device, handle)
+        class DummyPendingDeallocs(object):
+            # Context is managed by pyarrow
+            def add_item(self, *args, **kwargs):
+                pass
+        # Replace the deallocation queue with a stub whose add_item does
+        # nothing.
+        context.deallocations = DummyPendingDeallocs()
+        return context
+    @staticmethod
+    def get_num_devices():
+        """ Return the number of GPU devices.
+        """
+        cdef CCudaDeviceManager* manager
+        manager = GetResultValue(CCudaDeviceManager.Instance())
+        return manager.num_devices()
+    @property
+    def device_number(self):
+        """ Return context device number.
+        """
+        return self.device_number
+    @property
+    def handle(self):
+        """ Return pointer to context handle.
+        """
+        return <uintptr_t>self.context.get().handle()
+    cdef void init(self, const shared_ptr[CCudaContext]& ctx):
+        self.context = ctx
+    def synchronize(self):
+        """Blocks until the device has completed all preceding requested
+        tasks.
+        """
+        check_status(self.context.get().Synchronize())
+    @property
+    def bytes_allocated(self):
+        """Return the number of allocated bytes.
+        """
+        return self.context.get().bytes_allocated()
+    def get_device_address(self, uintptr_t address):
+        """Return the device address that is reachable from kernels running in
+        the context
+        Parameters
+        ----------
+        address : int
+          Specify memory address value
+        Returns
+        -------
+        device_address : int
+          Device address accessible from device context
+        Notes
+        -----
+        The device address is defined as a memory address accessible
+        by device. It is often a device memory address, but it can
+        also be a host memory address, for instance, when the
+        memory is allocated as host memory (using cudaMallocHost or
+        cudaHostAlloc) or as managed memory (using cudaMallocManaged)
+        or the host memory is page-locked (using cudaHostRegister).
+        """
+        return GetResultValue(self.context.get().GetDeviceAddress(address))
+    def new_buffer(self, int64_t nbytes):
+        """Return new device buffer.
+        Parameters
+        ----------
+        nbytes : int
+          Specify the number of bytes to be allocated.
+        Returns
+        -------
+        buf : CudaBuffer
+          Allocated buffer.
+        """
+        cdef:
+            shared_ptr[CCudaBuffer] cudabuf
+        with nogil:
+            cudabuf = GetResultValue(self.context.get().Allocate(nbytes))
+        return pyarrow_wrap_cudabuffer(cudabuf)
+    @property
+    def memory_manager(self):
+        """
+        The default memory manager tied to this context's device.
+        Returns
+        -------
+        MemoryManager
+        """
+        return MemoryManager.wrap(self.context.get().memory_manager())
+    @property
+    def device(self):
+        """
+        The device instance associated with this context.
+        Returns
+        -------
+        Device
+        """
+        return Device.wrap(self.context.get().device())
+    def foreign_buffer(self, address, size, base=None):
+        """
+        Create device buffer from address and size as a view.
+        The caller is responsible for allocating and freeing the
+        memory. When `address==size==0` then a new zero-sized buffer
+        is returned.
+        Parameters
+        ----------
+        address : int
+          Specify the starting address of the buffer. The address can
+          refer to both device or host memory but it must be
+          accessible from device after mapping it with
+          `get_device_address` method.
+        size : int
+          Specify the size of device buffer in bytes.
+        base : {None, object}
+          Specify object that owns the referenced memory.
+        Returns
+        -------
+        cbuf : CudaBuffer
+          Device buffer as a view of device reachable memory.
+        """
+        if not address and size == 0:
+            return self.new_buffer(0)
+        cdef:
+            uintptr_t c_addr = self.get_device_address(address)
+            int64_t c_size = size
+            shared_ptr[CCudaBuffer] cudabuf
+        cudabuf = GetResultValue(self.context.get().View(
+            <uint8_t*>c_addr, c_size))
+        return pyarrow_wrap_cudabuffer_base(cudabuf, base)
+    def open_ipc_buffer(self, ipc_handle):
+        """ Open existing CUDA IPC memory handle
+        Parameters
+        ----------
+        ipc_handle : IpcMemHandle
+          Specify opaque pointer to CUipcMemHandle (driver API).
+        Returns
+        -------
+        buf : CudaBuffer
+          referencing device buffer
+        """
+        handle = pyarrow_unwrap_cudaipcmemhandle(ipc_handle)
+        cdef shared_ptr[CCudaBuffer] cudabuf
+        with nogil:
+            cudabuf = GetResultValue(
+                self.context.get().OpenIpcBuffer(handle.get()[0]))
+        return pyarrow_wrap_cudabuffer(cudabuf)
+    def buffer_from_data(self, object data, int64_t offset=0, int64_t size=-1):
+        """Create device buffer and initialize with data.
+        Parameters
+        ----------
+        data : {CudaBuffer, HostBuffer, Buffer, array-like}
+          Specify data to be copied to device buffer.
+        offset : int
+          Specify the offset of input buffer for device data
+          buffering. Default: 0.
+        size : int
+          Specify the size of device buffer in bytes. Default: all
+          (starting from input offset)
+        Returns
+        -------
+        cbuf : CudaBuffer
+          Device buffer with copied data.
+        Raises
+        ------
+        ValueError
+          If `offset` is out of range or `offset + size` exceeds the
+          size of the input.
+        """
+        is_host_data = not pyarrow_is_cudabuffer(data)
+        buf = as_buffer(data) if is_host_data else data
+        bsize = buf.size
+        # For an empty input only offset 0 is accepted.
+        if offset < 0 or (bsize and offset >= bsize):
+            raise ValueError('offset argument is out-of-range')
+        if size < 0:
+            size = bsize - offset
+        elif offset + size > bsize:
+            raise ValueError(
+                'requested larger slice than available in device buffer')
+        # Slice only when a proper sub-range of the input was requested.
+        if offset != 0 or size != bsize:
+            buf = buf.slice(offset, size)
+        result = self.new_buffer(size)
+        if is_host_data:
+            result.copy_from_host(buf, position=0, nbytes=size)
+        else:
+            result.copy_from_device(buf, position=0, nbytes=size)
+        return result
+    def buffer_from_object(self, obj):
+        """Create device buffer view of arbitrary object that references
+        device accessible memory.
+        When the object contains a non-contiguous view of device
+        accessible memory then the returned device buffer will contain
+        contiguous view of the memory, that is, including the
+        intermediate data that is otherwise invisible to the input
+        object.
+        Parameters
+        ----------
+        obj : {object, Buffer, HostBuffer, CudaBuffer, ...}
+          Specify an object that holds (device or host) address that
+          can be accessed from device. This includes objects with
+          types defined in pyarrow.cuda as well as arbitrary objects
+          that implement the CUDA array interface as defined by numba.
+        Returns
+        -------
+        cbuf : CudaBuffer
+          Device buffer as a view of device accessible memory.
+        Raises
+        ------
+        ArrowTypeError
+          If `obj` is none of the supported kinds.
+        """
+        if isinstance(obj, HostBuffer):
+            return self.foreign_buffer(obj.address, obj.size, base=obj)
+        # CudaBuffer derives from Buffer, so it has to be tested before the
+        # generic Buffer case; otherwise this branch can never be taken.
+        elif isinstance(obj, CudaBuffer):
+            return obj
+        elif isinstance(obj, Buffer):
+            return CudaBuffer.from_buffer(obj)
+        elif hasattr(obj, '__cuda_array_interface__'):
+            desc = obj.__cuda_array_interface__
+            addr = desc['data'][0]
+            if addr is None:
+                return self.new_buffer(0)
+            import numpy as np
+            # Byte span covering every element reachable through the
+            # shape/strides description.
+            start, end = get_contiguous_span(
+                desc['shape'], desc.get('strides'),
+                np.dtype(desc['typestr']).itemsize)
+            return self.foreign_buffer(addr + start, end - start, base=obj)
+        raise ArrowTypeError('cannot create device buffer view from'
+                             ' `%s` object' % (type(obj)))
+cdef class IpcMemHandle(_Weakrefable):
+    """A serializable container for a CUDA IPC handle.
+    """
+    cdef void init(self, shared_ptr[CCudaIpcMemHandle]& h):
+        # Keep a shared reference to the C++ handle object.
+        self.handle = h
+    @staticmethod
+    def from_buffer(Buffer opaque_handle):
+        """Create IpcMemHandle from opaque buffer (e.g. from another
+        process)
+        Parameters
+        ----------
+        opaque_handle :
+          a CUipcMemHandle as a const void*
+        Returns
+        -------
+        ipc_handle : IpcMemHandle
+        """
+        c_buf = pyarrow_unwrap_buffer(opaque_handle)
+        cdef:
+            shared_ptr[CCudaIpcMemHandle] handle
+        # The handle is rebuilt from the buffer's raw data pointer.
+        handle = GetResultValue(
+            CCudaIpcMemHandle.FromBuffer(c_buf.get().data()))
+        return pyarrow_wrap_cudaipcmemhandle(handle)
+    def serialize(self, pool=None):
+        """Write IpcMemHandle to a Buffer
+        Parameters
+        ----------
+        pool : {MemoryPool, None}
+          Specify a pool to allocate memory from
+        Returns
+        -------
+        buf : Buffer
+          The serialized buffer.
+        """
+        cdef CMemoryPool* pool_ = maybe_unbox_memory_pool(pool)
+        cdef shared_ptr[CBuffer] buf
+        # Take the raw pointer before releasing the GIL so the nogil block
+        # only touches C++ values.
+        cdef CCudaIpcMemHandle* h = self.handle.get()
+        with nogil:
+            buf = GetResultValue(h.Serialize(pool_))
+        return pyarrow_wrap_buffer(buf)
+cdef class CudaBuffer(Buffer):
+    """An Arrow buffer with data located in a GPU device.
+    To create a CudaBuffer instance, use Context.new_buffer().
+    The memory allocated in a CudaBuffer is freed when the buffer object
+    is deleted.
+    """
+    def __init__(self):
+        # Direct construction is not supported; instances are produced by
+        # the Context buffer factories. The message names `new_buffer`
+        # because Context defines no `device_buffer` method.
+        raise TypeError("Do not call CudaBuffer's constructor directly, use "
+                        "`<pyarrow.cuda.Context instance>.new_buffer`"
+                        " method instead.")
+    cdef void init_cuda(self,
+                        const shared_ptr[CCudaBuffer]& buffer,
+                        object base):
+        # Attach the C++ CUDA buffer to this wrapper: keep the typed shared
+        # pointer, run the generic buffer initialisation with the same
+        # pointer upcast to CBuffer, then record `base`.
+        # NOTE(review): `base` is presumably held to keep the owner of the
+        # underlying memory alive -- confirm against Buffer.
+        self.cuda_buffer = buffer
+        self.init(<shared_ptr[CBuffer]> buffer)
+        self.base = base
+    @staticmethod
+    def from_buffer(buf):
+        """ Convert back generic buffer into CudaBuffer
+        Parameters
+        ----------
+        buf : Buffer
+          Specify buffer containing CudaBuffer
+        Returns
+        -------
+        dbuf : CudaBuffer
+          Resulting device buffer.
+        """
+        # Unwrap to the C++ buffer, let the C++ side produce the CUDA
+        # buffer for it, and wrap the result in a new Python object.
+        c_buf = pyarrow_unwrap_buffer(buf)
+        cuda_buffer = GetResultValue(CCudaBuffer.FromBuffer(c_buf))
+        return pyarrow_wrap_cudabuffer(cuda_buffer)
+    @staticmethod
+    def from_numba(mem):
+        """Wrap a numba MemoryPointer as a CudaBuffer view.
+        Parameters
+        ----------
+        mem :  numba.cuda.cudadrv.driver.MemoryPointer
+        Returns
+        -------
+        cbuf : CudaBuffer
+          Device buffer as a view of numba MemoryPointer.
+        """
+        ctx = Context.from_numba(mem.context)
+        device_ptr = mem.device_pointer.value
+        # A null pointer with zero size has nothing to view; return a
+        # freshly allocated empty buffer instead.
+        if device_ptr is None and mem.size == 0:
+            return ctx.new_buffer(0)
+        return ctx.foreign_buffer(device_ptr, mem.size, base=mem)
+    def to_numba(self):
+        """Build a numba MemoryPointer for this buffer's address and size,
+        bound to the numba form of this buffer's context.
+        """
+        import ctypes
+        from numba.cuda.cudadrv.driver import MemoryPointer
+        numba_context = self.context.to_numba()
+        raw_pointer = ctypes.c_void_p(self.address)
+        return MemoryPointer(numba_context,
+                             pointer=raw_pointer,
+                             size=self.size)
+    cdef getitem(self, int64_t i):
+        # Fetch the single byte at index i by copying it to a host buffer.
+        single_byte = self.copy_to_host(position=i, nbytes=1)
+        return single_byte[0]
+    def copy_to_host(self, int64_t position=0, int64_t nbytes=-1,
+                     Buffer buf=None,
+                     MemoryPool memory_pool=None, c_bool resizable=False):
+        """Copy memory from GPU device to CPU host
+        Caller is responsible for ensuring that all tasks affecting
+        the memory are finished. Use
+          `<CudaBuffer instance>.context.synchronize()`
+        when needed.
+        Parameters
+        ----------
+        position : int
+          Specify the starting position of the source data in GPU
+          device buffer. Default: 0.
+        nbytes : int
+          Specify the number of bytes to copy. Default: -1 (all from
+          the position until host buffer is full).
+        buf : Buffer
+          Specify a pre-allocated output buffer in host. Default: None
+          (allocate new output buffer).
+        memory_pool : MemoryPool
+        resizable : bool
+          Specify extra arguments to allocate_buffer. Used only when
+          buf is None.
+        Returns
+        -------
+        buf : Buffer
+          Output buffer in host.
+        Raises
+        ------
+        ValueError
+          If `position` is out of range, if `nbytes` exceeds what the
+          device buffer holds from `position`, or if `nbytes` does not
+          fit into the given host buffer.
+        """
+        if position < 0 or (self.size and position > self.size) \
+           or (self.size == 0 and position != 0):
+            raise ValueError('position argument is out-of-range')
+        cdef:
+            int64_t c_nbytes
+        if buf is None:
+            if nbytes < 0:
+                # copy all starting from position to new host buffer
+                c_nbytes = self.size - position
+            else:
+                if nbytes > self.size - position:
+                    raise ValueError(
+                        'requested more to copy than available from '
+                        'device buffer')
+                # copy nbytes starting from position to new host buffer
+                c_nbytes = nbytes
+            buf = allocate_buffer(c_nbytes, memory_pool=memory_pool,
+                                  resizable=resizable)
+        else:
+            if nbytes < 0:
+                # copy all from position until given host buffer is full
+                c_nbytes = min(self.size - position, buf.size)
+            else:
+                if nbytes > buf.size:
+                    raise ValueError(
+                        'requested copy does not fit into host buffer')
+                # Also bound the request by the device side, as the
+                # buf-is-None branch does, so the copy never reads past
+                # the end of the device buffer.
+                if nbytes > self.size - position:
+                    raise ValueError(
+                        'requested more to copy than available from '
+                        'device buffer')
+                # copy nbytes from position to given host buffer
+                c_nbytes = nbytes
+        cdef:
+            shared_ptr[CBuffer] c_buf = pyarrow_unwrap_buffer(buf)
+            int64_t c_position = position
+        # The device-to-host copy itself runs with the GIL released.
+        with nogil:
+            check_status(self.cuda_buffer.get()
+                         .CopyToHost(c_position, c_nbytes,
+                                     c_buf.get().mutable_data()))
+        return buf
+    def copy_from_host(self, data, int64_t position=0, int64_t nbytes=-1):
+        """Copy bytes from a host object into this device buffer.
+        The device buffer must be pre-allocated.
+        Parameters
+        ----------
+        data : {Buffer, array-like}
+          Host data; converted with as_buffer.
+        position : int
+          Offset in the device buffer where writing starts. Default: 0.
+        nbytes : int
+          Number of bytes to copy. Default: -1 (as many source bytes as
+          fit into the device buffer starting from position).
+        Returns
+        -------
+        nbytes : int
+          Number of bytes copied.
+        Raises
+        ------
+        ValueError
+          If position is out of range, or nbytes exceeds either the
+          host data or the room left in the device buffer.
+        """
+        if position > self.size or position < 0:
+            raise ValueError('position argument is out-of-range')
+        cdef:
+            int64_t c_nbytes
+        host_buf = as_buffer(data)
+        device_room = self.size - position
+        if nbytes >= 0:
+            # explicit byte count: it has to fit both the source and
+            # the destination
+            if nbytes > host_buf.size:
+                raise ValueError(
+                    'requested more to copy than available from host buffer')
+            if nbytes > device_room:
+                raise ValueError(
+                    'requested more to copy than available in device buffer')
+            c_nbytes = nbytes
+        else:
+            # no byte count: stop when the source is exhausted or the
+            # device buffer is full, whichever comes first
+            c_nbytes = min(device_room, host_buf.size)
+        cdef:
+            shared_ptr[CBuffer] c_buf = pyarrow_unwrap_buffer(host_buf)
+            int64_t c_position = position
+        with nogil:
+            check_status(self.cuda_buffer.get().
+                         CopyFromHost(c_position, c_buf.get().data(),
+                                      c_nbytes))
+        return c_nbytes
+    def copy_from_device(self, buf, int64_t position=0, int64_t nbytes=-1):
+        """Copy bytes from another device buffer into this one.
+        Parameters
+        ----------
+        buf : CudaBuffer
+          Source device buffer.
+        position : int
+          Offset in this device buffer where writing starts. Default: 0.
+        nbytes : int
+          Number of bytes to copy. Default: -1 (as many source bytes as
+          fit into this device buffer starting from position).
+        Returns
+        -------
+        nbytes : int
+          Number of bytes copied.
+        Raises
+        ------
+        ValueError
+          If position is out of range, or nbytes exceeds either the
+          source buffer or the room left in this buffer.
+        """
+        if position > self.size or position < 0:
+            raise ValueError('position argument is out-of-range')
+        cdef:
+            int64_t c_nbytes
+        device_room = self.size - position
+        if nbytes >= 0:
+            # explicit byte count: it has to fit both the source and
+            # the destination
+            if nbytes > buf.size:
+                raise ValueError(
+                    'requested more to copy than available from device buffer')
+            if nbytes > device_room:
+                raise ValueError(
+                    'requested more to copy than available in device buffer')
+            c_nbytes = nbytes
+        else:
+            # no byte count: stop when the source is exhausted or this
+            # buffer is full, whichever comes first
+            c_nbytes = min(device_room, buf.size)
+        cdef:
+            shared_ptr[CCudaBuffer] c_buf = pyarrow_unwrap_cudabuffer(buf)
+            int64_t c_position = position
+            shared_ptr[CCudaContext] c_src_ctx = pyarrow_unwrap_cudacontext(
+                buf.context)
+            void* c_source_data = <void*>(c_buf.get().address())
+        if self.context.handle == buf.context.handle:
+            # source and destination share one context handle
+            with nogil:
+                check_status(self.cuda_buffer.get().
+                             CopyFromDevice(c_position, c_source_data,
+                                            c_nbytes))
+        else:
+            # different context handles: use the cross-context copy
+            with nogil:
+                check_status(self.cuda_buffer.get().
+                             CopyFromAnotherDevice(c_src_ctx, c_position,
+                                                   c_source_data, c_nbytes))
+        return c_nbytes
+    def export_for_ipc(self):
+        """
+        Export this device buffer as IPC memory so that it can be used
+        in other processes.
+        After calling this function, this device memory will not be
+        freed when the CudaBuffer is destructed.
+        Returns
+        -------
+        ipc_handle : IpcMemHandle
+          The exported IPC handle
+        """
+        cdef shared_ptr[CCudaIpcMemHandle] c_ipc_handle
+        with nogil:
+            c_ipc_handle = GetResultValue(
+                self.cuda_buffer.get().ExportForIpc())
+        return pyarrow_wrap_cudaipcmemhandle(c_ipc_handle)
+    @property
+    def context(self):
+        """Returns the CUDA driver context of this buffer.
+        The C++ context held by the underlying buffer is wrapped in a
+        new Python Context object on every access.
+        """
+        return pyarrow_wrap_cudacontext(self.cuda_buffer.get().context())
+    def slice(self, offset=0, length=None):
+        """Return slice of device buffer
+        Parameters
+        ----------
+        offset : int, default 0
+          Specify offset from the start of device buffer to slice
+        length : int, default None
+          Specify the length of slice (default is until end of device
+          buffer starting from offset). If the length is larger than
+          the data available, the returned slice will have a size of
+          the available data starting from the offset.
+        Returns
+        -------
+        sliced : CudaBuffer
+          Zero-copy slice of device buffer.
+        Raises
+        ------
+        ValueError
+          If offset is out of range or length is negative.
+        """
+        if offset < 0 or (self.size and offset >= self.size):
+            raise ValueError('offset argument is out-of-range')
+        if length is not None and length < 0:
+            # Without this check a negative length would be used as the
+            # size of the sliced buffer.
+            raise ValueError('length argument must be non-negative')
+        cdef int64_t offset_ = offset
+        cdef int64_t size
+        # bytes available from offset to the end of this buffer
+        cdef int64_t available = self.size - offset_
+        if length is None or length > available:
+            size = available
+        else:
+            size = length
+        parent = pyarrow_unwrap_cudabuffer(self)
+        return pyarrow_wrap_cudabuffer(make_shared[CCudaBuffer](parent,
+                                                                offset_, size))
+    def to_pybytes(self):
+        """Copy the device buffer to the host and return the content
+        as Python bytes.
+        """
+        host_copy = self.copy_to_host()
+        return host_copy.to_pybytes()
+    def __getbuffer__(self, cp.Py_buffer* buffer, int flags):
+        # Device buffer contains data pointers on the device. Hence,
+        # cannot support buffer protocol PEP-3118 for CudaBuffer.
+        # Every request is therefore rejected with BufferError; the
+        # ``buffer`` and ``flags`` arguments are not used.
+        raise BufferError('buffer protocol for device buffer not supported')
+cdef class HostBuffer(Buffer):
+    """Device-accessible CPU memory created using cudaHostAlloc.
+    To create a HostBuffer instance, use
+      cuda.new_host_buffer(<nbytes>)
+    """
+    def __init__(self):
+        # Direct construction is rejected; instances are built with
+        # HostBuffer.__new__ followed by init_host.
+        raise TypeError("Do not call HostBuffer's constructor directly,"
+                        " use `cuda.new_host_buffer` function instead.")
+    cdef void init_host(self, const shared_ptr[CCudaHostBuffer]& buffer):
+        # Keep the typed CUDA host pointer and also initialise the Buffer
+        # base with the same object cast to a generic CBuffer.
+        self.host_buffer = buffer
+        self.init(<shared_ptr[CBuffer]> buffer)
+    @property
+    def size(self):
+        # Size as reported by the underlying C++ host buffer.
+        return self.host_buffer.get().size()
+cdef class BufferReader(NativeFile):
+    """Zero-copy, file-like read access to a CUDA buffer.
+    Note: Read methods return pointers to device memory. This means
+    you must be careful using this interface with any Arrow code which
+    may expect to be able to do anything other than pointer arithmetic
+    on the returned buffers.
+    """
+    def __cinit__(self, CudaBuffer obj):
+        # Hold a reference to the wrapped buffer, then create the C++
+        # reader on it and register that reader as the random access
+        # file of this NativeFile.
+        self.buffer = obj
+        self.reader = new CCudaBufferReader(obj.buffer)
+        self.set_random_access_file(
+            shared_ptr[CRandomAccessFile](self.reader))
+        self.is_readable = True
+    def read_buffer(self, nbytes=None):
+        """Return a slice view of the underlying device buffer.
+        The slice starts at the current reader position and has the
+        requested size in bytes.
+        Parameters
+        ----------
+        nbytes : int, default None
+          Number of bytes to read. Default: None (read all remaining
+          bytes).
+        Returns
+        -------
+        cbuf : CudaBuffer
+          New device buffer.
+        """
+        cdef:
+            int64_t c_nbytes
+            shared_ptr[CCudaBuffer] c_slice
+        if nbytes is not None:
+            c_nbytes = nbytes
+        else:
+            # everything between the current position and the end
+            c_nbytes = self.size() - self.tell()
+        with nogil:
+            c_slice = static_pointer_cast[CCudaBuffer, CBuffer](
+                GetResultValue(self.reader.Read(c_nbytes)))
+        return pyarrow_wrap_cudabuffer(c_slice)
+cdef class BufferWriter(NativeFile):
+    """File interface for writing to CUDA buffers.
+    By default writes are unbuffered. Use set_buffer_size to enable
+    buffering.
+    """
+    def __cinit__(self, CudaBuffer buffer):
+        # Hold a reference to the target buffer, create the C++ writer
+        # on its CUDA buffer and register that writer as the output
+        # stream of this NativeFile.
+        self.buffer = buffer
+        self.writer = new CCudaBufferWriter(self.buffer.cuda_buffer)
+        self.set_output_stream(shared_ptr[COutputStream](self.writer))
+        self.is_writable = True
+    def writeat(self, int64_t position, object data):
+        """Write data to buffer starting from position.
+        Parameters
+        ----------
+        position : int
+          Specify device buffer position where the data will be
+          written.
+        data : array-like
+          Specify data, the data instance must implement buffer
+          protocol.
+        """
+        cdef:
+            Buffer buf = as_buffer(data)
+            const uint8_t* c_data = buf.buffer.get().data()
+            int64_t c_size = buf.buffer.get().size()
+        # ``buf`` stays referenced for the whole call, so the raw data
+        # pointer taken above is used while its owner is still alive.
+        with nogil:
+            check_status(self.writer.WriteAt(position, c_data, c_size))
+    def flush(self):
+        """ Flush the buffer stream """
+        with nogil:
+            check_status(self.writer.Flush())
+    def seek(self, int64_t position, int whence=0):
+        """Move the write position and return ``self.tell()``.
+        Only whence 0 (``position`` is used as the target offset) and
+        whence 1 (``position`` is added to the writer's current
+        offset) are handled; any other value raises ValueError.
+        """
+        # TODO: remove this method after NativeFile.seek supports
+        # writable files.
+        cdef int64_t offset
+        with nogil:
+            if whence == 0:
+                offset = position
+            elif whence == 1:
+                offset = GetResultValue(self.writer.Tell())
+                offset = offset + position
+            else:
+                # The GIL has to be re-acquired to build and raise a
+                # Python exception from inside the nogil block.
+                with gil:
+                    raise ValueError("Invalid value of whence: {0}"
+                                     .format(whence))
+            check_status(self.writer.Seek(offset))
+        return self.tell()
+    @property
+    def buffer_size(self):
+        """Returns size of host (CPU) buffer, 0 for unbuffered
+        """
+        return self.writer.buffer_size()
+    @buffer_size.setter
+    def buffer_size(self, int64_t buffer_size):
+        """Set CPU buffer size to limit calls to cudaMemcpy
+        Parameters
+        ----------
+        buffer_size : int
+          Specify the size of CPU buffer to allocate in bytes.
+        """
+        with nogil:
+            check_status(self.writer.SetBufferSize(buffer_size))
+    @property
+    def num_bytes_buffered(self):
+        """Returns number of bytes buffered on host
+        """
+        return self.writer.num_bytes_buffered()
+# Functions
+def new_host_buffer(const int64_t size, int device=0):
+    """Allocate CUDA-accessible memory on the CPU host
+    Parameters
+    ----------
+    size : int
+      Number of bytes to allocate.
+    device : int
+      GPU device number.
+    Returns
+    -------
+    dbuf : HostBuffer
+      Allocated host buffer
+    """
+    cdef shared_ptr[CCudaHostBuffer] c_host_buffer
+    with nogil:
+        c_host_buffer = GetResultValue(
+            AllocateCudaHostBuffer(device, size))
+    return pyarrow_wrap_cudahostbuffer(c_host_buffer)
+def serialize_record_batch(object batch, object ctx):
+    """ Write record batch message to GPU device memory
+    Parameters
+    ----------
+    batch : RecordBatch
+      Record batch to write
+    ctx : Context
+      CUDA Context to allocate device memory from
+    Returns
+    -------
+    dbuf : CudaBuffer
+      device buffer which contains the record batch message
+    Raises
+    ------
+    TypeError
+      If ctx is not a Context, or batch does not unwrap to a
+      RecordBatch.
+    """
+    cdef shared_ptr[CCudaBuffer] buffer
+    cdef CRecordBatch* batch_ = pyarrow_unwrap_batch(batch).get()
+    cdef CCudaContext* ctx_ = pyarrow_unwrap_cudacontext(ctx).get()
+    if batch_ == NULL:
+        # pyarrow_unwrap_batch is defined outside this file; guard in
+        # case it yields a null pointer for a non-RecordBatch argument,
+        # because the pointer is dereferenced below.
+        raise TypeError('expected RecordBatch instance, got %s'
+                        % (type(batch).__name__))
+    # The raw pointers stay valid because ``batch`` and ``ctx`` are
+    # referenced by this frame for the whole call.
+    with nogil:
+        buffer = GetResultValue(CudaSerializeRecordBatch(batch_[0], ctx_))
+    return pyarrow_wrap_cudabuffer(buffer)
+def read_message(object source, pool=None):
+    """ Read Arrow IPC message located on GPU device
+    Parameters
+    ----------
+    source : {CudaBuffer, cuda.BufferReader}
+      Device buffer or reader of device buffer.
+    pool : MemoryPool (optional)
+      Pool to allocate CPU memory for the metadata
+    Returns
+    -------
+    message : Message
+      The deserialized message, body still on device
+    """
+    cdef:
+        Message result = Message.__new__(Message)
+        BufferReader reader
+    cdef CMemoryPool* pool_ = maybe_unbox_memory_pool(pool)
+    if isinstance(source, BufferReader):
+        # A reader was passed in: use it directly. Without this branch
+        # ``reader`` would be left unassigned for BufferReader input.
+        reader = source
+    else:
+        # Anything else is handed to BufferReader, which accepts a
+        # CudaBuffer.
+        reader = BufferReader(source)
+    with nogil:
+        result.message = move(
+            GetResultValue(ReadMessage(reader.reader, pool_)))
+    return result
+def read_record_batch(object buffer, object schema, *,
+                      DictionaryMemo dictionary_memo=None, pool=None):
+    """Construct RecordBatch referencing IPC message located on CUDA device.
+    While the metadata is copied to host memory for deserialization,
+    the record batch data remains on the device.
+    Parameters
+    ----------
+    buffer :
+      Device buffer containing the complete IPC message
+    schema : Schema
+      The schema for the record batch
+    dictionary_memo : DictionaryMemo, optional
+        If message contains dictionaries, must pass a populated
+        DictionaryMemo
+    pool : MemoryPool (optional)
+      Pool to allocate metadata from
+    Returns
+    -------
+    batch : RecordBatch
+      Reconstructed record batch, with device pointers
+    Raises
+    ------
+    TypeError
+      If buffer is not a CudaBuffer, or schema does not unwrap to a
+      Schema.
+    """
+    cdef:
+        shared_ptr[CSchema] schema_ = pyarrow_unwrap_schema(schema)
+        shared_ptr[CCudaBuffer] buffer_ = pyarrow_unwrap_cudabuffer(buffer)
+        CDictionaryMemo temp_memo
+        CDictionaryMemo* arg_dict_memo
+        CMemoryPool* pool_ = maybe_unbox_memory_pool(pool)
+        shared_ptr[CRecordBatch] batch
+    if schema_.get() == NULL:
+        # pyarrow_unwrap_schema is defined outside this file; guard in
+        # case it yields a null pointer for a non-Schema argument before
+        # handing it to the C++ reader.
+        raise TypeError('expected Schema instance, got %s'
+                        % (type(schema).__name__))
+    if dictionary_memo is not None:
+        arg_dict_memo = dictionary_memo.memo
+    else:
+        # no memo supplied: use an empty one local to this call
+        arg_dict_memo = &temp_memo
+    with nogil:
+        batch = GetResultValue(CudaReadRecordBatch(
+            schema_, arg_dict_memo, buffer_, pool_))
+    return pyarrow_wrap_batch(batch)
+# Public API
+cdef public api bint pyarrow_is_buffer(object buffer):
+    # True when ``buffer`` is an instance of Buffer.
+    return isinstance(buffer, Buffer)
+# cudabuffer
+cdef public api bint pyarrow_is_cudabuffer(object buffer):
+    # True when ``buffer`` is an instance of CudaBuffer.
+    return isinstance(buffer, CudaBuffer)
+cdef public api object \
+        pyarrow_wrap_cudabuffer_base(const shared_ptr[CCudaBuffer]& buf, base):
+    # Create a CudaBuffer without running __init__ and initialise it
+    # with the C++ buffer and the given ``base`` object.
+    cdef CudaBuffer wrapped = CudaBuffer.__new__(CudaBuffer)
+    wrapped.init_cuda(buf, base)
+    return wrapped
+cdef public api object \
+        pyarrow_wrap_cudabuffer(const shared_ptr[CCudaBuffer]& buf):
+    # Create a CudaBuffer without running __init__ and initialise it
+    # with the C++ buffer and no base object.
+    cdef CudaBuffer wrapped = CudaBuffer.__new__(CudaBuffer)
+    wrapped.init_cuda(buf, None)
+    return wrapped
+cdef public api shared_ptr[CCudaBuffer] pyarrow_unwrap_cudabuffer(object obj):
+    # Return the C++ buffer pointer stored on a CudaBuffer instance;
+    # any other object is rejected with TypeError.
+    if not pyarrow_is_cudabuffer(obj):
+        raise TypeError('expected CudaBuffer instance, got %s'
+                        % (type(obj).__name__))
+    return (<CudaBuffer>obj).cuda_buffer
+# cudahostbuffer
+cdef public api bint pyarrow_is_cudahostbuffer(object buffer):
+    # True when ``buffer`` is an instance of HostBuffer.
+    return isinstance(buffer, HostBuffer)
+cdef public api object \
+        pyarrow_wrap_cudahostbuffer(const shared_ptr[CCudaHostBuffer]& buf):
+    # Create a HostBuffer without running __init__ (which always
+    # raises) and initialise it with the C++ host buffer.
+    cdef HostBuffer wrapped = HostBuffer.__new__(HostBuffer)
+    wrapped.init_host(buf)
+    return wrapped
+cdef public api shared_ptr[CCudaHostBuffer] \
+        pyarrow_unwrap_cudahostbuffer(object obj):
+    # Return the C++ host buffer pointer stored on a HostBuffer
+    # instance; any other object is rejected with TypeError.
+    if not pyarrow_is_cudahostbuffer(obj):
+        raise TypeError('expected HostBuffer instance, got %s'
+                        % (type(obj).__name__))
+    return (<HostBuffer>obj).host_buffer
+# cudacontext
+cdef public api bint pyarrow_is_cudacontext(object ctx):
+    # True when ``ctx`` is an instance of Context.
+    return isinstance(ctx, Context)
+cdef public api object \
+        pyarrow_wrap_cudacontext(const shared_ptr[CCudaContext]& ctx):
+    # Create a Context without running __init__ and initialise it with
+    # the C++ context.
+    cdef Context wrapped = Context.__new__(Context)
+    wrapped.init(ctx)
+    return wrapped
+cdef public api shared_ptr[CCudaContext] \
+        pyarrow_unwrap_cudacontext(object obj):
+    # Return the C++ context pointer stored on a Context instance; any
+    # other object is rejected with TypeError.
+    if not pyarrow_is_cudacontext(obj):
+        raise TypeError('expected Context instance, got %s'
+                        % (type(obj).__name__))
+    return (<Context>obj).context
+# cudaipcmemhandle
+cdef public api bint pyarrow_is_cudaipcmemhandle(object handle):
+    # True when ``handle`` is an instance of IpcMemHandle.
+    return isinstance(handle, IpcMemHandle)
+cdef public api object \
+        pyarrow_wrap_cudaipcmemhandle(shared_ptr[CCudaIpcMemHandle]& h):
+    # Create an IpcMemHandle without running __init__ and initialise
+    # it with the C++ handle.
+    cdef IpcMemHandle wrapped = IpcMemHandle.__new__(IpcMemHandle)
+    wrapped.init(h)
+    return wrapped
+cdef public api shared_ptr[CCudaIpcMemHandle] \
+        pyarrow_unwrap_cudaipcmemhandle(object obj):
+    # Return the C++ handle pointer stored on an IpcMemHandle
+    # instance; any other object is rejected with TypeError.
+    if not pyarrow_is_cudaipcmemhandle(obj):
+        raise TypeError('expected IpcMemHandle instance, got %s'
+                        % (type(obj).__name__))
+    return (<IpcMemHandle>obj).handle

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_dataset_parquet.pyx ADDED Viewed

	@@ -0,0 +1,1053 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# cython: language_level = 3
+"""Dataset support for Parquet file format."""
+from cython cimport binding
+from cython.operator cimport dereference as deref
+import os
+import warnings
+import pyarrow as pa
+from pyarrow.lib cimport *
+from pyarrow.lib import frombytes, tobytes, is_threading_enabled
+from pyarrow.includes.libarrow cimport *
+from pyarrow.includes.libarrow_dataset cimport *
+from pyarrow.includes.libarrow_dataset_parquet cimport *
+from pyarrow._fs cimport FileSystem
+from pyarrow._compute cimport Expression, _bind
+from pyarrow._dataset cimport (
+    _make_file_source,
+    DatasetFactory,
+    FileFormat,
+    FileFragment,
+    FileWriteOptions,
+    Fragment,
+    FragmentScanOptions,
+    CacheOptions,
+    Partitioning,
+    PartitioningFactory,
+    WrittenFile
+)
+from pyarrow._parquet cimport (
+    _create_writer_properties, _create_arrow_writer_properties,
+    FileMetaData,
+)
+# Probe for the optional encryption module: the flag below records
+# whether its helpers could be imported.
+try:
+    from pyarrow._dataset_parquet_encryption import (
+        set_encryption_config, set_decryption_config, set_decryption_properties
+    )
+    parquet_encryption_enabled = True
+except ImportError:
+    parquet_encryption_enabled = False
+# Scalar ``True`` expression, used as the default partition expression
+# in ParquetFileFormat.make_fragment.
+cdef Expression _true = Expression._scalar(True)
+# Pointer alias used as the target type of the dynamic_cast in
+# ParquetFileFormat._finish_write.
+ctypedef CParquetFileWriter* _CParquetFileWriterPtr
+cdef class ParquetFileFormat(FileFormat):
+    """
+    FileFormat for Parquet
+    Parameters
+    ----------
+    read_options : ParquetReadOptions
+        Read options for the file.
+    default_fragment_scan_options : ParquetFragmentScanOptions
+        Scan Options for the file.
+    **kwargs : dict
+        Additional options for read option or scan option
+    """
+    cdef:
+        # Typed raw pointer to the wrapped C++ format; set in init().
+        CParquetFileFormat* parquet_format
+    def __init__(self, read_options=None,
+                 default_fragment_scan_options=None,
+                 **kwargs):
+        cdef:
+            shared_ptr[CParquetFileFormat] wrapped
+            CParquetFileFormatReaderOptions* options
+        # Read/scan options
+        # Keyword arguments named in _PARQUET_READ_OPTIONS are read
+        # options; every other keyword is treated as a scan option.
+        read_options_args = {option: kwargs[option] for option in kwargs
+                             if option in _PARQUET_READ_OPTIONS}
+        scan_args = {option: kwargs[option] for option in kwargs
+                     if option not in _PARQUET_READ_OPTIONS}
+        # An options object and loose keywords of the same kind cannot
+        # be combined.
+        if read_options and read_options_args:
+            duplicates = ', '.join(sorted(read_options_args))
+            raise ValueError(f'If `read_options` is given, '
+                             f'cannot specify {duplicates}')
+        if default_fragment_scan_options and scan_args:
+            duplicates = ', '.join(sorted(scan_args))
+            raise ValueError(f'If `default_fragment_scan_options` is given, '
+                             f'cannot specify {duplicates}')
+        if read_options is None:
+            read_options = ParquetReadOptions(**read_options_args)
+        elif isinstance(read_options, dict):
+            # For backwards compatibility
+            # Entries that are not read options are moved to the scan
+            # options and reported with a warning.
+            duplicates = []
+            for option, value in read_options.items():
+                if option in _PARQUET_READ_OPTIONS:
+                    read_options_args[option] = value
+                else:
+                    duplicates.append(option)
+                    scan_args[option] = value
+            if duplicates:
+                duplicates = ", ".join(duplicates)
+                warnings.warn(f'The scan options {duplicates} should be '
+                              'specified directly as keyword arguments')
+            read_options = ParquetReadOptions(**read_options_args)
+        elif not isinstance(read_options, ParquetReadOptions):
+            raise TypeError('`read_options` must be either a dictionary or an '
+                            'instance of ParquetReadOptions')
+        if default_fragment_scan_options is None:
+            default_fragment_scan_options = ParquetFragmentScanOptions(**scan_args)
+        elif isinstance(default_fragment_scan_options, dict):
+            default_fragment_scan_options = ParquetFragmentScanOptions(
+                **default_fragment_scan_options)
+        elif not isinstance(default_fragment_scan_options,
+                            ParquetFragmentScanOptions):
+            raise TypeError('`default_fragment_scan_options` must be either a '
+                            'dictionary or an instance of '
+                            'ParquetFragmentScanOptions')
+        # Build the C++ format and copy the read options onto it.
+        wrapped = make_shared[CParquetFileFormat]()
+        options = &(wrapped.get().reader_options)
+        if read_options.dictionary_columns is not None:
+            for column in read_options.dictionary_columns:
+                options.dict_columns.insert(tobytes(column))
+        options.coerce_int96_timestamp_unit = \
+            read_options._coerce_int96_timestamp_unit
+        self.init(<shared_ptr[CFileFormat]> wrapped)
+        self.default_fragment_scan_options = default_fragment_scan_options
+    cdef void init(self, const shared_ptr[CFileFormat]& sp):
+        # Run the FileFormat initialisation, then keep a raw pointer to
+        # the same object cast to CParquetFileFormat.
+        FileFormat.init(self, sp)
+        self.parquet_format = <CParquetFileFormat*> sp.get()
+    cdef WrittenFile _finish_write(self, path, base_dir,
+                                   CFileWriter* file_writer):
+        # Build the WrittenFile for a finished write: the writer's
+        # Parquet metadata (when there is any) with its file path set
+        # relative to ``base_dir``, plus the number of bytes written.
+        cdef:
+            FileMetaData parquet_metadata
+            CParquetFileWriter* parquet_file_writer
+        parquet_metadata = None
+        # NOTE(review): the result of the dynamic_cast is dereferenced
+        # without a NULL check - presumably callers only pass Parquet
+        # writers; confirm.
+        parquet_file_writer = dynamic_cast[_CParquetFileWriterPtr](file_writer)
+        with nogil:
+            metadata = deref(
+                deref(parquet_file_writer).parquet_writer()).metadata()
+        if metadata:
+            parquet_metadata = FileMetaData()
+            parquet_metadata.init(metadata)
+            parquet_metadata.set_file_path(os.path.relpath(path, base_dir))
+        size = GetResultValue(file_writer.GetBytesWritten())
+        return WrittenFile(path, parquet_metadata, size)
+    @property
+    def read_options(self):
+        # A new ParquetReadOptions is built from the C++ reader options
+        # on every access.
+        cdef CParquetFileFormatReaderOptions* options
+        options = &self.parquet_format.reader_options
+        parquet_read_options = ParquetReadOptions(
+            dictionary_columns={frombytes(col)
+                                for col in options.dict_columns},
+        )
+        # Read options getter/setter works with strings so setting
+        # the private property which uses the C Type
+        parquet_read_options._coerce_int96_timestamp_unit = \
+            options.coerce_int96_timestamp_unit
+        return parquet_read_options
+    def make_write_options(self, **kwargs):
+        """
+        Create write options from the base class and update them with
+        the given keyword arguments.
+        Parameters
+        ----------
+        **kwargs : dict
+        Returns
+        -------
+        pyarrow.dataset.FileWriteOptions
+        """
+        # Safeguard from calling make_write_options as a static class method
+        if not isinstance(self, ParquetFileFormat):
+            raise TypeError("make_write_options() should be called on "
+                            "an instance of ParquetFileFormat")
+        opts = FileFormat.make_write_options(self)
+        (<ParquetFileWriteOptions> opts).update(**kwargs)
+        return opts
+    cdef _set_default_fragment_scan_options(self, FragmentScanOptions options):
+        # Options whose type_name is 'parquet' are stored on the C++
+        # format; anything else is passed on to the base class.
+        if options.type_name == 'parquet':
+            self.parquet_format.default_fragment_scan_options = options.wrapped
+        else:
+            super()._set_default_fragment_scan_options(options)
+    def equals(self, ParquetFileFormat other):
+        """
+        Compare read options and default fragment scan options.
+        Parameters
+        ----------
+        other : pyarrow.dataset.ParquetFileFormat
+        Returns
+        -------
+        bool
+        """
+        return (
+            self.read_options.equals(other.read_options) and
+            self.default_fragment_scan_options ==
+            other.default_fragment_scan_options
+        )
+    @property
+    def default_extname(self):
+        return "parquet"
+    def __reduce__(self):
+        # Rebuilt from the two constructor arguments.
+        return ParquetFileFormat, (self.read_options,
+                                   self.default_fragment_scan_options)
+    def __repr__(self):
+        return f"<ParquetFileFormat read_options={self.read_options}>"
+    def make_fragment(self, file, filesystem=None,
+                      Expression partition_expression=None, row_groups=None, *, file_size=None):
+        """
+        Make a FileFragment from a given file.
+        Parameters
+        ----------
+        file : file-like object, path-like or str
+            The file or file path to make a fragment from.
+        filesystem : Filesystem, optional
+            If `filesystem` is given, `file` must be a string and specifies
+            the path of the file to read from the filesystem.
+        partition_expression : Expression, optional
+            An expression that is guaranteed true for all rows in the fragment.  Allows
+            fragment to be potentially skipped while scanning with a filter.
+        row_groups : Iterable, optional
+            The indices of the row groups to include
+        file_size : int, optional
+            The size of the file in bytes. Can improve performance with high-latency filesystems
+            when file size needs to be known before reading.
+        Returns
+        -------
+        fragment : Fragment
+            The file fragment
+        """
+        cdef:
+            vector[int] c_row_groups
+        if partition_expression is None:
+            partition_expression = _true
+        # Without explicit row groups the base class implementation is
+        # used.
+        if row_groups is None:
+            return super().make_fragment(file, filesystem,
+                                         partition_expression, file_size=file_size)
+        c_source = _make_file_source(file, filesystem, file_size)
+        # Going through set() drops duplicate indices; the resulting
+        # order is the set's iteration order.
+        c_row_groups = [<int> row_group for row_group in set(row_groups)]
+        c_fragment = <shared_ptr[CFragment]> GetResultValue(
+            self.parquet_format.MakeFragment(move(c_source),
+                                             partition_expression.unwrap(),
+                                             <shared_ptr[CSchema]>nullptr,
+                                             move(c_row_groups)))
+        return Fragment.wrap(move(c_fragment))
+class RowGroupInfo:
+    """
+    Lightweight view over the information of one row group
+    Parameters
+    ----------
+    id : integer
+        The group ID.
+    metadata : FileMetaData
+        The rowgroup metadata.
+    schema : Schema
+        Schema of the rows.
+    """
+    def __init__(self, id, metadata, schema):
+        self.id, self.metadata, self.schema = id, metadata, schema
+    @property
+    def num_rows(self):
+        """Number of rows, as reported by the metadata."""
+        return self.metadata.num_rows
+    @property
+    def total_byte_size(self):
+        """Total byte size, as reported by the metadata."""
+        return self.metadata.total_byte_size
+    @property
+    def statistics(self):
+        """Map each column path to ``{'min': ..., 'max': ...}``.
+        Columns without statistics, without min/max, or whose path is
+        not a field of ``self.schema`` are left out.
+        """
+        collected = {}
+        for index in range(self.metadata.num_columns):
+            column = self.metadata.column(index)
+            column_stats = column.statistics
+            if column_stats is None or not column_stats.has_min_max:
+                continue
+            path = column.path_in_schema
+            position = self.schema.get_field_index(path)
+            if position < 0:
+                continue
+            field_type = self.schema.field(position).type
+            # min/max are converted to scalars of the schema field type
+            # and then to plain Python values
+            collected[column.path_in_schema] = {
+                'min': pa.scalar(column_stats.min, type=field_type).as_py(),
+                'max': pa.scalar(column_stats.max, type=field_type).as_py()
+            }
+        return collected
+    def __repr__(self):
+        return "RowGroupInfo({})".format(self.id)
+    def __eq__(self, other):
+        # Equal to another RowGroupInfo with the same id, or to an int
+        # that matches the id; never equal to anything else.
+        if isinstance(other, RowGroupInfo):
+            return self.id == other.id
+        if isinstance(other, int):
+            return self.id == other
+        return False
+cdef class ParquetFileFragment(FileFragment):
+    """A Fragment representing a parquet file."""
+    cdef:
+        CParquetFileFragment* parquet_file_fragment
+    cdef void init(self, const shared_ptr[CFragment]& sp):
+        # Run the base-class initialisation, then keep the raw pointer held
+        # by `sp` cast to CParquetFileFragment for the Parquet-specific
+        # calls made by the methods below.
+        FileFragment.init(self, sp)
+        self.parquet_file_fragment = <CParquetFileFragment*> sp.get()
+    def __reduce__(self):
+        # Pickle support: the fragment is recreated by calling
+        # self.format.make_fragment with the argument tuple returned below.
+        buffer = self.buffer
+        # parquet_file_fragment.row_groups() is empty if the metadata
+        # information of the file is not yet populated
+        if not bool(self.parquet_file_fragment.row_groups()):
+            row_groups = None
+        else:
+            row_groups = [row_group.id for row_group in self.row_groups]
+        return self.format.make_fragment, (
+            # Pass the buffer as the source when there is one, else the path.
+            self.path if buffer is None else buffer,
+            self.filesystem,
+            self.partition_expression,
+            row_groups
+        )
+    def ensure_complete_metadata(self):
+        """
+        Ensure that all metadata (statistics, physical schema, ...) have
+        been read and cached in this fragment.
+        """
+        # The GIL is released for the duration of the C++ call; a failing
+        # status is turned into an exception by check_status.
+        with nogil:
+            check_status(self.parquet_file_fragment.EnsureCompleteMetadata())
+    @property
+    def row_groups(self):
+        """
+        List of RowGroupInfo objects, one per row group ID reported by the
+        underlying fragment.
+        """
+        # Reading self.metadata also ensures the complete metadata is loaded
+        # before the row group IDs are queried.
+        metadata = self.metadata
+        cdef vector[int] row_groups = self.parquet_file_fragment.row_groups()
+        return [RowGroupInfo(i, metadata.row_group(i), self.physical_schema)
+                for i in row_groups]
+    @property
+    def metadata(self):
+        """
+        FileMetaData object initialised from the fragment's metadata, after
+        making sure the complete metadata has been read.
+        """
+        self.ensure_complete_metadata()
+        cdef FileMetaData metadata = FileMetaData()
+        metadata.init(self.parquet_file_fragment.metadata())
+        return metadata
+    @property
+    def num_row_groups(self):
+        """
+        Return the number of row groups viewed by this fragment (not the
+        number of row groups in the origin file).
+        """
+        self.ensure_complete_metadata()
+        return self.parquet_file_fragment.row_groups().size()
+    def split_by_row_group(self, Expression filter=None,
+                           Schema schema=None):
+        """
+        Split the fragment into multiple fragments.
+        Return a Fragment wrapping each row group in this ParquetFileFragment.
+        Row groups will be excluded whose metadata contradicts the optional
+        filter.
+        Parameters
+        ----------
+        filter : Expression, default None
+            Only include the row groups which satisfy this predicate (using
+            the Parquet RowGroup statistics).
+        schema : Schema, default None
+            Schema to use when filtering row groups. Defaults to the
+            Fragment's physical schema
+        Returns
+        -------
+        A list of Fragments
+        """
+        cdef:
+            vector[shared_ptr[CFragment]] c_fragments
+            CExpression c_filter
+            shared_ptr[CFragment] c_fragment
+        # Fall back to the physical schema when no (truthy) schema is given.
+        schema = schema or self.physical_schema
+        c_filter = _bind(filter, schema)
+        # The split itself runs in C++ with the GIL released.
+        with nogil:
+            c_fragments = move(GetResultValue(
+                self.parquet_file_fragment.SplitByRowGroup(move(c_filter))))
+        return [Fragment.wrap(c_fragment) for c_fragment in c_fragments]
+    def subset(self, Expression filter=None, Schema schema=None,
+               object row_group_ids=None):
+        """
+        Create a subset of the fragment (viewing a subset of the row groups).
+        Subset can be specified by either a filter predicate (with optional
+        schema) or by a list of row group IDs. Note that when using a filter,
+        the resulting fragment can be empty (viewing no row groups).
+        Parameters
+        ----------
+        filter : Expression, default None
+            Only include the row groups which satisfy this predicate (using
+            the Parquet RowGroup statistics).
+        schema : Schema, default None
+            Schema to use when filtering row groups. Defaults to the
+            Fragment's physical schema
+        row_group_ids : list of ints
+            The row group IDs to include in the subset. Can only be specified
+            if `filter` is None.
+        Returns
+        -------
+        ParquetFileFragment
+        Raises
+        ------
+        ValueError
+            If both or neither of `filter` and `row_group_ids` are given.
+        """
+        cdef:
+            CExpression c_filter
+            vector[int] c_row_group_ids
+            shared_ptr[CFragment] c_fragment
+        # Exactly one of the two selection mechanisms must be supplied.
+        if filter is not None and row_group_ids is not None:
+            raise ValueError(
+                "Cannot specify both 'filter' and 'row_group_ids'."
+            )
+        if filter is not None:
+            schema = schema or self.physical_schema
+            c_filter = _bind(filter, schema)
+            with nogil:
+                c_fragment = move(GetResultValue(
+                    self.parquet_file_fragment.SubsetWithFilter(
+                        move(c_filter))))
+        elif row_group_ids is not None:
+            # Duplicates are dropped and the IDs are passed in ascending order.
+            c_row_group_ids = [
+                <int> row_group for row_group in sorted(set(row_group_ids))
+            ]
+            with nogil:
+                c_fragment = move(GetResultValue(
+                    self.parquet_file_fragment.SubsetWithIds(
+                        move(c_row_group_ids))))
+        else:
+            raise ValueError(
+                "Need to specify one of 'filter' or 'row_group_ids'"
+            )
+        return Fragment.wrap(c_fragment)
+cdef class ParquetReadOptions(_Weakrefable):
+    """
+    Options controlling how Parquet data is read.
+    Parameters
+    ----------
+    dictionary_columns : list of string, default None
+        Names of the columns to dictionary encode while they are read.
+    coerce_int96_timestamp_unit : str, default None
+        Resolution (e.g. 'ms') that timestamps stored as INT96 are cast
+        to. None behaves like 'ns', so INT96 timestamps are then read as
+        nanosecond timestamps.
+    """
+    cdef public:
+        set dictionary_columns
+        TimeUnit _coerce_int96_timestamp_unit
+    # Also see _PARQUET_READ_OPTIONS
+    def __init__(self, dictionary_columns=None,
+                 coerce_int96_timestamp_unit=None):
+        # Any falsy value (None, empty list, ...) results in an empty set.
+        self.dictionary_columns = (
+            set(dictionary_columns) if dictionary_columns else set()
+        )
+        self.coerce_int96_timestamp_unit = coerce_int96_timestamp_unit
+    @property
+    def coerce_int96_timestamp_unit(self):
+        """String form of the unit INT96 timestamps are coerced to."""
+        return timeunit_to_string(self._coerce_int96_timestamp_unit)
+    @coerce_int96_timestamp_unit.setter
+    def coerce_int96_timestamp_unit(self, unit):
+        if unit is None:
+            # No unit given: use nanoseconds.
+            self._coerce_int96_timestamp_unit = TimeUnit_NANO
+        else:
+            self._coerce_int96_timestamp_unit = string_to_timeunit(unit)
+    def equals(self, ParquetReadOptions other):
+        """
+        Compare the dictionary columns and the INT96 coercion unit of two
+        option objects.
+        Parameters
+        ----------
+        other : pyarrow.dataset.ParquetReadOptions
+        Returns
+        -------
+        bool
+        """
+        if not (self.dictionary_columns == other.dictionary_columns):
+            return False
+        return (self.coerce_int96_timestamp_unit ==
+                other.coerce_int96_timestamp_unit)
+    def __eq__(self, other):
+        # equals() rejects arguments of the wrong type with a TypeError;
+        # such objects simply compare unequal.
+        try:
+            outcome = self.equals(other)
+        except TypeError:
+            outcome = False
+        return outcome
+    def __repr__(self):
+        text = (
+            f"<ParquetReadOptions"
+            f" dictionary_columns={self.dictionary_columns}"
+            f" coerce_int96_timestamp_unit={self.coerce_int96_timestamp_unit}>"
+        )
+        return text
+cdef class ParquetFileWriteOptions(FileWriteOptions):
+    """
+    Write options for Parquet files, kept as a dict of named properties
+    (``self._properties``) that is pushed to the wrapped C++ options.
+    """
+    def update(self, **kwargs):
+        """
+        Store new option values and re-apply the affected property groups
+        to the wrapped C++ options.
+        All option names are validated before anything is modified, so a
+        call rejected for an unknown name leaves the options untouched.
+        Parameters
+        ----------
+        **kwargs : dict
+            Option names and their new values. Every name must already be
+            a key of ``self._properties``.
+        Raises
+        ------
+        TypeError
+            If a name is not a known parquet write option.
+        """
+        arrow_fields = {
+            "use_deprecated_int96_timestamps",
+            "coerce_timestamps",
+            "allow_truncated_timestamps",
+            "use_compliant_nested_type",
+        }
+        # Validate up front: raising in the middle of the update loop would
+        # leave self._properties holding values that were never applied to
+        # the C++ writer properties.
+        for name in kwargs:
+            if name not in self._properties:
+                raise TypeError("unexpected parquet write option: " + name)
+        # Collect each required setter once, however many options map to it.
+        setters = set()
+        for name, value in kwargs.items():
+            self._properties[name] = value
+            if name in arrow_fields:
+                setters.add(self._set_arrow_properties)
+            elif name == "encryption_config" and value is not None:
+                setters.add(self._set_encryption_config)
+            else:
+                setters.add(self._set_properties)
+        for setter in setters:
+            setter()
+    def _set_properties(self):
+        # Rebuild the Parquet writer properties from the current values in
+        # self._properties and store them on the wrapped C++ options.
+        cdef CParquetFileWriteOptions* opts = self.parquet_options
+        opts.writer_properties = _create_writer_properties(
+            use_dictionary=self._properties["use_dictionary"],
+            compression=self._properties["compression"],
+            version=self._properties["version"],
+            write_statistics=self._properties["write_statistics"],
+            data_page_size=self._properties["data_page_size"],
+            compression_level=self._properties["compression_level"],
+            use_byte_stream_split=(
+                self._properties["use_byte_stream_split"]
+            ),
+            column_encoding=self._properties["column_encoding"],
+            data_page_version=self._properties["data_page_version"],
+            encryption_properties=self._properties["encryption_properties"],
+            write_batch_size=self._properties["write_batch_size"],
+            dictionary_pagesize_limit=self._properties["dictionary_pagesize_limit"],
+            write_page_index=self._properties["write_page_index"],
+            write_page_checksum=self._properties["write_page_checksum"],
+            sorting_columns=self._properties["sorting_columns"],
+            store_decimal_as_integer=self._properties["store_decimal_as_integer"],
+        )
+    def _set_arrow_properties(self):
+        # Rebuild the Arrow writer properties from self._properties; the
+        # writer engine version is always passed as "V2".
+        cdef CParquetFileWriteOptions* opts = self.parquet_options
+        opts.arrow_writer_properties = _create_arrow_writer_properties(
+            use_deprecated_int96_timestamps=(
+                self._properties["use_deprecated_int96_timestamps"]
+            ),
+            coerce_timestamps=self._properties["coerce_timestamps"],
+            allow_truncated_timestamps=(
+                self._properties["allow_truncated_timestamps"]
+            ),
+            writer_engine_version="V2",
+            use_compliant_nested_type=(
+                self._properties["use_compliant_nested_type"]
+            )
+        )
+    def _set_encryption_config(self):
+        # Apply the stored encryption_config; raises NotImplementedError when
+        # pyarrow was built without Parquet encryption support.
+        if not parquet_encryption_enabled:
+            raise NotImplementedError(
+                "Encryption is not enabled in your installation of pyarrow, but an "
+                "encryption_config was provided."
+            )
+        set_encryption_config(self, self._properties["encryption_config"])
+    cdef void init(self, const shared_ptr[CFileWriteOptions]& sp):
+        FileWriteOptions.init(self, sp)
+        self.parquet_options = <CParquetFileWriteOptions*> sp.get()
+        # Default values of every supported option; update() only accepts
+        # names that are keys of this dict.
+        self._properties = dict(
+            use_dictionary=True,
+            compression="snappy",
+            version="2.6",
+            write_statistics=None,
+            data_page_size=None,
+            compression_level=None,
+            use_byte_stream_split=False,
+            column_encoding=None,
+            data_page_version="1.0",
+            use_deprecated_int96_timestamps=False,
+            coerce_timestamps=None,
+            allow_truncated_timestamps=False,
+            use_compliant_nested_type=True,
+            encryption_properties=None,
+            write_batch_size=None,
+            dictionary_pagesize_limit=None,
+            write_page_index=False,
+            encryption_config=None,
+            write_page_checksum=False,
+            sorting_columns=None,
+            store_decimal_as_integer=False,
+        )
+        # Push the defaults to the C++ options right away.
+        self._set_properties()
+        self._set_arrow_properties()
+    def __repr__(self):
+        return "<pyarrow.dataset.ParquetFileWriteOptions {0}>".format(
+            " ".join([f"{key}={value}" for key, value in self._properties.items()])
+        )
+# Names of the keyword options taken by ParquetReadOptions.__init__; keep the
+# two in sync (the class carries a matching "Also see" comment).
+cdef set _PARQUET_READ_OPTIONS = {
+    'dictionary_columns', 'coerce_int96_timestamp_unit'
+}
+cdef class ParquetFragmentScanOptions(FragmentScanOptions):
+    """
+    Scan-specific options for Parquet fragments.
+    Parameters
+    ----------
+    use_buffered_stream : bool, default False
+        Read files through buffered input streams rather than loading entire
+        row groups at once. This may be enabled to reduce memory overhead.
+        Disabled by default.
+    buffer_size : int, default 8192
+        Size of buffered stream, if enabled. Default is 8KB.
+    pre_buffer : bool, default True
+        If enabled, pre-buffer the raw Parquet data instead of issuing one
+        read per column chunk. This can improve performance on high-latency
+        filesystems (e.g. S3, GCS) by coalescing and issuing file reads in
+        parallel using a background I/O thread pool.
+        Set to False if you want to prioritize minimal memory usage
+        over maximum speed.
+    cache_options : pyarrow.CacheOptions, default None
+        Cache options used when pre_buffer is enabled. The default values should
+        be good for most use cases. You may want to adjust these for example if
+        you have exceptionally high latency to the file system.
+    thrift_string_size_limit : int, default None
+        If not None, override the maximum total string size allocated
+        when decoding Thrift structures. The default limit should be
+        sufficient for most Parquet files.
+    thrift_container_size_limit : int, default None
+        If not None, override the maximum total size of containers allocated
+        when decoding Thrift structures. The default limit should be
+        sufficient for most Parquet files.
+    decryption_config : pyarrow.dataset.ParquetDecryptionConfig, default None
+        If not None, use the provided ParquetDecryptionConfig to decrypt the
+        Parquet file.
+    decryption_properties : pyarrow.parquet.FileDecryptionProperties, default None
+        If not None, use the provided FileDecryptionProperties to decrypt encrypted
+        Parquet file.
+    page_checksum_verification : bool, default False
+        If True, verify the page checksum for each page read from the file.
+    """
+    # Avoid mistakenly creating attributes
+    __slots__ = ()
+    def __init__(self, *, bint use_buffered_stream=False,
+                 buffer_size=8192,
+                 bint pre_buffer=True,
+                 cache_options=None,
+                 thrift_string_size_limit=None,
+                 thrift_container_size_limit=None,
+                 decryption_config=None,
+                 decryption_properties=None,
+                 bint page_checksum_verification=False):
+        # Start from a freshly constructed C++ options object, then apply
+        # each argument through the property setters defined below.
+        self.init(shared_ptr[CFragmentScanOptions](
+            new CParquetFragmentScanOptions()))
+        self.use_buffered_stream = use_buffered_stream
+        self.buffer_size = buffer_size
+        # Pre-buffering is switched off when threading is not enabled.
+        if pre_buffer and not is_threading_enabled():
+            pre_buffer = False
+        self.pre_buffer = pre_buffer
+        # The remaining optional arguments only go through their setters
+        # when a value was actually supplied.
+        if cache_options is not None:
+            self.cache_options = cache_options
+        if thrift_string_size_limit is not None:
+            self.thrift_string_size_limit = thrift_string_size_limit
+        if thrift_container_size_limit is not None:
+            self.thrift_container_size_limit = thrift_container_size_limit
+        if decryption_config is not None:
+            self.parquet_decryption_config = decryption_config
+        if decryption_properties is not None:
+            self.decryption_properties = decryption_properties
+        self.page_checksum_verification = page_checksum_verification
+    cdef void init(self, const shared_ptr[CFragmentScanOptions]& sp):
+        # Run the base-class initialisation, then keep the raw pointer cast
+        # to the Parquet-specific options type.
+        FragmentScanOptions.init(self, sp)
+        self.parquet_options = <CParquetFragmentScanOptions*> sp.get()
+    cdef CReaderProperties* reader_properties(self):
+        # Raw pointer to the Parquet reader properties held by the options.
+        return self.parquet_options.reader_properties.get()
+    cdef ArrowReaderProperties* arrow_reader_properties(self):
+        # Raw pointer to the Arrow reader properties held by the options.
+        return self.parquet_options.arrow_reader_properties.get()
+    @property
+    def use_buffered_stream(self):
+        """Whether the buffered stream is enabled on the reader properties."""
+        return self.reader_properties().is_buffered_stream_enabled()
+    @use_buffered_stream.setter
+    def use_buffered_stream(self, bint use_buffered_stream):
+        if use_buffered_stream:
+            self.reader_properties().enable_buffered_stream()
+        else:
+            self.reader_properties().disable_buffered_stream()
+    @property
+    def buffer_size(self):
+        """Buffer size stored on the reader properties."""
+        return self.reader_properties().buffer_size()
+    @buffer_size.setter
+    def buffer_size(self, buffer_size):
+        # Zero and negative sizes are rejected with a ValueError.
+        if buffer_size <= 0:
+            raise ValueError("Buffer size must be larger than zero")
+        self.reader_properties().set_buffer_size(buffer_size)
+    @property
+    def pre_buffer(self):
+        """Pre-buffer flag stored on the Arrow reader properties."""
+        return self.arrow_reader_properties().pre_buffer()
+    @pre_buffer.setter
+    def pre_buffer(self, bint pre_buffer):
+        # A request to enable pre-buffering is silently ignored (the stored
+        # value is left unchanged) when threading is not enabled.
+        if pre_buffer and not is_threading_enabled():
+            return
+        self.arrow_reader_properties().set_pre_buffer(pre_buffer)
+    @property
+    def cache_options(self):
+        """CacheOptions wrapping the Arrow reader properties' cache options."""
+        return CacheOptions.wrap(self.arrow_reader_properties().cache_options())
+    @cache_options.setter
+    def cache_options(self, CacheOptions options):
+        self.arrow_reader_properties().set_cache_options(options.unwrap())
+    @property
+    def thrift_string_size_limit(self):
+        """Thrift string size limit stored on the reader properties."""
+        return self.reader_properties().thrift_string_size_limit()
+    @thrift_string_size_limit.setter
+    def thrift_string_size_limit(self, size):
+        # Zero and negative sizes are rejected with a ValueError.
+        if size <= 0:
+            raise ValueError("size must be larger than zero")
+        self.reader_properties().set_thrift_string_size_limit(size)
+    @property
+    def thrift_container_size_limit(self):
+        """Thrift container size limit stored on the reader properties."""
+        return self.reader_properties().thrift_container_size_limit()
+    @thrift_container_size_limit.setter
+    def thrift_container_size_limit(self, size):
+        # Zero and negative sizes are rejected with a ValueError.
+        if size <= 0:
+            raise ValueError("size must be larger than zero")
+        self.reader_properties().set_thrift_container_size_limit(size)
+    @property
+    def decryption_properties(self):
+        """
+        The decryption properties object last assigned through the setter.
+        Raises NotImplementedError when encryption support is not enabled.
+        """
+        if not parquet_encryption_enabled:
+            raise NotImplementedError(
+                "Unable to access encryption features. "
+                "Encryption is not enabled in your installation of pyarrow."
+            )
+        return self._decryption_properties
+    @decryption_properties.setter
+    def decryption_properties(self, config):
+        if not parquet_encryption_enabled:
+            raise NotImplementedError(
+                "Encryption is not enabled in your installation of pyarrow, but "
+                "decryption_properties were provided."
+            )
+        # Apply the properties, then remember the Python object so the getter
+        # can hand it back.
+        set_decryption_properties(self, config)
+        self._decryption_properties = config
+    @property
+    def parquet_decryption_config(self):
+        """
+        The decryption config object last assigned through the setter.
+        Raises NotImplementedError when encryption support is not enabled.
+        """
+        if not parquet_encryption_enabled:
+            raise NotImplementedError(
+                "Unable to access encryption features. "
+                "Encryption is not enabled in your installation of pyarrow."
+            )
+        return self._parquet_decryption_config
+    @parquet_decryption_config.setter
+    def parquet_decryption_config(self, config):
+        if not parquet_encryption_enabled:
+            raise NotImplementedError(
+                "Encryption is not enabled in your installation of pyarrow, but a "
+                "decryption_config was provided."
+            )
+        # Apply the config, then remember the Python object so the getter can
+        # hand it back.
+        set_decryption_config(self, config)
+        self._parquet_decryption_config = config
+    @property
+    def page_checksum_verification(self):
+        """Page checksum verification flag stored on the reader properties."""
+        return self.reader_properties().page_checksum_verification()
+    @page_checksum_verification.setter
+    def page_checksum_verification(self, bint page_checksum_verification):
+        self.reader_properties().set_page_checksum_verification(page_checksum_verification)
+    def equals(self, ParquetFragmentScanOptions other):
+        """
+        Compare the buffering, caching, Thrift limit and checksum options
+        of two objects. The decryption settings are not part of the
+        comparison.
+        Parameters
+        ----------
+        other : pyarrow.dataset.ParquetFragmentScanOptions
+        Returns
+        -------
+        bool
+        """
+        attrs = (
+            self.use_buffered_stream, self.buffer_size, self.pre_buffer, self.cache_options,
+            self.thrift_string_size_limit, self.thrift_container_size_limit,
+            self.page_checksum_verification)
+        other_attrs = (
+            other.use_buffered_stream, other.buffer_size, other.pre_buffer, other.cache_options,
+            other.thrift_string_size_limit,
+            other.thrift_container_size_limit, other.page_checksum_verification)
+        return attrs == other_attrs
+    @staticmethod
+    @binding(True)  # Required for Cython < 3
+    def _reconstruct(kwargs):
+        # __reduce__ doesn't allow passing named arguments directly to the
+        # reconstructor, hence this wrapper.
+        return ParquetFragmentScanOptions(**kwargs)
+    def __reduce__(self):
+        # Pickle support: the object is rebuilt from these keyword arguments
+        # via _reconstruct. The decryption config and decryption properties
+        # are not included, so they are not restored on unpickling.
+        kwargs = dict(
+            use_buffered_stream=self.use_buffered_stream,
+            buffer_size=self.buffer_size,
+            pre_buffer=self.pre_buffer,
+            cache_options=self.cache_options,
+            thrift_string_size_limit=self.thrift_string_size_limit,
+            thrift_container_size_limit=self.thrift_container_size_limit,
+            page_checksum_verification=self.page_checksum_verification
+        )
+        return ParquetFragmentScanOptions._reconstruct, (kwargs,)
+cdef class ParquetFactoryOptions(_Weakrefable):
+    """
+    Influences the discovery of parquet dataset.
+    Parameters
+    ----------
+    partition_base_dir : str, optional
+        For the purposes of applying the partitioning, paths will be
+        stripped of the partition_base_dir. Files not matching the
+        partition_base_dir prefix will be skipped for partitioning discovery.
+        The ignored files will still be part of the Dataset, but will not
+        have partition information.
+    partitioning : Partitioning, PartitioningFactory, optional
+        The partitioning scheme applied to fragments, see ``Partitioning``.
+    validate_column_chunk_paths : bool, default False
+        Assert that all ColumnChunk paths are consistent. The parquet spec
+        allows for ColumnChunk data to be stored in multiple files, but
+        ParquetDatasetFactory supports only a single file with all ColumnChunk
+        data. If this flag is set construction of a ParquetDatasetFactory will
+        raise an error if ColumnChunk data is not resident in a single file.
+    """
+    cdef:
+        CParquetFactoryOptions options
+    __slots__ = ()  # avoid mistakenly creating attributes
+    def __init__(self, partition_base_dir=None, partitioning=None,
+                 validate_column_chunk_paths=False):
+        # Route the partitioning argument to the matching property; a value
+        # that is neither a PartitioningFactory nor a Partitioning (including
+        # None) is ignored here.
+        if isinstance(partitioning, PartitioningFactory):
+            self.partitioning_factory = partitioning
+        elif isinstance(partitioning, Partitioning):
+            self.partitioning = partitioning
+        if partition_base_dir is not None:
+            self.partition_base_dir = partition_base_dir
+        self.options.validate_column_chunk_paths = validate_column_chunk_paths
+    cdef inline CParquetFactoryOptions unwrap(self):
+        # Hand the C++ options struct to callers (returned by value).
+        return self.options
+    @property
+    def partitioning(self):
+        """Partitioning to apply to discovered files.
+        NOTE: setting this property will overwrite partitioning_factory.
+        """
+        c_partitioning = self.options.partitioning.partitioning()
+        # A null pointer means no concrete Partitioning is currently set.
+        if c_partitioning.get() == nullptr:
+            return None
+        return Partitioning.wrap(c_partitioning)
+    @partitioning.setter
+    def partitioning(self, Partitioning value):
+        self.options.partitioning = (<Partitioning> value).unwrap()
+    @property
+    def partitioning_factory(self):
+        """PartitioningFactory to apply to discovered files and
+        discover a Partitioning.
+        NOTE: setting this property will overwrite partitioning.
+        """
+        c_factory = self.options.partitioning.factory()
+        # A null pointer means no PartitioningFactory is currently set.
+        if c_factory.get() == nullptr:
+            return None
+        return PartitioningFactory.wrap(c_factory, None, None)
+    @partitioning_factory.setter
+    def partitioning_factory(self, PartitioningFactory value):
+        self.options.partitioning = (<PartitioningFactory> value).unwrap()
+    @property
+    def partition_base_dir(self):
+        """
+        Base directory to strip paths before applying the partitioning.
+        """
+        return frombytes(self.options.partition_base_dir)
+    @partition_base_dir.setter
+    def partition_base_dir(self, value):
+        self.options.partition_base_dir = tobytes(value)
+    @property
+    def validate_column_chunk_paths(self):
+        """
+        Whether to assert that all ColumnChunk paths are consistent, i.e.
+        that the ColumnChunk data is resident in a single file.
+        """
+        return self.options.validate_column_chunk_paths
+    @validate_column_chunk_paths.setter
+    def validate_column_chunk_paths(self, value):
+        self.options.validate_column_chunk_paths = value
+cdef class ParquetDatasetFactory(DatasetFactory):
+    """
+    Create a ParquetDatasetFactory from a Parquet `_metadata` file.
+    Parameters
+    ----------
+    metadata_path : str
+        Path to the `_metadata` parquet metadata-only file generated with
+        `pyarrow.parquet.write_metadata`.
+    filesystem : pyarrow.fs.FileSystem
+        Filesystem to read the metadata_path from, and subsequent parquet
+        files.
+    format : ParquetFileFormat
+        Parquet format options.
+    options : ParquetFactoryOptions, optional
+        Various flags influencing the discovery of filesystem paths.
+    """
+    cdef:
+        CParquetDatasetFactory* parquet_factory
+    def __init__(self, metadata_path, FileSystem filesystem not None,
+                 FileFormat format not None,
+                 ParquetFactoryOptions options=None):
+        cdef:
+            c_string c_path
+            shared_ptr[CFileSystem] c_filesystem
+            shared_ptr[CParquetFileFormat] c_format
+            CResult[shared_ptr[CDatasetFactory]] result
+            CParquetFactoryOptions c_options
+        # Convert every argument to its C++ counterpart before releasing the
+        # GIL below.
+        c_path = tobytes(metadata_path)
+        c_filesystem = filesystem.unwrap()
+        # NOTE(review): the signature accepts any FileFormat, but the pointer
+        # is statically cast to CParquetFileFormat with no type check visible
+        # here -- confirm callers only pass a ParquetFileFormat.
+        c_format = static_pointer_cast[CParquetFileFormat, CFileFormat](
+            format.unwrap())
+        # Use default factory options when none (or a falsy value) is given.
+        options = options or ParquetFactoryOptions()
+        c_options = options.unwrap()
+        with nogil:
+            result = CParquetDatasetFactory.MakeFromMetaDataPath(
+                c_path, c_filesystem, c_format, c_options)
+        # GetResultValue is called once the GIL is held again.
+        self.init(GetResultValue(result))
+    cdef init(self, shared_ptr[CDatasetFactory]& sp):
+        # Run the base-class initialisation, then keep the raw pointer cast
+        # to the Parquet-specific factory type.
+        DatasetFactory.init(self, sp)
+        self.parquet_factory = <CParquetDatasetFactory*> sp.get()

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_fs.pxd ADDED Viewed

	@@ -0,0 +1,91 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# cython: language_level = 3
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow_fs cimport *
+from pyarrow.lib import _detect_compression, frombytes, tobytes
+from pyarrow.lib cimport *
+# Enum whose members take the values of the CFileType_* constants, each cast
+# to int8_t.
+cpdef enum FileType:
+    NotFound = <int8_t> CFileType_NotFound
+    Unknown = <int8_t> CFileType_Unknown
+    File = <int8_t> CFileType_File
+    Directory = <int8_t> CFileType_Directory
+# Declaration of the FileInfo extension type, which stores a CFileInfo by
+# value. Only signatures are given here; the method bodies are not part of
+# this file.
+cdef class FileInfo(_Weakrefable):
+    cdef:
+        CFileInfo info
+    @staticmethod
+    cdef wrap(CFileInfo info)
+    cdef inline CFileInfo unwrap(self) nogil
+    @staticmethod
+    cdef CFileInfo unwrap_safe(obj)
+# Declaration of the FileSelector extension type, which stores a
+# CFileSelector by value; wrap/unwrap are declared here without bodies.
+cdef class FileSelector(_Weakrefable):
+    cdef:
+        CFileSelector selector
+    @staticmethod
+    cdef FileSelector wrap(CFileSelector selector)
+    cdef inline CFileSelector unwrap(self) nogil
+# Declaration of the FileSystem base extension type. It holds both a
+# shared_ptr to a CFileSystem (`wrapped`) and a raw CFileSystem pointer
+# (`fs`); presumably `fs` points at the object owned by `wrapped` -- the
+# assignment is not shown in this declaration file.
+cdef class FileSystem(_Weakrefable):
+    cdef:
+        shared_ptr[CFileSystem] wrapped
+        CFileSystem* fs
+    cdef init(self, const shared_ptr[CFileSystem]& wrapped)
+    @staticmethod
+    cdef wrap(const shared_ptr[CFileSystem]& sp)
+    cdef inline shared_ptr[CFileSystem] unwrap(self) nogil
+# Declaration of LocalFileSystem: a FileSystem subclass that adds no fields
+# and redeclares init with the same signature as the base class.
+cdef class LocalFileSystem(FileSystem):
+    cdef init(self, const shared_ptr[CFileSystem]& wrapped)
+# Declaration of SubTreeFileSystem: a FileSystem subclass that additionally
+# holds a raw CSubTreeFileSystem pointer.
+cdef class SubTreeFileSystem(FileSystem):
+    cdef:
+        CSubTreeFileSystem* subtreefs
+    cdef init(self, const shared_ptr[CFileSystem]& wrapped)
+# Declaration of _MockFileSystem: a FileSystem subclass that additionally
+# holds a raw CMockFileSystem pointer.
+cdef class _MockFileSystem(FileSystem):
+    cdef:
+        CMockFileSystem* mockfs
+    cdef init(self, const shared_ptr[CFileSystem]& wrapped)
+# Declaration of PyFileSystem: a FileSystem subclass that additionally holds
+# a raw CPyFileSystem pointer.
+cdef class PyFileSystem(FileSystem):
+    cdef:
+        CPyFileSystem* pyfs
+    cdef init(self, const shared_ptr[CFileSystem]& wrapped)

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_hdfs.pyx ADDED Viewed

	@@ -0,0 +1,160 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# cython: language_level = 3
+from cython cimport binding
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+from pyarrow.includes.libarrow_fs cimport *
+from pyarrow._fs cimport FileSystem
+from pyarrow.lib import frombytes, tobytes
+from pyarrow.util import _stringify_path
+cdef class HadoopFileSystem(FileSystem):
+    """
+    HDFS backed FileSystem implementation
+    Parameters
+    ----------
+    host : str
+        HDFS host to connect to. Set to "default" for fs.defaultFS from
+        core-site.xml.
+    port : int, default 8020
+        HDFS port to connect to. Set to 0 for default or logical (HA) nodes.
+    user : str, default None
+        Username when connecting to HDFS; None implies login user.
+    replication : int, default 3
+        Number of copies each block will have.
+    buffer_size : int, default 0
+        If 0, no buffering will happen otherwise the size of the temporary read
+        and write buffer.
+    default_block_size : int, default None
+        None means the default configuration for HDFS, a typical block size is
+        128 MB.
+    kerb_ticket : string or path, default None
+        If not None, the path to the Kerberos ticket cache.
+    extra_conf : dict, default None
+        Extra key/value pairs for configuration; will override any
+        hdfs-site.xml properties.
+    Examples
+    --------
+    >>> from pyarrow import fs
+    >>> hdfs = fs.HadoopFileSystem(host, port, user=user, kerb_ticket=ticket_cache_path) # doctest: +SKIP
+    For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`.
+    """
+    cdef:
+        # Raw pointer to the wrapped C++ object; assigned in init() from the
+        # same shared_ptr that is passed on to FileSystem.init().
+        CHadoopFileSystem* hdfs
+    def __init__(self, str host, int port=8020, *, str user=None,
+                 int replication=3, int buffer_size=0,
+                 default_block_size=None, kerb_ticket=None,
+                 extra_conf=None):
+        cdef:
+            CHdfsOptions options
+            shared_ptr[CHadoopFileSystem] wrapped
+        # Bare host names get an explicit hdfs:// scheme; "default" and
+        # values already starting with hdfs:// or viewfs:// pass through.
+        if not host.startswith(('hdfs://', 'viewfs://')) and host != "default":
+            # TODO(kszucs): do more sanitization
+            host = 'hdfs://{}'.format(host)
+        options.ConfigureEndPoint(tobytes(host), int(port))
+        options.ConfigureReplication(replication)
+        options.ConfigureBufferSize(buffer_size)
+        # The remaining settings are only forwarded when explicitly given,
+        # leaving the C++ option defaults in place otherwise.
+        if user is not None:
+            options.ConfigureUser(tobytes(user))
+        if default_block_size is not None:
+            options.ConfigureBlockSize(default_block_size)
+        if kerb_ticket is not None:
+            options.ConfigureKerberosTicketCachePath(
+                tobytes(_stringify_path(kerb_ticket)))
+        if extra_conf is not None:
+            for k, v in extra_conf.items():
+                options.ConfigureExtraConf(tobytes(k), tobytes(v))
+        # Creating the C++ filesystem runs with the GIL released.
+        with nogil:
+            wrapped = GetResultValue(CHadoopFileSystem.Make(options))
+        self.init(<shared_ptr[CFileSystem]> wrapped)
+    cdef init(self, const shared_ptr[CFileSystem]& wrapped):
+        # Register the shared_ptr with the base class, then keep a typed
+        # pointer to the same object for HDFS-specific calls (see __reduce__).
+        FileSystem.init(self, wrapped)
+        self.hdfs = <CHadoopFileSystem*> wrapped.get()
+    @staticmethod
+    def from_uri(uri):
+        """
+        Instantiate HadoopFileSystem object from an URI string.
+        The following two calls are equivalent
+        * ``HadoopFileSystem.from_uri('hdfs://localhost:8020/?user=test\
+&replication=1')``
+        * ``HadoopFileSystem('localhost', port=8020, user='test', \
+replication=1)``
+        Parameters
+        ----------
+        uri : str
+            A string URI describing the connection to HDFS.
+            In order to change the user, replication, buffer_size or
+            default_block_size pass the values as query parts.
+        Returns
+        -------
+        HadoopFileSystem
+        """
+        cdef:
+            # __new__ allocates the instance without running __init__; the
+            # options below come entirely from the parsed URI.
+            HadoopFileSystem self = HadoopFileSystem.__new__(HadoopFileSystem)
+            shared_ptr[CHadoopFileSystem] wrapped
+            CHdfsOptions options
+        options = GetResultValue(CHdfsOptions.FromUriString(tobytes(uri)))
+        with nogil:
+            wrapped = GetResultValue(CHadoopFileSystem.Make(options))
+        self.init(<shared_ptr[CFileSystem]> wrapped)
+        return self
+    @staticmethod
+    @binding(True)  # Required for cython < 3
+    def _reconstruct(kwargs):
+        # __reduce__ doesn't allow passing named arguments directly to the
+        # reconstructor, hence this wrapper.
+        return HadoopFileSystem(**kwargs)
+    def __reduce__(self):
+        # Pickle support: read the options back from the live C++ filesystem
+        # and replay them as keyword arguments through _reconstruct.
+        cdef CHdfsOptions opts = self.hdfs.options()
+        return (
+            HadoopFileSystem._reconstruct, (dict(
+                host=frombytes(opts.connection_config.host),
+                port=opts.connection_config.port,
+                user=frombytes(opts.connection_config.user),
+                replication=opts.replication,
+                buffer_size=opts.buffer_size,
+                default_block_size=opts.default_block_size,
+                kerb_ticket=frombytes(opts.connection_config.kerb_ticket),
+                extra_conf={frombytes(k): frombytes(v)
+                            for k, v in opts.connection_config.extra_conf},
+            ),)
+        )

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_json.pxd ADDED Viewed

	@@ -0,0 +1,36 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# cython: language_level = 3
+from pyarrow.includes.libarrow cimport *
+from pyarrow.lib cimport _Weakrefable
+# Declaration of the extension type that holds a CJSONParseOptions value.
+cdef class ParseOptions(_Weakrefable):
+    cdef:
+        # The C++ options struct, held by value on the instance.
+        CJSONParseOptions options
+    # Builds a ParseOptions instance from an existing C++ options value.
+    @staticmethod
+    cdef ParseOptions wrap(CJSONParseOptions options)
+# Declaration of the extension type that holds a CJSONReadOptions value.
+cdef class ReadOptions(_Weakrefable):
+    cdef:
+        # The C++ options struct, held by value on the instance.
+        CJSONReadOptions options
+    # Builds a ReadOptions instance from an existing C++ options value.
+    @staticmethod
+    cdef ReadOptions wrap(CJSONReadOptions options)

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_json.pyx ADDED Viewed

	@@ -0,0 +1,310 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# cython: profile=False
+# distutils: language = c++
+# cython: language_level = 3
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+from pyarrow.lib cimport (_Weakrefable, MemoryPool,
+                          maybe_unbox_memory_pool,
+                          get_input_stream, pyarrow_wrap_table,
+                          pyarrow_wrap_schema, pyarrow_unwrap_schema)
+cdef class ReadOptions(_Weakrefable):
+    """
+    Reader-level settings for JSON input.
+    Parameters
+    ----------
+    use_threads : bool, optional (default True)
+        Whether reading may be spread across multiple threads.
+    block_size : int, optional
+        How many bytes to process at a time from the input stream.
+        This determines the multi-threading granularity as well as
+        the size of the individual chunks in the Table.
+    """
+    # Empty __slots__ prevents mistakenly creating new attributes
+    __slots__ = ()
+    def __init__(self, use_threads=None, block_size=None):
+        self.options = CJSONReadOptions.Defaults()
+        # Only explicitly supplied values replace the C++ defaults; they go
+        # through the property setters, in declaration order.
+        for attr, supplied in (("use_threads", use_threads),
+                               ("block_size", block_size)):
+            if supplied is not None:
+                setattr(self, attr, supplied)
+    @property
+    def use_threads(self):
+        """
+        Whether reading may be spread across multiple threads.
+        """
+        return self.options.use_threads
+    @use_threads.setter
+    def use_threads(self, value):
+        self.options.use_threads = value
+    @property
+    def block_size(self):
+        """
+        How many bytes to process at a time from the input stream.
+        This determines the multi-threading granularity as well as the size
+        of the individual chunks in the Table.
+        """
+        return self.options.block_size
+    @block_size.setter
+    def block_size(self, value):
+        self.options.block_size = value
+    def __reduce__(self):
+        # Pickle as a constructor call carrying the current field values
+        ctor_args = (self.use_threads, self.block_size)
+        return ReadOptions, ctor_args
+    def equals(self, ReadOptions other):
+        """
+        Compare both option fields with those of another ReadOptions.
+        Parameters
+        ----------
+        other : pyarrow.json.ReadOptions
+        Returns
+        -------
+        bool
+        """
+        if self.use_threads != other.use_threads:
+            return False
+        return self.block_size == other.block_size
+    def __eq__(self, other):
+        # equals() rejects foreign types with TypeError; treat as unequal
+        try:
+            same = self.equals(other)
+        except TypeError:
+            return False
+        return same
+    @staticmethod
+    cdef ReadOptions wrap(CJSONReadOptions options):
+        cdef ReadOptions wrapped = ReadOptions()
+        wrapped.options = options  # shallow copy
+        return wrapped
+cdef class ParseOptions(_Weakrefable):
+    """
+    Parser-level settings for JSON input.
+    Parameters
+    ----------
+    explicit_schema : Schema, optional (default None)
+        Optional explicit schema (no type inference, ignores other fields).
+    newlines_in_values : bool, optional (default False)
+        Whether objects may be printed across multiple lines (for example
+        pretty printed). If false, input must end with an empty line.
+    unexpected_field_behavior : str, default "infer"
+        How JSON fields outside of explicit_schema (if given) are treated.
+        Possible behaviors:
+         - "ignore": unexpected JSON fields are ignored
+         - "error": error out on unexpected JSON fields
+         - "infer": unexpected JSON fields are type-inferred and included in
+           the output
+    """
+    __slots__ = ()
+    def __init__(self, explicit_schema=None, newlines_in_values=None,
+                 unexpected_field_behavior=None):
+        self.options = CJSONParseOptions.Defaults()
+        # Only explicitly supplied values replace the C++ defaults; they go
+        # through the property setters, in declaration order.
+        supplied = (
+            ("explicit_schema", explicit_schema),
+            ("newlines_in_values", newlines_in_values),
+            ("unexpected_field_behavior", unexpected_field_behavior),
+        )
+        for attr, value in supplied:
+            if value is not None:
+                setattr(self, attr, value)
+    def __reduce__(self):
+        # Pickle as a constructor call carrying the current field values
+        ctor_args = (
+            self.explicit_schema,
+            self.newlines_in_values,
+            self.unexpected_field_behavior,
+        )
+        return ParseOptions, ctor_args
+    @property
+    def explicit_schema(self):
+        """
+        Optional explicit schema (no type inference, ignores other fields)
+        """
+        if self.options.explicit_schema.get() != NULL:
+            return pyarrow_wrap_schema(self.options.explicit_schema)
+        return None
+    @explicit_schema.setter
+    def explicit_schema(self, value):
+        self.options.explicit_schema = pyarrow_unwrap_schema(value)
+    @property
+    def newlines_in_values(self):
+        """
+        Whether newline characters are allowed in JSON values.
+        Setting this to True reduces the performance of multi-threaded
+        JSON reading.
+        """
+        return self.options.newlines_in_values
+    @newlines_in_values.setter
+    def newlines_in_values(self, value):
+        self.options.newlines_in_values = value
+    @property
+    def unexpected_field_behavior(self):
+        """
+        How JSON fields outside of explicit_schema (if given) are treated.
+        Possible behaviors:
+         - "ignore": unexpected JSON fields are ignored
+         - "error": error out on unexpected JSON fields
+         - "infer": unexpected JSON fields are type-inferred and included in
+           the output
+        Set to "infer" by default.
+        """
+        cdef CUnexpectedFieldBehavior behavior
+        behavior = self.options.unexpected_field_behavior
+        if behavior == CUnexpectedFieldBehavior_Ignore:
+            return "ignore"
+        if behavior == CUnexpectedFieldBehavior_Error:
+            return "error"
+        if behavior == CUnexpectedFieldBehavior_InferType:
+            return "infer"
+        raise ValueError('Unexpected value for unexpected_field_behavior')
+    @unexpected_field_behavior.setter
+    def unexpected_field_behavior(self, value):
+        # Each recognised name stores the matching C++ enum value directly;
+        # anything else is rejected before the options are touched.
+        if value == "ignore":
+            self.options.unexpected_field_behavior = \
+                CUnexpectedFieldBehavior_Ignore
+        elif value == "error":
+            self.options.unexpected_field_behavior = \
+                CUnexpectedFieldBehavior_Error
+        elif value == "infer":
+            self.options.unexpected_field_behavior = \
+                CUnexpectedFieldBehavior_InferType
+        else:
+            raise ValueError(
+                "Unexpected value `{}` for `unexpected_field_behavior`, pass "
+                "either `ignore`, `error` or `infer`.".format(value)
+            )
+    def equals(self, ParseOptions other):
+        """
+        Compare all option fields with those of another ParseOptions.
+        Parameters
+        ----------
+        other : pyarrow.json.ParseOptions
+        Returns
+        -------
+        bool
+        """
+        if not (self.explicit_schema == other.explicit_schema):
+            return self.explicit_schema == other.explicit_schema
+        if not (self.newlines_in_values == other.newlines_in_values):
+            return False
+        return (self.unexpected_field_behavior ==
+                other.unexpected_field_behavior)
+    def __eq__(self, other):
+        # equals() rejects foreign types with TypeError; treat as unequal
+        try:
+            same = self.equals(other)
+        except TypeError:
+            return False
+        return same
+    @staticmethod
+    cdef ParseOptions wrap(CJSONParseOptions options):
+        cdef ParseOptions wrapped = ParseOptions()
+        wrapped.options = options  # shallow copy
+        return wrapped
+cdef _get_reader(input_file, shared_ptr[CInputStream]* out):
+    # Open `input_file` as an input stream into `out`, never memory-mapped.
+    get_input_stream(input_file, False, out)
+cdef _get_read_options(ReadOptions read_options, CJSONReadOptions* out):
+    # Copy the caller's read options into `out`, or the defaults for None.
+    if read_options is not None:
+        out[0] = read_options.options
+        return
+    out[0] = CJSONReadOptions.Defaults()
+cdef _get_parse_options(ParseOptions parse_options, CJSONParseOptions* out):
+    # Copy the caller's parse options into `out`, or the defaults for None.
+    if parse_options is not None:
+        out[0] = parse_options.options
+        return
+    out[0] = CJSONParseOptions.Defaults()
+def read_json(input_file, read_options=None, parse_options=None,
+              MemoryPool memory_pool=None):
+    """
+    Read a Table from a stream of JSON data.
+    Parameters
+    ----------
+    input_file : str, path or file-like object
+        The location of JSON data. Currently only the line-delimited JSON
+        format is supported.
+    read_options : pyarrow.json.ReadOptions, optional
+        Options for the JSON reader (see ReadOptions constructor for defaults).
+    parse_options : pyarrow.json.ParseOptions, optional
+        Options for the JSON parser
+        (see ParseOptions constructor for defaults).
+    memory_pool : MemoryPool, optional
+        Pool to allocate Table memory from.
+    Returns
+    -------
+    :class:`pyarrow.Table`
+        Contents of the JSON file as an in-memory table.
+    """
+    cdef:
+        shared_ptr[CInputStream] c_stream
+        CJSONReadOptions c_read_opts
+        CJSONParseOptions c_parse_opts
+        CMemoryPool* c_pool
+        shared_ptr[CJSONReader] c_reader
+        shared_ptr[CTable] c_table
+    # Resolve every Python-level argument to its C++ counterpart first
+    _get_reader(input_file, &c_stream)
+    _get_read_options(read_options, &c_read_opts)
+    _get_parse_options(parse_options, &c_parse_opts)
+    c_pool = maybe_unbox_memory_pool(memory_pool)
+    c_reader = GetResultValue(
+        CJSONReader.Make(c_pool, c_stream, c_read_opts, c_parse_opts))
+    # The actual read runs with the GIL released
+    with nogil:
+        c_table = GetResultValue(c_reader.get().Read())
+    return pyarrow_wrap_table(c_table)

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_orc.pyx ADDED Viewed

	@@ -0,0 +1,445 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# cython: profile=False
+# distutils: language = c++
+from cython.operator cimport dereference as deref
+from libcpp.vector cimport vector as std_vector
+from libcpp.utility cimport move
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+from pyarrow.lib cimport (check_status, _Weakrefable,
+                          MemoryPool, maybe_unbox_memory_pool,
+                          pyarrow_wrap_schema,
+                          pyarrow_wrap_batch,
+                          Table,
+                          pyarrow_wrap_table,
+                          pyarrow_wrap_metadata,
+                          pyarrow_unwrap_table,
+                          get_reader,
+                          get_writer)
+from pyarrow.lib import frombytes, tobytes
+from pyarrow.util import _stringify_path
+cdef compression_type_from_enum(CCompressionType compression_type):
+    # Translate a compression enum value into the name reported for ORC
+    # files; note that GZIP is reported as 'ZLIB'.
+    if compression_type == CCompressionType_UNCOMPRESSED:
+        return 'UNCOMPRESSED'
+    if compression_type == CCompressionType_GZIP:
+        return 'ZLIB'
+    if compression_type == CCompressionType_SNAPPY:
+        return 'SNAPPY'
+    if compression_type == CCompressionType_LZ4:
+        return 'LZ4'
+    if compression_type == CCompressionType_ZSTD:
+        return 'ZSTD'
+    raise ValueError('Unsupported compression')
+cdef CCompressionType compression_type_from_name(name) except *:
+    # Translate a case-insensitive compression name into the enum value;
+    # 'ZLIB' selects GZIP. Non-strings raise TypeError, unknown names
+    # raise ValueError.
+    if not isinstance(name, str):
+        raise TypeError('compression must be a string')
+    name = name.upper()
+    if name == 'UNCOMPRESSED':
+        return CCompressionType_UNCOMPRESSED
+    if name == 'ZLIB':
+        return CCompressionType_GZIP
+    if name == 'SNAPPY':
+        return CCompressionType_SNAPPY
+    if name == 'LZ4':
+        return CCompressionType_LZ4
+    if name == 'ZSTD':
+        return CCompressionType_ZSTD
+    raise ValueError(f'Unknown CompressionKind: {name}')
+cdef compression_strategy_from_enum(
+    CompressionStrategy compression_strategy
+):
+    # Translate a compression strategy enum value into its name.
+    if compression_strategy == _CompressionStrategy_SPEED:
+        return 'SPEED'
+    if compression_strategy == _CompressionStrategy_COMPRESSION:
+        return 'COMPRESSION'
+    raise ValueError('Unsupported compression strategy')
+cdef CompressionStrategy compression_strategy_from_name(name) except *:
+    # Translate a case-insensitive strategy name into the enum value.
+    # Non-strings raise TypeError, unknown names raise ValueError.
+    if not isinstance(name, str):
+        raise TypeError('compression strategy must be a string')
+    name = name.upper()
+    if name == 'SPEED':
+        return _CompressionStrategy_SPEED
+    if name == 'COMPRESSION':
+        return _CompressionStrategy_COMPRESSION
+    raise ValueError(f'Unknown CompressionStrategy: {name}')
+cdef file_version_from_class(FileVersion file_version):
+    # Render the file version through its ToString() form as a Python str.
+    version_text = file_version.ToString()
+    return frombytes(version_text)
+cdef writer_id_from_enum(WriterId writer_id):
+    # Map a WriterId enum value to its display name.
+    # IDs without an entry yield 'UNKNOWN' instead of raising: the caller,
+    # ORCReader.writer(), tests for exactly that name in order to fall back
+    # to the raw numeric writer id, and that branch could never be reached
+    # while this function raised ValueError for unmapped IDs.
+    writer_id_map = {
+        _WriterId_ORC_JAVA_WRITER: 'ORC_JAVA',
+        _WriterId_ORC_CPP_WRITER: 'ORC_CPP',
+        _WriterId_PRESTO_WRITER: 'PRESTO',
+        _WriterId_SCRITCHLEY_GO: 'SCRITCHLEY_GO',
+        _WriterId_TRINO_WRITER: 'TRINO',
+    }
+    return writer_id_map.get(writer_id, 'UNKNOWN')
+cdef writer_version_from_enum(WriterVersion writer_version):
+    # Translate a writer version enum value into its name; values without
+    # an entry raise ValueError.
+    version_name = {
+        _WriterVersion_ORIGINAL: 'ORIGINAL',
+        _WriterVersion_HIVE_8732: 'HIVE_8732',
+        _WriterVersion_HIVE_4243: 'HIVE_4243',
+        _WriterVersion_HIVE_12055: 'HIVE_12055',
+        _WriterVersion_HIVE_13083: 'HIVE_13083',
+        _WriterVersion_ORC_101: 'ORC_101',
+        _WriterVersion_ORC_135: 'ORC_135',
+        _WriterVersion_ORC_517: 'ORC_517',
+        _WriterVersion_ORC_203: 'ORC_203',
+        _WriterVersion_ORC_14: 'ORC_14',
+    }.get(writer_version)
+    if version_name is None:
+        raise ValueError('Unsupported writer version')
+    return version_name
+cdef shared_ptr[WriteOptions] _create_write_options(
+    file_version=None,
+    batch_size=None,
+    stripe_size=None,
+    compression=None,
+    compression_block_size=None,
+    compression_strategy=None,
+    row_index_stride=None,
+    padding_tolerance=None,
+    dictionary_key_size_threshold=None,
+    bloom_filter_columns=None,
+    bloom_filter_fpp=None
+) except *:
+    """
+    Build the ORC WriteOptions from optional Python-level settings.
+    Every argument left as None keeps the value of a freshly constructed
+    WriteOptions. Supplied values are validated before being stored.
+    Raises
+    ------
+    ValueError
+        If a value is out of range, cannot be converted, or names an
+        unknown file version, compression or compression strategy.
+    TypeError
+        If compression or compression_strategy is not a string.
+    """
+    cdef:
+        shared_ptr[WriteOptions] options
+    options = make_shared[WriteOptions]()
+    # batch_size
+    if batch_size is not None:
+        if isinstance(batch_size, int) and batch_size > 0:
+            deref(options).batch_size = batch_size
+        else:
+            raise ValueError(f"Invalid ORC writer batch size: {batch_size}")
+    # file_version
+    if file_version is not None:
+        if file_version == "0.12":
+            deref(options).file_version = FileVersion(0, 12)
+        elif file_version == "0.11":
+            deref(options).file_version = FileVersion(0, 11)
+        else:
+            raise ValueError(f"Unsupported ORC file version: {file_version}")
+    # stripe_size
+    if stripe_size is not None:
+        if isinstance(stripe_size, int) and stripe_size > 0:
+            deref(options).stripe_size = stripe_size
+        else:
+            raise ValueError(f"Invalid ORC stripe size: {stripe_size}")
+    # compression
+    if compression is not None:
+        if isinstance(compression, str):
+            deref(options).compression = compression_type_from_name(
+                compression)
+        else:
+            raise TypeError("Unsupported ORC compression type: "
+                            f"{compression}")
+    # compression_block_size
+    if compression_block_size is not None:
+        if (isinstance(compression_block_size, int) and
+                compression_block_size > 0):
+            deref(options).compression_block_size = compression_block_size
+        else:
+            raise ValueError("Invalid ORC compression block size: "
+                             f"{compression_block_size}")
+    # compression_strategy
+    if compression_strategy is not None:
+        # Check the strategy itself, not `compression`: otherwise a valid
+        # strategy is rejected whenever `compression` is left as None.
+        if isinstance(compression_strategy, str):
+            deref(options).compression_strategy = \
+                compression_strategy_from_name(compression_strategy)
+        else:
+            raise TypeError("Unsupported ORC compression strategy: "
+                            f"{compression_strategy}")
+    # row_index_stride
+    if row_index_stride is not None:
+        if isinstance(row_index_stride, int) and row_index_stride > 0:
+            deref(options).row_index_stride = row_index_stride
+        else:
+            raise ValueError("Invalid ORC row index stride: "
+                             f"{row_index_stride}")
+    # padding_tolerance
+    if padding_tolerance is not None:
+        try:
+            padding_tolerance = float(padding_tolerance)
+            deref(options).padding_tolerance = padding_tolerance
+        except Exception:
+            raise ValueError("Invalid ORC padding tolerance: "
+                             f"{padding_tolerance}")
+    # dictionary_key_size_threshold
+    # Range checks below raise explicitly instead of using `assert`, so
+    # they still run when assertions are disabled; the surrounding
+    # `except` turns any failure into the descriptive ValueError.
+    if dictionary_key_size_threshold is not None:
+        try:
+            dictionary_key_size_threshold = float(
+                dictionary_key_size_threshold)
+            if not 0 <= dictionary_key_size_threshold <= 1:
+                raise ValueError(dictionary_key_size_threshold)
+            deref(options).dictionary_key_size_threshold = \
+                dictionary_key_size_threshold
+        except Exception:
+            raise ValueError("Invalid ORC dictionary key size threshold: "
+                             f"{dictionary_key_size_threshold}")
+    # bloom_filter_columns
+    if bloom_filter_columns is not None:
+        try:
+            bloom_filter_columns = list(bloom_filter_columns)
+            for col in bloom_filter_columns:
+                if not (isinstance(col, int) and col >= 0):
+                    raise ValueError(col)
+            deref(options).bloom_filter_columns = bloom_filter_columns
+        except Exception:
+            raise ValueError("Invalid ORC BloomFilter columns: "
+                             f"{bloom_filter_columns}")
+    # Max false positive rate of the Bloom Filter
+    if bloom_filter_fpp is not None:
+        try:
+            bloom_filter_fpp = float(bloom_filter_fpp)
+            if not 0 <= bloom_filter_fpp <= 1:
+                raise ValueError(bloom_filter_fpp)
+            deref(options).bloom_filter_fpp = bloom_filter_fpp
+        except Exception:
+            raise ValueError("Invalid ORC BloomFilter false positive rate: "
+                             f"{bloom_filter_fpp}")
+    return options
+cdef class ORCReader(_Weakrefable):
+    # Wrapper around the C++ ORCFileReader.
+    # Every accessor dereferences self.reader, which is only assigned in
+    # open().
+    cdef:
+        # Python object passed to open(); stored on the instance there.
+        object source
+        # Memory pool resolved once in __cinit__ and handed to the reader.
+        CMemoryPool* allocator
+        unique_ptr[ORCFileReader] reader
+    def __cinit__(self, MemoryPool memory_pool=None):
+        self.allocator = maybe_unbox_memory_pool(memory_pool)
+    def open(self, object source, c_bool use_memory_map=True):
+        # Resolve `source` to a random-access file handle, then create the
+        # C++ reader on it with the GIL released.
+        cdef:
+            shared_ptr[CRandomAccessFile] rd_handle
+        self.source = source
+        get_reader(source, use_memory_map, &rd_handle)
+        with nogil:
+            self.reader = move(GetResultValue(
+                ORCFileReader.Open(rd_handle, self.allocator)
+            ))
+    def metadata(self):
+        """
+        The arrow metadata for this file.
+        Returns
+        -------
+        metadata : pyarrow.KeyValueMetadata
+        """
+        cdef:
+            shared_ptr[const CKeyValueMetadata] sp_arrow_metadata
+        with nogil:
+            sp_arrow_metadata = GetResultValue(
+                deref(self.reader).ReadMetadata()
+            )
+        return pyarrow_wrap_metadata(sp_arrow_metadata)
+    def schema(self):
+        """
+        The arrow schema for this file.
+        Returns
+        -------
+        schema : pyarrow.Schema
+        """
+        cdef:
+            shared_ptr[CSchema] sp_arrow_schema
+        with nogil:
+            sp_arrow_schema = GetResultValue(deref(self.reader).ReadSchema())
+        return pyarrow_wrap_schema(sp_arrow_schema)
+    # The accessors below each forward to a single C++ reader call; enum and
+    # version results are converted to strings by the module-level helpers.
+    def nrows(self):
+        return deref(self.reader).NumberOfRows()
+    def nstripes(self):
+        return deref(self.reader).NumberOfStripes()
+    def file_version(self):
+        return file_version_from_class(deref(self.reader).GetFileVersion())
+    def software_version(self):
+        return frombytes(deref(self.reader).GetSoftwareVersion())
+    def compression(self):
+        return compression_type_from_enum(
+            GetResultValue(deref(self.reader).GetCompression()))
+    def compression_size(self):
+        return deref(self.reader).GetCompressionSize()
+    def row_index_stride(self):
+        return deref(self.reader).GetRowIndexStride()
+    def writer(self):
+        # Returns the writer's name, or the raw numeric writer id when the
+        # helper reports the name 'UNKNOWN'.
+        writer_name = writer_id_from_enum(deref(self.reader).GetWriterId())
+        if writer_name == 'UNKNOWN':
+            return deref(self.reader).GetWriterIdValue()
+        else:
+            return writer_name
+    def writer_version(self):
+        return writer_version_from_enum(deref(self.reader).GetWriterVersion())
+    def nstripe_statistics(self):
+        return deref(self.reader).GetNumberOfStripeStatistics()
+    def content_length(self):
+        return deref(self.reader).GetContentLength()
+    def stripe_statistics_length(self):
+        return deref(self.reader).GetStripeStatisticsLength()
+    def file_footer_length(self):
+        return deref(self.reader).GetFileFooterLength()
+    def file_postscript_length(self):
+        return deref(self.reader).GetFilePostscriptLength()
+    def file_length(self):
+        return deref(self.reader).GetFileLength()
+    def serialized_file_tail(self):
+        return deref(self.reader).GetSerializedFileTail()
+    def read_stripe(self, n, columns=None):
+        # Read stripe number `n` as a record batch, optionally restricted
+        # to the named columns.
+        cdef:
+            shared_ptr[CRecordBatch] sp_record_batch
+            int64_t stripe
+            std_vector[c_string] c_names
+        stripe = n
+        if columns is None:
+            with nogil:
+                sp_record_batch = GetResultValue(
+                    deref(self.reader).ReadStripe(stripe)
+                )
+        else:
+            # Column names are encoded to bytes while the GIL is still held
+            c_names = [tobytes(name) for name in columns]
+            with nogil:
+                sp_record_batch = GetResultValue(
+                    deref(self.reader).ReadStripe(stripe, c_names)
+                )
+        return pyarrow_wrap_batch(sp_record_batch)
+    def read(self, columns=None):
+        # Read the file as a table, optionally restricted to the named
+        # columns.
+        cdef:
+            shared_ptr[CTable] sp_table
+            std_vector[c_string] c_names
+        if columns is None:
+            with nogil:
+                sp_table = GetResultValue(deref(self.reader).Read())
+        else:
+            # Column names are encoded to bytes while the GIL is still held
+            c_names = [tobytes(name) for name in columns]
+            with nogil:
+                sp_table = GetResultValue(deref(self.reader).Read(c_names))
+        return pyarrow_wrap_table(sp_table)
+cdef class ORCWriter(_Weakrefable):
+    # Wrapper around the C++ ORCFileWriter.
+    # write() and close() dereference self.writer, which is only assigned
+    # in open().
+    cdef:
+        unique_ptr[ORCFileWriter] writer
+        shared_ptr[COutputStream] sink
+        # True when open() created the sink from a path; close() then also
+        # closes the sink.
+        c_bool own_sink
+    def open(self, object where, *,
+             file_version=None,
+             batch_size=None,
+             stripe_size=None,
+             compression=None,
+             compression_block_size=None,
+             compression_strategy=None,
+             row_index_stride=None,
+             padding_tolerance=None,
+             dictionary_key_size_threshold=None,
+             bloom_filter_columns=None,
+             bloom_filter_fpp=None):
+        # Open the output sink for `where` and create the C++ writer on it.
+        # The keyword options are validated by _create_write_options.
+        cdef:
+            shared_ptr[WriteOptions] write_options
+            c_string c_where
+        try:
+            where = _stringify_path(where)
+        except TypeError:
+            # Not convertible to a path: obtain the sink via get_writer and
+            # leave closing it to the caller.
+            get_writer(where, &self.sink)
+            self.own_sink = False
+        else:
+            # A path: open the file here and take ownership of the sink.
+            c_where = tobytes(where)
+            with nogil:
+                self.sink = GetResultValue(FileOutputStream.Open(c_where))
+                self.own_sink = True
+        write_options = _create_write_options(
+            file_version=file_version,
+            batch_size=batch_size,
+            stripe_size=stripe_size,
+            compression=compression,
+            compression_block_size=compression_block_size,
+            compression_strategy=compression_strategy,
+            row_index_stride=row_index_stride,
+            padding_tolerance=padding_tolerance,
+            dictionary_key_size_threshold=dictionary_key_size_threshold,
+            bloom_filter_columns=bloom_filter_columns,
+            bloom_filter_fpp=bloom_filter_fpp
+        )
+        with nogil:
+            self.writer = move(GetResultValue(
+                ORCFileWriter.Open(self.sink.get(),
+                                   deref(write_options))))
+    def write(self, Table table):
+        # Write `table` through the C++ writer with the GIL released.
+        cdef:
+            shared_ptr[CTable] sp_table
+        sp_table = pyarrow_unwrap_table(table)
+        with nogil:
+            check_status(deref(self.writer).Write(deref(sp_table)))
+    def close(self):
+        # Close the writer first, then the sink if open() created it.
+        with nogil:
+            check_status(deref(self.writer).Close())
+            if self.own_sink:
+                check_status(deref(self.sink).Close())

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_parquet.pyx ADDED Viewed

	@@ -0,0 +1,2266 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# cython: profile=False
+# distutils: language = c++
+from collections.abc import Sequence
+from textwrap import indent
+import warnings
+from cython.operator cimport dereference as deref
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+from pyarrow.includes.libarrow_python cimport *
+from pyarrow.lib cimport (_Weakrefable, Buffer, Schema,
+                          check_status,
+                          MemoryPool, maybe_unbox_memory_pool,
+                          Table, KeyValueMetadata,
+                          pyarrow_wrap_chunked_array,
+                          pyarrow_wrap_schema,
+                          pyarrow_unwrap_metadata,
+                          pyarrow_unwrap_schema,
+                          pyarrow_wrap_table,
+                          pyarrow_wrap_batch,
+                          pyarrow_wrap_scalar,
+                          NativeFile, get_reader, get_writer,
+                          string_to_timeunit)
+from pyarrow.lib import (ArrowException, NativeFile, BufferOutputStream,
+                         _stringify_path,
+                         tobytes, frombytes, is_threading_enabled)
+cimport cpython as cp
+_DEFAULT_ROW_GROUP_SIZE = 1024*1024
+_MAX_ROW_GROUP_SIZE = 64*1024*1024
+cdef class Statistics(_Weakrefable):
+    """Statistics for a single column in a single row group."""
+    def __cinit__(self):
+        pass
+    def __repr__(self):
+        return """{}
+  has_min_max: {}
+  min: {}
+  max: {}
+  null_count: {}
+  distinct_count: {}
+  num_values: {}
+  physical_type: {}
+  logical_type: {}
+  converted_type (legacy): {}""".format(object.__repr__(self),
+                                        self.has_min_max,
+                                        self.min,
+                                        self.max,
+                                        self.null_count,
+                                        self.distinct_count,
+                                        self.num_values,
+                                        self.physical_type,
+                                        str(self.logical_type),
+                                        self.converted_type)
+    def to_dict(self):
+        """
+        Get dictionary representation of statistics.
+        Returns
+        -------
+        dict
+            Dictionary with the keys ``has_min_max``, ``min``, ``max``,
+            ``null_count``, ``distinct_count``, ``num_values`` and
+            ``physical_type``.
+        """
+        d = dict(
+            has_min_max=self.has_min_max,
+            min=self.min,
+            max=self.max,
+            null_count=self.null_count,
+            distinct_count=self.distinct_count,
+            num_values=self.num_values,
+            physical_type=self.physical_type
+        )
+        return d
+    def __eq__(self, other):
+        # equals() raises TypeError for None or a non-Statistics argument;
+        # report that as "not comparable" rather than as an error.
+        try:
+            return self.equals(other)
+        except TypeError:
+            return NotImplemented
+    def equals(self, Statistics other not None):
+        """
+        Return whether the two column statistics objects are equal.
+        Parameters
+        ----------
+        other : Statistics
+            Statistics to compare against. Must not be None.
+        Returns
+        -------
+        are_equal : bool
+        Raises
+        ------
+        TypeError
+            If ``other`` is None or not a Statistics instance.
+        """
+        # ``not None`` guards the C-level attribute access on ``other``.
+        return self.statistics.get().Equals(deref(other.statistics.get()))
+    @property
+    def has_min_max(self):
+        """Whether min and max are present (bool)."""
+        return self.statistics.get().HasMinMax()
+    @property
+    def has_null_count(self):
+        """Whether null count is present (bool)."""
+        return self.statistics.get().HasNullCount()
+    @property
+    def has_distinct_count(self):
+        """Whether distinct count is present (bool)."""
+        return self.statistics.get().HasDistinctCount()
+    @property
+    def min_raw(self):
+        """Min value as physical type (bool, int, float, or bytes)."""
+        if self.has_min_max:
+            return _cast_statistic_raw_min(self.statistics.get())
+        else:
+            return None
+    @property
+    def max_raw(self):
+        """Max value as physical type (bool, int, float, or bytes)."""
+        if self.has_min_max:
+            return _cast_statistic_raw_max(self.statistics.get())
+        else:
+            return None
+    @property
+    def min(self):
+        """
+        Min value as logical type.
+        Returned as the Python equivalent of logical type, such as datetime.date
+        for dates and decimal.Decimal for decimals.
+        """
+        if self.has_min_max:
+            # _cast_statistics returns a (min, max) pair of scalars.
+            min_scalar, _ = _cast_statistics(self.statistics.get())
+            return min_scalar.as_py()
+        else:
+            return None
+    @property
+    def max(self):
+        """
+        Max value as logical type.
+        Returned as the Python equivalent of logical type, such as datetime.date
+        for dates and decimal.Decimal for decimals.
+        """
+        if self.has_min_max:
+            # _cast_statistics returns a (min, max) pair of scalars.
+            _, max_scalar = _cast_statistics(self.statistics.get())
+            return max_scalar.as_py()
+        else:
+            return None
+    @property
+    def null_count(self):
+        """Number of null values in chunk (int), or None if not present."""
+        if self.has_null_count:
+            return self.statistics.get().null_count()
+        else:
+            return None
+    @property
+    def distinct_count(self):
+        """Distinct number of values in chunk (int), or None if not present."""
+        if self.has_distinct_count:
+            return self.statistics.get().distinct_count()
+        else:
+            return None
+    @property
+    def num_values(self):
+        """Number of non-null values (int)."""
+        return self.statistics.get().num_values()
+    @property
+    def physical_type(self):
+        """Physical type of column (str)."""
+        raw_physical_type = self.statistics.get().physical_type()
+        return physical_type_name_from_enum(raw_physical_type)
+    @property
+    def logical_type(self):
+        """Logical type of column (:class:`ParquetLogicalType`)."""
+        return wrap_logical_type(self.statistics.get().descr().logical_type())
+    @property
+    def converted_type(self):
+        """Legacy converted type (str or None)."""
+        raw_converted_type = self.statistics.get().descr().converted_type()
+        return converted_type_name_from_enum(raw_converted_type)
+cdef class ParquetLogicalType(_Weakrefable):
+    """Logical type of parquet type."""
+    cdef:
+        # Shared pointer to the wrapped logical type; set through init() and
+        # read by every accessor below.
+        shared_ptr[const CParquetLogicalType] type
+    def __cinit__(self):
+        pass
+    cdef init(self, const shared_ptr[const CParquetLogicalType]& type):
+        # Instances are created empty and populated here (see
+        # wrap_logical_type).
+        self.type = type
+    def __repr__(self):
+        # Default object repr on the first line, string form on the second.
+        return "{}\n  {}".format(object.__repr__(self), str(self))
+    def __str__(self):
+        return frombytes(self.type.get().ToString(), safe=True)
+    def to_json(self):
+        """
+        Get a JSON string containing type and type parameters.
+        Returns
+        -------
+        json : str
+            JSON representation of type, with at least a field called 'Type'
+            which contains the type name. If the type is parameterized, such
+            as a decimal with scale and precision, will contain those as fields
+            as well.
+        """
+        return frombytes(self.type.get().ToJSON())
+    @property
+    def type(self):
+        """Name of the logical type (str)."""
+        # Inside the class body ``self.type`` is the C-level shared pointer
+        # declared above, not this property.
+        return logical_type_name_from_enum(self.type.get().type())
+cdef wrap_logical_type(const shared_ptr[const CParquetLogicalType]& type):
+    # Build an empty ParquetLogicalType and attach the given shared pointer.
+    cdef ParquetLogicalType wrapped = ParquetLogicalType()
+    wrapped.init(type)
+    return wrapped
+cdef _cast_statistic_raw_min(CStatistics* statistics):
+    # Return the minimum of ``statistics`` after casting it to the typed
+    # statistics class matching its physical type. Physical types not listed
+    # below fall through and yield None.
+    cdef ParquetType kind = statistics.physical_type()
+    cdef uint32_t flba_width = statistics.descr().type_length()
+    if kind == ParquetType_BOOLEAN:
+        return (<CBoolStatistics*> statistics).min()
+    if kind == ParquetType_INT32:
+        return (<CInt32Statistics*> statistics).min()
+    if kind == ParquetType_INT64:
+        return (<CInt64Statistics*> statistics).min()
+    if kind == ParquetType_FLOAT:
+        return (<CFloatStatistics*> statistics).min()
+    if kind == ParquetType_DOUBLE:
+        return (<CDoubleStatistics*> statistics).min()
+    if kind == ParquetType_BYTE_ARRAY:
+        return _box_byte_array((<CByteArrayStatistics*> statistics).min())
+    if kind == ParquetType_FIXED_LEN_BYTE_ARRAY:
+        # Fixed-length values take their length from the column descriptor.
+        return _box_flba((<CFLBAStatistics*> statistics).min(), flba_width)
+cdef _cast_statistic_raw_max(CStatistics* statistics):
+    # Return the maximum of ``statistics`` after casting it to the typed
+    # statistics class matching its physical type. Physical types not listed
+    # below fall through and yield None.
+    cdef ParquetType kind = statistics.physical_type()
+    cdef uint32_t flba_width = statistics.descr().type_length()
+    if kind == ParquetType_BOOLEAN:
+        return (<CBoolStatistics*> statistics).max()
+    if kind == ParquetType_INT32:
+        return (<CInt32Statistics*> statistics).max()
+    if kind == ParquetType_INT64:
+        return (<CInt64Statistics*> statistics).max()
+    if kind == ParquetType_FLOAT:
+        return (<CFloatStatistics*> statistics).max()
+    if kind == ParquetType_DOUBLE:
+        return (<CDoubleStatistics*> statistics).max()
+    if kind == ParquetType_BYTE_ARRAY:
+        return _box_byte_array((<CByteArrayStatistics*> statistics).max())
+    if kind == ParquetType_FIXED_LEN_BYTE_ARRAY:
+        # Fixed-length values take their length from the column descriptor.
+        return _box_flba((<CFLBAStatistics*> statistics).max(), flba_width)
+cdef _cast_statistics(CStatistics* statistics):
+    # Convert the min/max of ``statistics`` into a (min, max) pair of wrapped
+    # scalars; a failing status from the conversion is raised by check_status.
+    cdef shared_ptr[CScalar] lower
+    cdef shared_ptr[CScalar] upper
+    check_status(StatisticsAsScalars(deref(statistics), &lower, &upper))
+    return (pyarrow_wrap_scalar(lower), pyarrow_wrap_scalar(upper))
+cdef _box_byte_array(ParquetByteArray val):
+    # Copy the bytes referenced by ``val`` (pointer plus length) into a new
+    # Python bytes object.
+    cdef:
+        char* data = <char*> val.ptr
+        Py_ssize_t size = <Py_ssize_t> val.len
+    return cp.PyBytes_FromStringAndSize(data, size)
+cdef _box_flba(ParquetFLBA val, uint32_t len):
+    # Copy ``len`` bytes starting at ``val.ptr`` into a new Python bytes
+    # object; the length is supplied by the caller.
+    cdef:
+        char* data = <char*> val.ptr
+        Py_ssize_t size = <Py_ssize_t> len
+    return cp.PyBytes_FromStringAndSize(data, size)
+cdef class ColumnChunkMetaData(_Weakrefable):
+    """Column metadata for a single row group."""
+    def __cinit__(self):
+        pass
+    def __repr__(self):
+        # Indent the nested statistics repr so it reads as a sub-section.
+        statistics = indent(repr(self.statistics), 4 * ' ')
+        return """{0}
+  file_offset: {1}
+  file_path: {2}
+  physical_type: {3}
+  num_values: {4}
+  path_in_schema: {5}
+  is_stats_set: {6}
+  statistics:
+{7}
+  compression: {8}
+  encodings: {9}
+  has_dictionary_page: {10}
+  dictionary_page_offset: {11}
+  data_page_offset: {12}
+  total_compressed_size: {13}
+  total_uncompressed_size: {14}""".format(object.__repr__(self),
+                                          self.file_offset,
+                                          self.file_path,
+                                          self.physical_type,
+                                          self.num_values,
+                                          self.path_in_schema,
+                                          self.is_stats_set,
+                                          statistics,
+                                          self.compression,
+                                          self.encodings,
+                                          self.has_dictionary_page,
+                                          self.dictionary_page_offset,
+                                          self.data_page_offset,
+                                          self.total_compressed_size,
+                                          self.total_uncompressed_size)
+    def to_dict(self):
+        """
+        Get dictionary representation of the column chunk metadata.
+        Returns
+        -------
+        dict
+            Dictionary with a key for each attribute of this class.
+        """
+        # ``statistics`` is a nested dict, or None when no statistics are set.
+        statistics = self.statistics.to_dict() if self.is_stats_set else None
+        d = dict(
+            file_offset=self.file_offset,
+            file_path=self.file_path,
+            physical_type=self.physical_type,
+            num_values=self.num_values,
+            path_in_schema=self.path_in_schema,
+            is_stats_set=self.is_stats_set,
+            statistics=statistics,
+            compression=self.compression,
+            encodings=self.encodings,
+            has_dictionary_page=self.has_dictionary_page,
+            dictionary_page_offset=self.dictionary_page_offset,
+            data_page_offset=self.data_page_offset,
+            total_compressed_size=self.total_compressed_size,
+            total_uncompressed_size=self.total_uncompressed_size
+        )
+        return d
+    def __eq__(self, other):
+        # equals() raises TypeError for None or a foreign type; report that
+        # as "not comparable" rather than as an error.
+        try:
+            return self.equals(other)
+        except TypeError:
+            return NotImplemented
+    def equals(self, ColumnChunkMetaData other not None):
+        """
+        Return whether the two column chunk metadata objects are equal.
+        Parameters
+        ----------
+        other : ColumnChunkMetaData
+            Metadata to compare against. Must not be None.
+        Returns
+        -------
+        are_equal : bool
+        Raises
+        ------
+        TypeError
+            If ``other`` is None or not a ColumnChunkMetaData instance.
+        """
+        # ``not None`` guards the C-level dereference of ``other.metadata``.
+        return self.metadata.Equals(deref(other.metadata))
+    @property
+    def file_offset(self):
+        """Offset into file where column chunk is located (int)."""
+        return self.metadata.file_offset()
+    @property
+    def file_path(self):
+        """Optional file path if set (str or None)."""
+        return frombytes(self.metadata.file_path())
+    @property
+    def physical_type(self):
+        """Physical type of column (str)."""
+        return physical_type_name_from_enum(self.metadata.type())
+    @property
+    def num_values(self):
+        """Total number of values (int)."""
+        return self.metadata.num_values()
+    @property
+    def path_in_schema(self):
+        """Nested path to field, separated by periods (str)."""
+        path = self.metadata.path_in_schema().get().ToDotString()
+        return frombytes(path)
+    @property
+    def is_stats_set(self):
+        """Whether or not statistics are present in metadata (bool)."""
+        return self.metadata.is_stats_set()
+    @property
+    def statistics(self):
+        """Statistics for column chunk (:class:`Statistics`), or None if unset."""
+        if not self.metadata.is_stats_set():
+            return None
+        statistics = Statistics()
+        # ``self`` is handed to the Statistics object together with the data.
+        statistics.init(self.metadata.statistics(), self)
+        return statistics
+    @property
+    def compression(self):
+        """
+        Type of compression used for column (str).
+        One of 'UNCOMPRESSED', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4', 'ZSTD',
+        or 'UNKNOWN'.
+        """
+        return compression_name_from_enum(self.metadata.compression())
+    @property
+    def encodings(self):
+        """
+        Encodings used for column (tuple of str).
+        One of 'PLAIN', 'BIT_PACKED', 'RLE', 'BYTE_STREAM_SPLIT', 'DELTA_BINARY_PACKED',
+        'DELTA_LENGTH_BYTE_ARRAY', 'DELTA_BYTE_ARRAY'.
+        """
+        return tuple(map(encoding_name_from_enum, self.metadata.encodings()))
+    @property
+    def has_dictionary_page(self):
+        """Whether there is dictionary data present in the column chunk (bool)."""
+        return bool(self.metadata.has_dictionary_page())
+    @property
+    def dictionary_page_offset(self):
+        """
+        Offset of dictionary page relative to beginning of the file (int),
+        or None when there is no dictionary page.
+        """
+        if self.has_dictionary_page:
+            return self.metadata.dictionary_page_offset()
+        else:
+            return None
+    @property
+    def data_page_offset(self):
+        """Offset of data page relative to beginning of the file (int)."""
+        return self.metadata.data_page_offset()
+    @property
+    def has_index_page(self):
+        """Not yet supported."""
+        raise NotImplementedError('not supported in parquet-cpp')
+    @property
+    def index_page_offset(self):
+        """Not yet supported."""
+        raise NotImplementedError("parquet-cpp doesn't return valid values")
+    @property
+    def total_compressed_size(self):
+        """Compressed size in bytes (int)."""
+        return self.metadata.total_compressed_size()
+    @property
+    def total_uncompressed_size(self):
+        """Uncompressed size in bytes (int)."""
+        return self.metadata.total_uncompressed_size()
+    @property
+    def has_offset_index(self):
+        """Whether the column chunk has an offset index"""
+        return self.metadata.GetOffsetIndexLocation().has_value()
+    @property
+    def has_column_index(self):
+        """Whether the column chunk has a column index"""
+        return self.metadata.GetColumnIndexLocation().has_value()
+    @property
+    def metadata(self):
+        """
+        Additional metadata as key value pairs (dict[bytes, bytes]), or None
+        when the column chunk carries no key/value metadata.
+        """
+        cdef:
+            unordered_map[c_string, c_string] metadata
+            const CKeyValueMetadata* underlying_metadata
+        # ``self.metadata`` here is the wrapped C-level metadata object, not
+        # this property.
+        underlying_metadata = self.metadata.key_value_metadata().get()
+        if underlying_metadata != NULL:
+            underlying_metadata.ToUnorderedMap(&metadata)
+            return metadata
+        else:
+            return None
+cdef class SortingColumn:
+    """
+    Sorting specification for a single column.
+    Returned by :meth:`RowGroupMetaData.sorting_columns` and used in
+    :class:`ParquetWriter` to specify the sort order of the data.
+    Parameters
+    ----------
+    column_index : int
+        Index of column that data is sorted by.
+    descending : bool, default False
+        Whether column is sorted in descending order.
+    nulls_first : bool, default False
+        Whether null values appear before valid values.
+    Notes
+    -----
+    Column indices are zero-based, refer only to leaf fields, and are in
+    depth-first order. This may make the column indices for nested schemas
+    different from what you expect. In most cases, it will be easier to
+    specify the sort order using column names instead of column indices
+    and converting using the ``from_ordering`` method.
+    Examples
+    --------
+    In other APIs, sort order is specified by names, such as:
+    >>> sort_order = [('id', 'ascending'), ('timestamp', 'descending')]
+    For Parquet, the column index must be used instead:
+    >>> import pyarrow.parquet as pq
+    >>> [pq.SortingColumn(0), pq.SortingColumn(1, descending=True)]
+    [SortingColumn(column_index=0, descending=False, nulls_first=False), SortingColumn(column_index=1, descending=True, nulls_first=False)]
+    Convert the sort_order into the list of sorting columns with
+    ``from_ordering`` (note that the schema must be provided as well):
+    >>> import pyarrow as pa
+    >>> schema = pa.schema([('id', pa.int64()), ('timestamp', pa.timestamp('ms'))])
+    >>> sorting_columns = pq.SortingColumn.from_ordering(schema, sort_order)
+    >>> sorting_columns
+    (SortingColumn(column_index=0, descending=False, nulls_first=False), SortingColumn(column_index=1, descending=True, nulls_first=False))
+    Convert back to the sort order with ``to_ordering``:
+    >>> pq.SortingColumn.to_ordering(schema, sorting_columns)
+    ((('id', 'ascending'), ('timestamp', 'descending')), 'at_end')
+    See Also
+    --------
+    RowGroupMetaData.sorting_columns
+    """
+    cdef int column_index
+    cdef c_bool descending
+    cdef c_bool nulls_first
+    def __init__(self, int column_index, c_bool descending=False, c_bool nulls_first=False):
+        self.column_index = column_index
+        self.descending = descending
+        self.nulls_first = nulls_first
+    @classmethod
+    def from_ordering(cls, Schema schema, sort_keys, null_placement='at_end'):
+        """
+        Create a tuple of SortingColumn objects from the same arguments as
+        :class:`pyarrow.compute.SortOptions`.
+        Parameters
+        ----------
+        schema : Schema
+            Schema of the input data.
+        sort_keys : Sequence of (name, order) tuples
+            Names of field/column keys (str) to sort the input on,
+            along with the order each field/column is sorted in.
+            Accepted values for `order` are "ascending", "descending".
+            A bare name (str) is also accepted and means ascending order.
+        null_placement : {'at_start', 'at_end'}, default 'at_end'
+            Where null values should appear in the sort order.
+        Returns
+        -------
+        sorting_columns : tuple of SortingColumn
+        Raises
+        ------
+        ValueError
+            If ``null_placement``, a sort key, or a sort direction is invalid,
+            or if a sort key name is not found in the schema.
+        """
+        if null_placement == 'at_start':
+            nulls_first = True
+        elif null_placement == 'at_end':
+            nulls_first = False
+        else:
+            raise ValueError('null_placement must be "at_start" or "at_end"')
+        col_map = _name_to_index_map(schema)
+        sorting_columns = []
+        for sort_key in sort_keys:
+            if isinstance(sort_key, str):
+                # A bare column name defaults to ascending order.
+                name = sort_key
+                descending = False
+            elif (isinstance(sort_key, tuple) and len(sort_key) == 2 and
+                    isinstance(sort_key[0], str) and
+                    isinstance(sort_key[1], str)):
+                name, descending = sort_key
+                # Translate the textual direction into a boolean flag.
+                if descending == "descending":
+                    descending = True
+                elif descending == "ascending":
+                    descending = False
+                else:
+                    raise ValueError("Invalid sort key direction: {0}"
+                                     .format(descending))
+            else:
+                raise ValueError("Invalid sort key: {0}".format(sort_key))
+            try:
+                column_index = col_map[name]
+            except KeyError:
+                raise ValueError("Sort key name '{0}' not found in schema:\n{1}"
+                                 .format(name, schema))
+            sorting_columns.append(
+                cls(column_index, descending=descending, nulls_first=nulls_first)
+            )
+        return tuple(sorting_columns)
+    @staticmethod
+    def to_ordering(Schema schema, sorting_columns):
+        """
+        Convert a tuple of SortingColumn objects to the same format as
+        :class:`pyarrow.compute.SortOptions`.
+        Parameters
+        ----------
+        schema : Schema
+            Schema of the input data.
+        sorting_columns : tuple of SortingColumn
+            Columns to sort the input on.
+        Returns
+        -------
+        sort_keys : tuple of (name, order) tuples
+        null_placement : {'at_start', 'at_end'}
+        Raises
+        ------
+        ValueError
+            If the sorting columns do not all share the same null placement.
+        """
+        # Invert the name -> index mapping so indices can be looked up.
+        col_map = {i: name for name, i in _name_to_index_map(schema).items()}
+        sort_keys = []
+        nulls_first = None
+        for sorting_column in sorting_columns:
+            name = col_map[sorting_column.column_index]
+            if sorting_column.descending:
+                order = "descending"
+            else:
+                order = "ascending"
+            sort_keys.append((name, order))
+            # A single null placement is returned, so every column must agree
+            # with the first one seen.
+            if nulls_first is None:
+                nulls_first = sorting_column.nulls_first
+            elif nulls_first != sorting_column.nulls_first:
+                raise ValueError("Sorting columns have inconsistent null placement")
+        # An empty input leaves ``nulls_first`` as None, which maps to "at_end".
+        if nulls_first:
+            null_placement = "at_start"
+        else:
+            null_placement = "at_end"
+        return tuple(sort_keys), null_placement
+    def __repr__(self):
+        return """{}(column_index={}, descending={}, nulls_first={})""".format(
+            self.__class__.__name__,
+            self.column_index, self.descending, self.nulls_first)
+    def __eq__(self, other):
+        # Comparing against None or an unrelated object is "not comparable",
+        # not an error; this also keeps None away from C-level field access.
+        if not isinstance(other, SortingColumn):
+            return NotImplemented
+        return (self.column_index == other.column_index and
+                self.descending == other.descending and
+                self.nulls_first == other.nulls_first)
+    def __hash__(self):
+        # Hash the same three fields that __eq__ compares.
+        return hash((self.column_index, self.descending, self.nulls_first))
+    @property
+    def column_index(self):
+        """Index of column data is sorted by (int)."""
+        return self.column_index
+    @property
+    def descending(self):
+        """Whether column is sorted in descending order (bool)."""
+        return self.descending
+    @property
+    def nulls_first(self):
+        """Whether null values appear before valid values (bool)."""
+        return self.nulls_first
+    def to_dict(self):
+        """
+        Get dictionary representation of the SortingColumn.
+        Returns
+        -------
+        dict
+            Dictionary with a key for each attribute of this class.
+        """
+        d = dict(
+            column_index=self.column_index,
+            descending=self.descending,
+            nulls_first=self.nulls_first
+        )
+        return d
+cdef class RowGroupMetaData(_Weakrefable):
+    """Metadata for a single row group."""
+    def __cinit__(self, FileMetaData parent, int index):
+        if index < 0 or index >= parent.num_row_groups:
+            raise IndexError('{0} out of bounds'.format(index))
+        self.up_metadata = parent._metadata.RowGroup(index)
+        self.metadata = self.up_metadata.get()
+        # Keep the parent and index so the object can be rebuilt by __reduce__.
+        self.parent = parent
+        self.index = index
+    def __reduce__(self):
+        return RowGroupMetaData, (self.parent, self.index)
+    def __eq__(self, other):
+        # equals() raises TypeError for None or a foreign type; report that
+        # as "not comparable" rather than as an error.
+        try:
+            return self.equals(other)
+        except TypeError:
+            return NotImplemented
+    def equals(self, RowGroupMetaData other not None):
+        """
+        Return whether the two row group metadata objects are equal.
+        Parameters
+        ----------
+        other : RowGroupMetaData
+            Metadata to compare against. Must not be None.
+        Returns
+        -------
+        are_equal : bool
+        Raises
+        ------
+        TypeError
+            If ``other`` is None or not a RowGroupMetaData instance.
+        """
+        # ``not None`` guards the C-level dereference of ``other.metadata``.
+        return self.metadata.Equals(deref(other.metadata))
+    def column(self, int i):
+        """
+        Get column metadata at given index.
+        Parameters
+        ----------
+        i : int
+            Index of column to get metadata for.
+        Returns
+        -------
+        ColumnChunkMetaData
+            Metadata for column within this chunk.
+        Raises
+        ------
+        IndexError
+            If ``i`` is negative or not smaller than ``num_columns``.
+        """
+        if i < 0 or i >= self.num_columns:
+            raise IndexError('{0} out of bounds'.format(i))
+        chunk = ColumnChunkMetaData()
+        chunk.init(self, i)
+        return chunk
+    def __repr__(self):
+        return """{0}
+  num_columns: {1}
+  num_rows: {2}
+  total_byte_size: {3}
+  sorting_columns: {4}""".format(object.__repr__(self),
+                                 self.num_columns,
+                                 self.num_rows,
+                                 self.total_byte_size,
+                                 self.sorting_columns)
+    def to_dict(self):
+        """
+        Get dictionary representation of the row group metadata.
+        Returns
+        -------
+        dict
+            Dictionary with a key for each attribute of this class.
+        """
+        # ``columns`` is placed in the dict first and filled in afterwards.
+        columns = []
+        d = dict(
+            num_columns=self.num_columns,
+            num_rows=self.num_rows,
+            total_byte_size=self.total_byte_size,
+            columns=columns,
+            sorting_columns=[col.to_dict() for col in self.sorting_columns]
+        )
+        for i in range(self.num_columns):
+            columns.append(self.column(i).to_dict())
+        return d
+    @property
+    def num_columns(self):
+        """Number of columns in this row group (int)."""
+        return self.metadata.num_columns()
+    @property
+    def num_rows(self):
+        """Number of rows in this row group (int)."""
+        return self.metadata.num_rows()
+    @property
+    def total_byte_size(self):
+        """Total byte size of all the uncompressed column data in this row group (int)."""
+        return self.metadata.total_byte_size()
+    @property
+    def sorting_columns(self):
+        """Columns the row group is sorted by (tuple of :class:`SortingColumn`)."""
+        out = []
+        cdef vector[CSortingColumn] sorting_columns = self.metadata.sorting_columns()
+        # Re-wrap each C-level sorting column as a Python SortingColumn.
+        for sorting_col in sorting_columns:
+            out.append(SortingColumn(
+                sorting_col.column_idx,
+                sorting_col.descending,
+                sorting_col.nulls_first
+            ))
+        return tuple(out)
+def _reconstruct_filemetadata(Buffer serialized):
+    # Rebuild a FileMetaData from the serialized buffer produced by
+    # FileMetaData.__reduce__.
+    cdef FileMetaData restored = FileMetaData.__new__(FileMetaData)
+    cdef CBuffer *raw_buffer = serialized.buffer.get()
+    cdef uint32_t footer_len = <uint32_t>raw_buffer.size()
+    restored.init(CFileMetaData_Make(raw_buffer.data(), &footer_len))
+    return restored
+cdef class FileMetaData(_Weakrefable):
+    """Parquet metadata for a single file."""
+    def __cinit__(self):
+        # Nothing to set up here; _reconstruct_filemetadata populates an
+        # instance through init().
+        pass
+    def __reduce__(self):
+        # Pickle support: write the metadata into an in-memory output stream
+        # and return the resulting buffer together with the function that
+        # rebuilds a FileMetaData from it.
+        cdef:
+            NativeFile sink = BufferOutputStream()
+            COutputStream* c_sink = sink.get_output_stream().get()
+        with nogil:
+            self._metadata.WriteTo(c_sink)
+        cdef Buffer buffer = sink.getvalue()
+        return _reconstruct_filemetadata, (buffer,)
+    def __hash__(self):
+        # The hash is derived from the schema plus four summary fields.
+        summary = (self.schema,
+                   self.num_rows,
+                   self.num_row_groups,
+                   self.format_version,
+                   self.serialized_size)
+        return hash(summary)
+    def __repr__(self):
+        # Default object repr followed by one "name: value" line per field.
+        template = """{0}
+  created_by: {1}
+  num_columns: {2}
+  num_rows: {3}
+  num_row_groups: {4}
+  format_version: {5}
+  serialized_size: {6}"""
+        return template.format(
+            object.__repr__(self),
+            self.created_by,
+            self.num_columns,
+            self.num_rows,
+            self.num_row_groups,
+            self.format_version,
+            self.serialized_size,
+        )
+    def to_dict(self):
+        """
+        Build a plain-dict view of the file metadata.
+        Returns
+        -------
+        dict
+            Summary fields of the file plus a ``row_groups`` list holding the
+            dictionary form of every row group.
+        """
+        per_group = []
+        summary = {
+            'created_by': self.created_by,
+            'num_columns': self.num_columns,
+            'num_rows': self.num_rows,
+            'num_row_groups': self.num_row_groups,
+            'row_groups': per_group,
+            'format_version': self.format_version,
+            'serialized_size': self.serialized_size,
+        }
+        # The list is already referenced by the dict; fill it in afterwards.
+        per_group.extend(
+            self.row_group(idx).to_dict()
+            for idx in range(self.num_row_groups)
+        )
+        return summary
+    def __eq__(self, other):
+        # equals() has a typed, not-None argument; a TypeError from it means
+        # ``other`` cannot be compared, so report NotImplemented instead.
+        try:
+            return self.equals(other)
+        except TypeError:
+            return NotImplemented
+    def equals(self, FileMetaData other not None):
+        """
+        Return whether the two file metadata objects are equal.
+        Parameters
+        ----------
+        other : FileMetaData
+            Metadata to compare against. Must not be None.
+        Returns
+        -------
+        are_equal : bool
+        """
+        # The comparison is delegated to the wrapped metadata objects.
+        return self._metadata.Equals(deref(other._metadata))
+    @property
+    def schema(self):
+        """Schema of the file (:class:`ParquetSchema`), created on first access."""
+        if self._schema is not None:
+            return self._schema
+        # First access: build the schema wrapper and cache it on the instance.
+        self._schema = ParquetSchema(self)
+        return self._schema
+    @property
+    def serialized_size(self):
+        """Size of the original thrift encoded metadata footer (int)."""
+        # Reported by the wrapped metadata object's size().
+        return self._metadata.size()
+    @property
+    def num_columns(self):
+        """Number of columns in file (int)."""
+        # Reported by the wrapped metadata object.
+        return self._metadata.num_columns()
+    @property
+    def num_rows(self):
+        """Total number of rows in file (int)."""
+        # Reported by the wrapped metadata object.
+        return self._metadata.num_rows()
+    @property
+    def num_row_groups(self):
+        """Number of row groups in file (int)."""
+        # Reported by the wrapped metadata object.
+        return self._metadata.num_row_groups()
+    @property
+    def format_version(self):
+        """
+        Parquet format version of the file as a string.
+        Returns '1.0', 'pseudo-2.0', '2.4' or '2.6'. Any other version value
+        triggers a warning and is reported as '2.6'.
+        """
+        cdef ParquetVersion file_version = self._metadata.version()
+        if file_version == ParquetVersion_V1:
+            return '1.0'
+        if file_version == ParquetVersion_V2_0:
+            return 'pseudo-2.0'
+        if file_version == ParquetVersion_V2_4:
+            return '2.4'
+        if file_version == ParquetVersion_V2_6:
+            return '2.6'
+        # Unknown value: warn and fall back to the newest known version.
+        warnings.warn('Unrecognized file version, assuming 2.6: {}'
+                      .format(file_version))
+        return '2.6'
+    @property
+    def created_by(self):
+        """
+        String describing source of the parquet file (str).
+        This typically includes library name and version number. For example, Arrow 7.0's
+        writer returns 'parquet-cpp-arrow version 7.0.0'.
+        """
+        # The value read from the wrapped metadata is passed through
+        # ``frombytes`` before being returned (presumably bytes -> str,
+        # matching the documented return type).
+        return frombytes(self._metadata.created_by())
+    @property
+    def metadata(self):
+        """Additional metadata as key value pairs (dict[bytes, bytes])."""
+        cdef:
+            unordered_map[c_string, c_string] key_values
+            const CKeyValueMetadata* kv_ptr
+        kv_ptr = self._metadata.key_value_metadata().get()
+        # No key/value metadata attached to the file: report None
+        if kv_ptr == NULL:
+            return None
+        kv_ptr.ToUnorderedMap(&key_values)
+        return key_values
+    def row_group(self, int i):
+        """
+        Get metadata for row group at index i.
+        Parameters
+        ----------
+        i : int
+            Row group index to get.
+        Returns
+        -------
+        row_group_metadata : RowGroupMetaData
+        """
+        # A new RowGroupMetaData is constructed on each call, with this
+        # object as its first argument. No index validation happens here;
+        # NOTE(review): presumably RowGroupMetaData checks ``i`` -- confirm.
+        return RowGroupMetaData(self, i)
+    def set_file_path(self, path):
+        """
+        Set ColumnChunk file paths to the given value.
+        This method modifies the ``file_path`` field of each ColumnChunk
+        in the FileMetaData to be a particular value.
+        Parameters
+        ----------
+        path : str
+            The file path to set on all ColumnChunks.
+        """
+        # Convert to a C++ string first, then hand it to the wrapped metadata
+        cdef c_string encoded_path = tobytes(path)
+        self._metadata.set_file_path(encoded_path)
+    def append_row_groups(self, FileMetaData other not None):
+        """
+        Append row groups from other FileMetaData object.
+        Parameters
+        ----------
+        other : FileMetaData
+            Other metadata to append row groups from. Must not be None.
+        Raises
+        ------
+        TypeError
+            If ``other`` is None or not a FileMetaData.
+        """
+        # ``not None`` makes Cython reject None at call time; without it the
+        # C attribute ``sp_metadata`` would be read from None and dereferenced.
+        cdef shared_ptr[CFileMetaData] c_metadata
+        c_metadata = other.sp_metadata
+        self._metadata.AppendRowGroups(deref(c_metadata))
+    def write_metadata_file(self, where):
+        """
+        Write the metadata to a metadata-only Parquet file.
+        Parameters
+        ----------
+        where : path or file-like object
+            Where to write the metadata.  Should be a writable path on
+            the local filesystem, or a writable file-like object.
+        """
+        cdef:
+            shared_ptr[COutputStream] sink
+            c_string c_where
+        try:
+            where = _stringify_path(where)
+        except TypeError:
+            # ``where`` could not be turned into a path string: obtain the
+            # output stream from it via get_writer instead.
+            get_writer(where, &sink)
+        else:
+            # ``where`` is a path: open a file output stream at that location
+            c_where = tobytes(where)
+            with nogil:
+                sink = GetResultValue(FileOutputStream.Open(c_where))
+        # Both branches leave ``sink`` set; the write runs without the GIL
+        with nogil:
+            check_status(
+                WriteMetaDataFile(deref(self._metadata), sink.get()))
+cdef class ParquetSchema(_Weakrefable):
+    """A Parquet schema."""
+    def __cinit__(self, FileMetaData container not None):
+        # ``not None``: the schema pointer is read from the container's C
+        # attributes, which must not be attempted on None.
+        self.parent = container
+        self.schema = container._metadata.schema()
+    def __repr__(self):
+        return "{0}\n{1}".format(
+            object.__repr__(self),
+            frombytes(self.schema.ToString(), safe=True))
+    def __reduce__(self):
+        # Rebuilt from the parent FileMetaData when unpickled
+        return ParquetSchema, (self.parent,)
+    def __len__(self):
+        return self.schema.num_columns()
+    def __getitem__(self, i):
+        return self.column(i)
+    def __hash__(self):
+        # Hash of the schema's string rendering
+        return hash(self.schema.ToString())
+    @property
+    def names(self):
+        """Name of each field (list of str)."""
+        return [self[i].name for i in range(len(self))]
+    def to_arrow_schema(self):
+        """
+        Convert Parquet schema to effective Arrow schema.
+        Returns
+        -------
+        schema : Schema
+        """
+        cdef shared_ptr[CSchema] sp_arrow_schema
+        with nogil:
+            check_status(FromParquetSchema(
+                self.schema, default_arrow_reader_properties(),
+                self.parent._metadata.key_value_metadata(),
+                &sp_arrow_schema))
+        return pyarrow_wrap_schema(sp_arrow_schema)
+    def __eq__(self, other):
+        # A TypeError from ``equals`` (wrong type, or None) is reported to
+        # Python as NotImplemented.
+        try:
+            return self.equals(other)
+        except TypeError:
+            return NotImplemented
+    def equals(self, ParquetSchema other not None):
+        """
+        Return whether the two schemas are equal.
+        Parameters
+        ----------
+        other : ParquetSchema
+            Schema to compare against. Must not be None.
+        Returns
+        -------
+        are_equal : bool
+        Raises
+        ------
+        TypeError
+            If ``other`` is None or not a ParquetSchema.
+        """
+        # ``not None`` keeps None from reaching the pointer dereference below
+        return self.schema.Equals(deref(other.schema))
+    def column(self, i):
+        """
+        Return the schema for a single column.
+        Parameters
+        ----------
+        i : int
+            Index of column in schema.
+        Returns
+        -------
+        column_schema : ColumnSchema
+        Raises
+        ------
+        IndexError
+            If ``i`` is negative or not smaller than the number of columns.
+        """
+        if i < 0 or i >= len(self):
+            raise IndexError('{0} out of bounds'.format(i))
+        return ColumnSchema(self, i)
+cdef class ColumnSchema(_Weakrefable):
+    """Schema for a single column."""
+    cdef:
+        int index
+        ParquetSchema parent
+        const ColumnDescriptor* descr
+    def __cinit__(self, ParquetSchema schema not None, int index):
+        # ``not None``: the descriptor is fetched through the schema's C
+        # attributes, which must not be attempted on None.
+        # NOTE(review): ``index`` is not range-checked here;
+        # ParquetSchema.column() validates it before constructing this object.
+        self.parent = schema
+        self.index = index  # for pickling support
+        self.descr = schema.schema.Column(index)
+    def __eq__(self, other):
+        # A TypeError from ``equals`` (wrong type, or None) is reported to
+        # Python as NotImplemented.
+        try:
+            return self.equals(other)
+        except TypeError:
+            return NotImplemented
+    def __reduce__(self):
+        # Rebuilt from the parent schema and column index when unpickled
+        return ColumnSchema, (self.parent, self.index)
+    def equals(self, ColumnSchema other not None):
+        """
+        Return whether the two column schemas are equal.
+        Parameters
+        ----------
+        other : ColumnSchema
+            Schema to compare against. Must not be None.
+        Returns
+        -------
+        are_equal : bool
+        Raises
+        ------
+        TypeError
+            If ``other`` is None or not a ColumnSchema.
+        """
+        # ``not None`` keeps None from reaching the pointer dereference below
+        return self.descr.Equals(deref(other.descr))
+    def __repr__(self):
+        physical_type = self.physical_type
+        converted_type = self.converted_type
+        # Decorate the converted type with extra detail where available:
+        # precision/scale for decimals, byte length for fixed-length arrays.
+        if converted_type == 'DECIMAL':
+            converted_type = 'DECIMAL({0}, {1})'.format(self.precision,
+                                                        self.scale)
+        elif physical_type == 'FIXED_LEN_BYTE_ARRAY':
+            converted_type = ('FIXED_LEN_BYTE_ARRAY(length={0})'
+                              .format(self.length))
+        return """<ParquetColumnSchema>
+  name: {0}
+  path: {1}
+  max_definition_level: {2}
+  max_repetition_level: {3}
+  physical_type: {4}
+  logical_type: {5}
+  converted_type (legacy): {6}""".format(self.name, self.path,
+                                         self.max_definition_level,
+                                         self.max_repetition_level,
+                                         physical_type,
+                                         str(self.logical_type),
+                                         converted_type)
+    @property
+    def name(self):
+        """Name of field (str)."""
+        return frombytes(self.descr.name())
+    @property
+    def path(self):
+        """Nested path to field, separated by periods (str)."""
+        return frombytes(self.descr.path().get().ToDotString())
+    @property
+    def max_definition_level(self):
+        """Maximum definition level (int)."""
+        return self.descr.max_definition_level()
+    @property
+    def max_repetition_level(self):
+        """Maximum repetition level (int)."""
+        return self.descr.max_repetition_level()
+    @property
+    def physical_type(self):
+        """Name of physical type (str)."""
+        return physical_type_name_from_enum(self.descr.physical_type())
+    @property
+    def logical_type(self):
+        """Logical type of column (:class:`ParquetLogicalType`)."""
+        return wrap_logical_type(self.descr.logical_type())
+    @property
+    def converted_type(self):
+        """Legacy converted type (str or None)."""
+        return converted_type_name_from_enum(self.descr.converted_type())
+    # NOTE(review): length, precision and scale below return the descriptor's
+    # value unconditionally; the "None otherwise" wording in their docstrings
+    # should be confirmed against what the descriptor reports for other types.
+    # FIXED_LEN_BYTE_ARRAY attribute
+    @property
+    def length(self):
+        """Array length if fixed length byte array type, None otherwise (int or None)."""
+        return self.descr.type_length()
+    # Decimal attributes
+    @property
+    def precision(self):
+        """Precision if decimal type, None otherwise (int or None)."""
+        return self.descr.type_precision()
+    @property
+    def scale(self):
+        """Scale if decimal type, None otherwise (int or None)."""
+        return self.descr.type_scale()
+cdef physical_type_name_from_enum(ParquetType type_):
+    # Display name for each known physical-type enum value
+    physical_names = {
+        ParquetType_BOOLEAN: 'BOOLEAN',
+        ParquetType_INT32: 'INT32',
+        ParquetType_INT64: 'INT64',
+        ParquetType_INT96: 'INT96',
+        ParquetType_FLOAT: 'FLOAT',
+        ParquetType_DOUBLE: 'DOUBLE',
+        ParquetType_BYTE_ARRAY: 'BYTE_ARRAY',
+        ParquetType_FIXED_LEN_BYTE_ARRAY: 'FIXED_LEN_BYTE_ARRAY',
+    }
+    # Values missing from the table are reported as 'UNKNOWN'
+    return physical_names.get(type_, 'UNKNOWN')
+cdef logical_type_name_from_enum(ParquetLogicalTypeId type_):
+    # Display name for each known logical-type id
+    logical_names = {
+        ParquetLogicalType_UNDEFINED: 'UNDEFINED',
+        ParquetLogicalType_STRING: 'STRING',
+        ParquetLogicalType_MAP: 'MAP',
+        ParquetLogicalType_LIST: 'LIST',
+        ParquetLogicalType_ENUM: 'ENUM',
+        ParquetLogicalType_DECIMAL: 'DECIMAL',
+        ParquetLogicalType_DATE: 'DATE',
+        ParquetLogicalType_TIME: 'TIME',
+        ParquetLogicalType_TIMESTAMP: 'TIMESTAMP',
+        ParquetLogicalType_INT: 'INT',
+        ParquetLogicalType_FLOAT16: 'FLOAT16',
+        ParquetLogicalType_JSON: 'JSON',
+        ParquetLogicalType_BSON: 'BSON',
+        ParquetLogicalType_UUID: 'UUID',
+        ParquetLogicalType_NONE: 'NONE',
+    }
+    # Values missing from the table are reported as 'UNKNOWN'
+    return logical_names.get(type_, 'UNKNOWN')
+cdef converted_type_name_from_enum(ParquetConvertedType type_):
+    # Display name for each known (legacy) converted-type enum value
+    converted_names = {
+        ParquetConvertedType_NONE: 'NONE',
+        ParquetConvertedType_UTF8: 'UTF8',
+        ParquetConvertedType_MAP: 'MAP',
+        ParquetConvertedType_MAP_KEY_VALUE: 'MAP_KEY_VALUE',
+        ParquetConvertedType_LIST: 'LIST',
+        ParquetConvertedType_ENUM: 'ENUM',
+        ParquetConvertedType_DECIMAL: 'DECIMAL',
+        ParquetConvertedType_DATE: 'DATE',
+        ParquetConvertedType_TIME_MILLIS: 'TIME_MILLIS',
+        ParquetConvertedType_TIME_MICROS: 'TIME_MICROS',
+        ParquetConvertedType_TIMESTAMP_MILLIS: 'TIMESTAMP_MILLIS',
+        ParquetConvertedType_TIMESTAMP_MICROS: 'TIMESTAMP_MICROS',
+        ParquetConvertedType_UINT_8: 'UINT_8',
+        ParquetConvertedType_UINT_16: 'UINT_16',
+        ParquetConvertedType_UINT_32: 'UINT_32',
+        ParquetConvertedType_UINT_64: 'UINT_64',
+        ParquetConvertedType_INT_8: 'INT_8',
+        ParquetConvertedType_INT_16: 'INT_16',
+        ParquetConvertedType_INT_32: 'INT_32',
+        ParquetConvertedType_INT_64: 'INT_64',
+        ParquetConvertedType_JSON: 'JSON',
+        ParquetConvertedType_BSON: 'BSON',
+        ParquetConvertedType_INTERVAL: 'INTERVAL',
+    }
+    # Values missing from the table are reported as 'UNKNOWN'
+    return converted_names.get(type_, 'UNKNOWN')
+cdef encoding_name_from_enum(ParquetEncoding encoding_):
+    # Display name for each known encoding enum value
+    encoding_names = {
+        ParquetEncoding_PLAIN: 'PLAIN',
+        ParquetEncoding_PLAIN_DICTIONARY: 'PLAIN_DICTIONARY',
+        ParquetEncoding_RLE: 'RLE',
+        ParquetEncoding_BIT_PACKED: 'BIT_PACKED',
+        ParquetEncoding_DELTA_BINARY_PACKED: 'DELTA_BINARY_PACKED',
+        ParquetEncoding_DELTA_LENGTH_BYTE_ARRAY: 'DELTA_LENGTH_BYTE_ARRAY',
+        ParquetEncoding_DELTA_BYTE_ARRAY: 'DELTA_BYTE_ARRAY',
+        ParquetEncoding_RLE_DICTIONARY: 'RLE_DICTIONARY',
+        ParquetEncoding_BYTE_STREAM_SPLIT: 'BYTE_STREAM_SPLIT',
+    }
+    # Values missing from the table are reported as 'UNKNOWN'
+    return encoding_names.get(encoding_, 'UNKNOWN')
+cdef encoding_enum_from_name(str encoding_name):
+    # The two dictionary encodings cannot be requested explicitly; they are
+    # rejected with their own message before the general lookup.
+    if encoding_name in ('RLE_DICTIONARY', 'PLAIN_DICTIONARY'):
+        raise ValueError(f"{encoding_name!r} is already used by default.")
+    selectable = {
+        'PLAIN': ParquetEncoding_PLAIN,
+        'BIT_PACKED': ParquetEncoding_BIT_PACKED,
+        'RLE': ParquetEncoding_RLE,
+        'BYTE_STREAM_SPLIT': ParquetEncoding_BYTE_STREAM_SPLIT,
+        'DELTA_BINARY_PACKED': ParquetEncoding_DELTA_BINARY_PACKED,
+        'DELTA_LENGTH_BYTE_ARRAY': ParquetEncoding_DELTA_LENGTH_BYTE_ARRAY,
+        'DELTA_BYTE_ARRAY': ParquetEncoding_DELTA_BYTE_ARRAY,
+    }
+    # Any other name (including None) is unsupported
+    if encoding_name not in selectable:
+        raise ValueError(f"Unsupported column encoding: {encoding_name!r}")
+    return selectable[encoding_name]
+cdef compression_name_from_enum(ParquetCompression compression_):
+    # Display name for each known compression enum value
+    codec_names = {
+        ParquetCompression_UNCOMPRESSED: 'UNCOMPRESSED',
+        ParquetCompression_SNAPPY: 'SNAPPY',
+        ParquetCompression_GZIP: 'GZIP',
+        ParquetCompression_LZO: 'LZO',
+        ParquetCompression_BROTLI: 'BROTLI',
+        ParquetCompression_LZ4: 'LZ4',
+        ParquetCompression_ZSTD: 'ZSTD',
+    }
+    # Values missing from the table are reported as 'UNKNOWN'
+    return codec_names.get(compression_, 'UNKNOWN')
+cdef int check_compression_name(name) except -1:
+    # Returns 0 when ``name`` (compared case-insensitively) is an accepted
+    # codec name; raises ArrowException otherwise.
+    accepted = {'NONE', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4', 'ZSTD'}
+    if name.upper() in accepted:
+        return 0
+    raise ArrowException("Unsupported compression: " + name)
+cdef ParquetCompression compression_from_name(name):
+    # Matching is case-insensitive. Any name not listed below maps to
+    # UNCOMPRESSED; this function performs no validation of its own.
+    codec = name.upper()
+    if codec == 'SNAPPY':
+        return ParquetCompression_SNAPPY
+    if codec == 'GZIP':
+        return ParquetCompression_GZIP
+    if codec == 'LZO':
+        return ParquetCompression_LZO
+    if codec == 'BROTLI':
+        return ParquetCompression_BROTLI
+    if codec == 'LZ4':
+        return ParquetCompression_LZ4
+    if codec == 'ZSTD':
+        return ParquetCompression_ZSTD
+    return ParquetCompression_UNCOMPRESSED
+cdef class ParquetReader(_Weakrefable):
+    """
+    Reader over a single Parquet source.
+
+    ``open`` must be called before any of the reading methods; they all go
+    through the reader object that ``open`` builds.
+    """
+    cdef:
+        object source
+        CMemoryPool* pool
+        UniquePtrNoGIL[FileReader] reader
+        FileMetaData _metadata
+        shared_ptr[CRandomAccessFile] rd_handle
+    cdef public:
+        _column_idx_map
+    def __cinit__(self, MemoryPool memory_pool=None):
+        self.pool = maybe_unbox_memory_pool(memory_pool)
+        self._metadata = None
+    def open(self, object source not None, *, bint use_memory_map=False,
+             read_dictionary=None, FileMetaData metadata=None,
+             int buffer_size=0, bint pre_buffer=False,
+             coerce_int96_timestamp_unit=None,
+             FileDecryptionProperties decryption_properties=None,
+             thrift_string_size_limit=None,
+             thrift_container_size_limit=None,
+             page_checksum_verification=False):
+        """
+        Open a parquet file for reading.
+        Parameters
+        ----------
+        source : str, pathlib.Path, pyarrow.NativeFile, or file-like object
+        use_memory_map : bool, default False
+        read_dictionary : iterable[int or str], optional
+            Columns (by index or by dotted name) to read as dictionaries.
+        metadata : FileMetaData, optional
+            Metadata handed to the reader builder when given.
+        buffer_size : int, default 0
+            A positive value enables the buffered stream with that size,
+            0 disables it, and a negative value raises ValueError.
+        pre_buffer : bool, default False
+            Forced to False when threading is not enabled.
+        coerce_int96_timestamp_unit : str, optional
+        decryption_properties : FileDecryptionProperties, optional
+        thrift_string_size_limit : int, optional
+            Must be larger than zero when given.
+        thrift_container_size_limit : int, optional
+            Must be larger than zero when given.
+        page_checksum_verification : bool, default False
+        """
+        cdef:
+            shared_ptr[CFileMetaData] c_metadata
+            CReaderProperties properties = default_reader_properties()
+            ArrowReaderProperties arrow_props = (
+                default_arrow_reader_properties())
+            FileReaderBuilder builder
+        if pre_buffer and not is_threading_enabled():
+            pre_buffer = False
+        if metadata is not None:
+            c_metadata = metadata.sp_metadata
+        if buffer_size > 0:
+            properties.enable_buffered_stream()
+            properties.set_buffer_size(buffer_size)
+        elif buffer_size == 0:
+            properties.disable_buffered_stream()
+        else:
+            raise ValueError('Buffer size must be larger than zero')
+        if thrift_string_size_limit is not None:
+            if thrift_string_size_limit <= 0:
+                raise ValueError("thrift_string_size_limit "
+                                 "must be larger than zero")
+            properties.set_thrift_string_size_limit(thrift_string_size_limit)
+        if thrift_container_size_limit is not None:
+            if thrift_container_size_limit <= 0:
+                raise ValueError("thrift_container_size_limit "
+                                 "must be larger than zero")
+            properties.set_thrift_container_size_limit(
+                thrift_container_size_limit)
+        if decryption_properties is not None:
+            properties.file_decryption_properties(
+                decryption_properties.unwrap())
+        arrow_props.set_pre_buffer(pre_buffer)
+        properties.set_page_checksum_verification(page_checksum_verification)
+        if coerce_int96_timestamp_unit is None:
+            # use the default defined in default_arrow_reader_properties()
+            pass
+        else:
+            arrow_props.set_coerce_int96_timestamp_unit(
+                string_to_timeunit(coerce_int96_timestamp_unit))
+        self.source = source
+        get_reader(source, use_memory_map, &self.rd_handle)
+        with nogil:
+            check_status(builder.Open(self.rd_handle, properties, c_metadata))
+        # Set up metadata
+        with nogil:
+            c_metadata = builder.raw_reader().metadata()
+        self._metadata = result = FileMetaData()
+        result.init(c_metadata)
+        # Column names in ``read_dictionary`` are resolved through the
+        # metadata, so this has to happen after the metadata is in place.
+        if read_dictionary is not None:
+            self._set_read_dictionary(read_dictionary, &arrow_props)
+        with nogil:
+            check_status(builder.memory_pool(self.pool)
+                         .properties(arrow_props)
+                         .Build(&self.reader))
+    cdef _set_read_dictionary(self, read_dictionary,
+                              ArrowReaderProperties* props):
+        # Entries that are not ints are treated as column names and mapped
+        # to their index first.
+        for column in read_dictionary:
+            if not isinstance(column, int):
+                column = self.column_name_idx(column)
+            props.set_read_dictionary(column, True)
+    @property
+    def column_paths(self):
+        """Path components of every column (list of list of str)."""
+        cdef:
+            FileMetaData container = self.metadata
+            const CFileMetaData* metadata = container._metadata
+            vector[c_string] path
+            int i = 0
+        paths = []
+        for i in range(0, metadata.num_columns()):
+            path = (metadata.schema().Column(i)
+                    .path().get().ToDotVector())
+            paths.append([frombytes(x) for x in path])
+        return paths
+    @property
+    def metadata(self):
+        """FileMetaData set up by ``open`` (None before that)."""
+        return self._metadata
+    @property
+    def schema_arrow(self):
+        """Schema as reported by the reader's GetSchema, wrapped for Python."""
+        cdef shared_ptr[CSchema] out
+        with nogil:
+            check_status(self.reader.get().GetSchema(&out))
+        return pyarrow_wrap_schema(out)
+    @property
+    def num_row_groups(self):
+        """Number of row groups reported by the reader (int)."""
+        return self.reader.get().num_row_groups()
+    def set_use_threads(self, bint use_threads):
+        """
+        Parameters
+        ----------
+        use_threads : bool
+            Ignored (treated as False) when threading is not enabled.
+        """
+        if is_threading_enabled():
+            self.reader.get().set_use_threads(use_threads)
+        else:
+            self.reader.get().set_use_threads(False)
+    def set_batch_size(self, int64_t batch_size):
+        """
+        Parameters
+        ----------
+        batch_size : int64
+        """
+        self.reader.get().set_batch_size(batch_size)
+    def iter_batches(self, int64_t batch_size, row_groups, column_indices=None,
+                     bint use_threads=True):
+        """
+        Parameters
+        ----------
+        batch_size : int64
+        row_groups : list[int]
+        column_indices : list[int], optional
+            All columns are read when omitted.
+        use_threads : bool, default True
+        Yields
+        ------
+        next : RecordBatch
+        """
+        cdef:
+            vector[int] c_row_groups
+            vector[int] c_column_indices
+            shared_ptr[CRecordBatch] record_batch
+            UniquePtrNoGIL[CRecordBatchReader] recordbatchreader
+        self.set_batch_size(batch_size)
+        # Always apply the flag, as read_row_groups and read_all do, so that
+        # use_threads=False also switches off threading enabled by an
+        # earlier call on this reader.
+        self.set_use_threads(use_threads)
+        for row_group in row_groups:
+            c_row_groups.push_back(row_group)
+        if column_indices is not None:
+            for index in column_indices:
+                c_column_indices.push_back(index)
+            with nogil:
+                check_status(
+                    self.reader.get().GetRecordBatchReader(
+                        c_row_groups, c_column_indices, &recordbatchreader
+                    )
+                )
+        else:
+            with nogil:
+                check_status(
+                    self.reader.get().GetRecordBatchReader(
+                        c_row_groups, &recordbatchreader
+                    )
+                )
+        while True:
+            with nogil:
+                check_status(
+                    recordbatchreader.get().ReadNext(&record_batch)
+                )
+            # A null batch marks the end of the stream
+            if record_batch.get() == NULL:
+                break
+            yield pyarrow_wrap_batch(record_batch)
+    def read_row_group(self, int i, column_indices=None,
+                       bint use_threads=True):
+        """
+        Parameters
+        ----------
+        i : int
+        column_indices : list[int], optional
+        use_threads : bool, default True
+        Returns
+        -------
+        table : pyarrow.Table
+        """
+        return self.read_row_groups([i], column_indices, use_threads)
+    def read_row_groups(self, row_groups not None, column_indices=None,
+                        bint use_threads=True):
+        """
+        Parameters
+        ----------
+        row_groups : list[int]
+        column_indices : list[int], optional
+        use_threads : bool, default True
+        Returns
+        -------
+        table : pyarrow.Table
+        """
+        cdef:
+            shared_ptr[CTable] ctable
+            vector[int] c_row_groups
+            vector[int] c_column_indices
+        self.set_use_threads(use_threads)
+        for row_group in row_groups:
+            c_row_groups.push_back(row_group)
+        if column_indices is not None:
+            for index in column_indices:
+                c_column_indices.push_back(index)
+            with nogil:
+                check_status(self.reader.get()
+                             .ReadRowGroups(c_row_groups, c_column_indices,
+                                            &ctable))
+        else:
+            # Read all columns
+            with nogil:
+                check_status(self.reader.get()
+                             .ReadRowGroups(c_row_groups, &ctable))
+        return pyarrow_wrap_table(ctable)
+    def read_all(self, column_indices=None, bint use_threads=True):
+        """
+        Parameters
+        ----------
+        column_indices : list[int], optional
+        use_threads : bool, default True
+        Returns
+        -------
+        table : pyarrow.Table
+        """
+        cdef:
+            shared_ptr[CTable] ctable
+            vector[int] c_column_indices
+        self.set_use_threads(use_threads)
+        if column_indices is not None:
+            for index in column_indices:
+                c_column_indices.push_back(index)
+            with nogil:
+                check_status(self.reader.get()
+                             .ReadTable(c_column_indices, &ctable))
+        else:
+            # Read all columns
+            with nogil:
+                check_status(self.reader.get()
+                             .ReadTable(&ctable))
+        return pyarrow_wrap_table(ctable)
+    def scan_contents(self, column_indices=None, batch_size=65536):
+        """
+        Parameters
+        ----------
+        column_indices : list[int], optional
+        batch_size : int32, default 65536
+        Returns
+        -------
+        num_rows : int64
+        """
+        cdef:
+            vector[int] c_column_indices
+            int32_t c_batch_size
+            int64_t c_num_rows
+        if column_indices is not None:
+            for index in column_indices:
+                c_column_indices.push_back(index)
+        c_batch_size = batch_size
+        with nogil:
+            check_status(self.reader.get()
+                         .ScanContents(c_column_indices, c_batch_size,
+                                       &c_num_rows))
+        return c_num_rows
+    def column_name_idx(self, column_name):
+        """
+        Find the index of a column by its name.
+        Parameters
+        ----------
+        column_name : str
+            Name of the column; separation of nesting levels is done via ".".
+        Returns
+        -------
+        column_idx : int
+            Integer index of the column in the schema.
+        Raises
+        ------
+        KeyError
+            If no column has that name.
+        """
+        cdef:
+            FileMetaData container = self.metadata
+            const CFileMetaData* metadata = container._metadata
+            int i = 0
+        # The name -> index map is built on first use and then reused
+        if self._column_idx_map is None:
+            self._column_idx_map = {}
+            for i in range(0, metadata.num_columns()):
+                col_bytes = tobytes(metadata.schema().Column(i)
+                                    .path().get().ToDotString())
+                self._column_idx_map[col_bytes] = i
+        return self._column_idx_map[tobytes(column_name)]
+    def read_column(self, int column_index):
+        """
+        Read the column at the specified index.
+        Parameters
+        ----------
+        column_index : int
+            Index of the column.
+        Returns
+        -------
+        column : pyarrow.ChunkedArray
+        """
+        cdef shared_ptr[CChunkedArray] out
+        with nogil:
+            check_status(self.reader.get()
+                         .ReadColumn(column_index, &out))
+        return pyarrow_wrap_chunked_array(out)
+    def close(self):
+        """Close the underlying file handle unless it is already closed."""
+        if not self.closed:
+            with nogil:
+                check_status(self.rd_handle.get().Close())
+    @property
+    def closed(self):
+        """True when no file handle is set or the handle reports closed."""
+        if self.rd_handle == NULL:
+            return True
+        with nogil:
+            closed = self.rd_handle.get().closed()
+        return closed
+cdef CSortingColumn _convert_sorting_column(SortingColumn sorting_column):
+    # Copy the three fields of the Python-level SortingColumn into its
+    # C-level CSortingColumn counterpart.
+    cdef CSortingColumn converted
+    converted.nulls_first = sorting_column.nulls_first
+    converted.descending = sorting_column.descending
+    converted.column_idx = sorting_column.column_index
+    return converted
+cdef vector[CSortingColumn] _convert_sorting_columns(sorting_columns) except *:
+    cdef vector[CSortingColumn] c_columns
+    # Reject anything that is not a Sequence made up solely of SortingColumn
+    if (not isinstance(sorting_columns, Sequence)
+            or not all(isinstance(col, SortingColumn)
+                       for col in sorting_columns)):
+        raise ValueError(
+            "'sorting_columns' must be a list of `SortingColumn`")
+    # Convert the entries one by one, preserving their order
+    for col in sorting_columns:
+        c_columns.push_back(_convert_sorting_column(col))
+    return c_columns
+cdef shared_ptr[WriterProperties] _create_writer_properties(
+        use_dictionary=None,
+        compression=None,
+        version=None,
+        write_statistics=None,
+        data_page_size=None,
+        compression_level=None,
+        use_byte_stream_split=False,
+        column_encoding=None,
+        data_page_version=None,
+        FileEncryptionProperties encryption_properties=None,
+        write_batch_size=None,
+        dictionary_pagesize_limit=None,
+        write_page_index=False,
+        write_page_checksum=False,
+        sorting_columns=None,
+        store_decimal_as_integer=False) except *:
+    """General writer properties"""
+    cdef:
+        shared_ptr[WriterProperties] properties
+        WriterProperties.Builder props
+    # data_page_version
+    if data_page_version is not None:
+        if data_page_version == "1.0":
+            props.data_page_version(ParquetDataPageVersion_V1)
+        elif data_page_version == "2.0":
+            props.data_page_version(ParquetDataPageVersion_V2)
+        else:
+            raise ValueError("Unsupported Parquet data page version: {0}"
+                             .format(data_page_version))
+    # version
+    if version is not None:
+        if version == "1.0":
+            props.version(ParquetVersion_V1)
+        elif version in ("2.0", "pseudo-2.0"):
+            warnings.warn(
+                "Parquet format '2.0' pseudo version is deprecated, use "
+                "'2.4' or '2.6' for fine-grained feature selection",
+                FutureWarning, stacklevel=2)
+            props.version(ParquetVersion_V2_0)
+        elif version == "2.4":
+            props.version(ParquetVersion_V2_4)
+        elif version == "2.6":
+            props.version(ParquetVersion_V2_6)
+        else:
+            raise ValueError("Unsupported Parquet format version: {0}"
+                             .format(version))
+    # compression
+    if isinstance(compression, basestring):
+        check_compression_name(compression)
+        props.compression(compression_from_name(compression))
+    elif compression is not None:
+        for column, codec in compression.iteritems():
+            check_compression_name(codec)
+            props.compression(tobytes(column), compression_from_name(codec))
+    if isinstance(compression_level, int):
+        props.compression_level(compression_level)
+    elif compression_level is not None:
+        for column, level in compression_level.iteritems():
+            props.compression_level(tobytes(column), level)
+    # use_dictionary
+    if isinstance(use_dictionary, bool):
+        if use_dictionary:
+            props.enable_dictionary()
+            if column_encoding is not None:
+                raise ValueError(
+                    "To use 'column_encoding' set 'use_dictionary' to False")
+        else:
+            props.disable_dictionary()
+    elif use_dictionary is not None:
+        # Deactivate dictionary encoding by default
+        props.disable_dictionary()
+        for column in use_dictionary:
+            props.enable_dictionary(tobytes(column))
+            if (column_encoding is not None and
+                    column_encoding.get(column) is not None):
+                raise ValueError(
+                    "To use 'column_encoding' set 'use_dictionary' to False")
+    # write_statistics
+    if isinstance(write_statistics, bool):
+        if write_statistics:
+            props.enable_statistics()
+        else:
+            props.disable_statistics()
+    elif write_statistics is not None:
+        # Deactivate statistics by default and enable for specified columns
+        props.disable_statistics()
+        for column in write_statistics:
+            props.enable_statistics(tobytes(column))
+    # sorting_columns
+    if sorting_columns is not None:
+        props.set_sorting_columns(_convert_sorting_columns(sorting_columns))
+    # use_byte_stream_split
+    if isinstance(use_byte_stream_split, bool):
+        if use_byte_stream_split:
+            if column_encoding is not None:
+                raise ValueError(
+                    "'use_byte_stream_split' cannot be passed"
+                    "together with 'column_encoding'")
+            else:
+                props.encoding(ParquetEncoding_BYTE_STREAM_SPLIT)
+    elif use_byte_stream_split is not None:
+        for column in use_byte_stream_split:
+            if column_encoding is None:
+                column_encoding = {column: 'BYTE_STREAM_SPLIT'}
+            elif column_encoding.get(column, None) is None:
+                column_encoding[column] = 'BYTE_STREAM_SPLIT'
+            else:
+                raise ValueError(
+                    "'use_byte_stream_split' cannot be passed"
+                    "together with 'column_encoding'")
+    # store_decimal_as_integer
+    if isinstance(store_decimal_as_integer, bool):
+        if store_decimal_as_integer:
+            props.enable_store_decimal_as_integer()
+        else:
+            props.disable_store_decimal_as_integer()
+    else:
+        raise TypeError("'store_decimal_as_integer' must be a boolean")
+    # column_encoding
+    # encoding map - encode individual columns
+    if column_encoding is not None:
+        if isinstance(column_encoding, dict):
+            for column, _encoding in column_encoding.items():
+                props.encoding(tobytes(column),
+                               encoding_enum_from_name(_encoding))
+        elif isinstance(column_encoding, str):
+            props.encoding(encoding_enum_from_name(column_encoding))
+        else:
+            raise TypeError(
+                "'column_encoding' should be a dictionary or a string")
+    if data_page_size is not None:
+        props.data_pagesize(data_page_size)
+    if write_batch_size is not None:
+        props.write_batch_size(write_batch_size)
+    if dictionary_pagesize_limit is not None:
+        props.dictionary_pagesize_limit(dictionary_pagesize_limit)
+    # encryption
+    if encryption_properties is not None:
+        props.encryption(
+            (<FileEncryptionProperties>encryption_properties).unwrap())
+    # For backwards compatibility reasons we cap the maximum row group size
+    # at 64Mi rows.  This could be changed in the future, though it would be
+    # a breaking change.
+    #
+    # The user can always specify a smaller row group size (and the default
+    # is smaller) when calling write_table.  If the call to write_table uses
+    # a size larger than this then it will be latched to this value.
+    props.max_row_group_length(_MAX_ROW_GROUP_SIZE)
+    # checksum
+    if write_page_checksum:
+        props.enable_page_checksum()
+    else:
+        props.disable_page_checksum()
+    # page index
+    if write_page_index:
+        props.enable_write_page_index()
+    else:
+        props.disable_write_page_index()
+    properties = props.build()
+    return properties
+cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties(
+        use_deprecated_int96_timestamps=False,
+        coerce_timestamps=None,
+        allow_truncated_timestamps=False,
+        writer_engine_version=None,
+        use_compliant_nested_type=True,
+        store_schema=True) except *:
+    """
+    Build the Arrow writer properties from Python-level options.
+
+    Parameters
+    ----------
+    use_deprecated_int96_timestamps : truthy to enable int96 timestamps,
+        otherwise they are explicitly disabled.
+    coerce_timestamps : None, 'ms' or 'us'
+        Any other value raises ValueError.
+    allow_truncated_timestamps : truthy to allow truncated timestamps,
+        otherwise they are explicitly disallowed.
+    writer_engine_version : None, "V1" or "V2"
+        "V1" emits a warning and selects the V1 engine.  "V2" and None leave
+        the builder's engine version untouched.  Any other value raises
+        ValueError.
+    use_compliant_nested_type : truthy to enable compliant nested types,
+        otherwise they are explicitly disabled.
+    store_schema : truthy to store the original Arrow schema.
+
+    Returns
+    -------
+    shared_ptr[ArrowWriterProperties]
+        The properties produced by the builder.
+    """
+    cdef:
+        shared_ptr[ArrowWriterProperties] arrow_properties
+        ArrowWriterProperties.Builder arrow_props
+    # Store the original Arrow schema so things like dictionary types can
+    # be automatically reconstructed
+    if store_schema:
+        arrow_props.store_schema()
+    # int96 support
+    if use_deprecated_int96_timestamps:
+        arrow_props.enable_deprecated_int96_timestamps()
+    else:
+        arrow_props.disable_deprecated_int96_timestamps()
+    # coerce_timestamps
+    if coerce_timestamps == 'ms':
+        arrow_props.coerce_timestamps(TimeUnit_MILLI)
+    elif coerce_timestamps == 'us':
+        arrow_props.coerce_timestamps(TimeUnit_MICRO)
+    elif coerce_timestamps is not None:
+        raise ValueError('Invalid value for coerce_timestamps: {0}'
+                         .format(coerce_timestamps))
+    # allow_truncated_timestamps
+    if allow_truncated_timestamps:
+        arrow_props.allow_truncated_timestamps()
+    else:
+        arrow_props.disallow_truncated_timestamps()
+    # use_compliant_nested_type
+    if use_compliant_nested_type:
+        arrow_props.enable_compliant_nested_types()
+    else:
+        arrow_props.disable_compliant_nested_types()
+    # writer_engine_version
+    # None is the declared default of this function (and of
+    # ParquetWriter.__cinit__), so it must not be rejected: it is handled
+    # like "V2", i.e. no engine version is set on the builder.
+    if writer_engine_version == "V1":
+        warnings.warn("V1 parquet writer engine is a no-op.  Use V2.")
+        arrow_props.set_engine_version(ArrowWriterEngineVersion.V1)
+    elif writer_engine_version is not None and writer_engine_version != "V2":
+        raise ValueError("Unsupported Writer Engine Version: {0}"
+                         .format(writer_engine_version))
+    arrow_properties = arrow_props.build()
+    return arrow_properties
+cdef _name_to_index_map(Schema arrow_schema):
+    """
+    Map each Parquet leaf column's dotted path to its column index.
+
+    The Arrow schema is converted to a Parquet schema using default writer
+    properties, then every column of the resulting descriptor is visited.
+    """
+    cdef:
+        shared_ptr[CSchema] c_arrow_schema
+        shared_ptr[SchemaDescriptor] descr_holder
+        SchemaDescriptor* descr
+        shared_ptr[WriterProperties] writer_props
+        shared_ptr[ArrowWriterProperties] arrow_writer_props
+    writer_props = _create_writer_properties()
+    arrow_writer_props = _create_arrow_writer_properties(
+        use_deprecated_int96_timestamps=False,
+        coerce_timestamps=None,
+        allow_truncated_timestamps=False,
+        writer_engine_version="V2"
+    )
+    c_arrow_schema = pyarrow_unwrap_schema(arrow_schema)
+    with nogil:
+        check_status(ToParquetSchema(
+            c_arrow_schema.get(),
+            deref(writer_props.get()),
+            deref(arrow_writer_props.get()),
+            &descr_holder))
+    descr = descr_holder.get()
+    index_by_name = {}
+    for col_idx in range(descr.num_columns()):
+        dotted_path = frombytes(descr.Column(col_idx).path().get().ToDotString())
+        index_by_name[dotted_path] = col_idx
+    return index_by_name
+cdef class ParquetWriter(_Weakrefable):
+    """
+    Low-level Parquet file writer wrapping a C++ ``FileWriter``.
+
+    The writer is created in ``__cinit__`` from an output location, an Arrow
+    schema and the writer options; tables are then appended with
+    ``write_table`` and the file is finalized with ``close``.
+    """
+    cdef:
+        # C++ writer created in __cinit__
+        unique_ptr[FileWriter] writer
+        # Output stream the writer writes to
+        shared_ptr[COutputStream] sink
+        # True when the sink was opened here from a path (and is therefore
+        # closed by close()); False when it came from a caller-supplied object
+        bint own_sink
+    # NOTE(review): these readonly attributes are declared here but are not
+    # assigned anywhere in the __cinit__ shown below -- confirm whether they
+    # are populated elsewhere.
+    cdef readonly:
+        object use_dictionary
+        object use_deprecated_int96_timestamps
+        object use_byte_stream_split
+        object column_encoding
+        object coerce_timestamps
+        object allow_truncated_timestamps
+        object compression
+        object compression_level
+        object data_page_version
+        object use_compliant_nested_type
+        object version
+        object write_statistics
+        object writer_engine_version
+        int row_group_size
+        int64_t data_page_size
+        FileEncryptionProperties encryption_properties
+        int64_t write_batch_size
+        int64_t dictionary_pagesize_limit
+        object store_schema
+        object store_decimal_as_integer
+    def __cinit__(self, where, Schema schema not None, use_dictionary=None,
+                  compression=None, version=None,
+                  write_statistics=None,
+                  MemoryPool memory_pool=None,
+                  use_deprecated_int96_timestamps=False,
+                  coerce_timestamps=None,
+                  data_page_size=None,
+                  allow_truncated_timestamps=False,
+                  compression_level=None,
+                  use_byte_stream_split=False,
+                  column_encoding=None,
+                  writer_engine_version=None,
+                  data_page_version=None,
+                  use_compliant_nested_type=True,
+                  encryption_properties=None,
+                  write_batch_size=None,
+                  dictionary_pagesize_limit=None,
+                  store_schema=True,
+                  write_page_index=False,
+                  write_page_checksum=False,
+                  sorting_columns=None,
+                  store_decimal_as_integer=False):
+        cdef:
+            shared_ptr[WriterProperties] properties
+            shared_ptr[ArrowWriterProperties] arrow_properties
+            c_string c_where
+            CMemoryPool* pool
+        # Resolve the output sink: if `where` can be turned into a path a
+        # file output stream is opened (and owned by this writer); if
+        # _stringify_path raises TypeError, `where` is handed to get_writer
+        # instead and the resulting sink is not owned.
+        try:
+            where = _stringify_path(where)
+        except TypeError:
+            get_writer(where, &self.sink)
+            self.own_sink = False
+        else:
+            c_where = tobytes(where)
+            with nogil:
+                self.sink = GetResultValue(FileOutputStream.Open(c_where))
+            self.own_sink = True
+        # Parquet-level options (compression, encodings, statistics, ...)
+        properties = _create_writer_properties(
+            use_dictionary=use_dictionary,
+            compression=compression,
+            version=version,
+            write_statistics=write_statistics,
+            data_page_size=data_page_size,
+            compression_level=compression_level,
+            use_byte_stream_split=use_byte_stream_split,
+            column_encoding=column_encoding,
+            data_page_version=data_page_version,
+            encryption_properties=encryption_properties,
+            write_batch_size=write_batch_size,
+            dictionary_pagesize_limit=dictionary_pagesize_limit,
+            write_page_index=write_page_index,
+            write_page_checksum=write_page_checksum,
+            sorting_columns=sorting_columns,
+            store_decimal_as_integer=store_decimal_as_integer,
+        )
+        # Arrow-level options (timestamp handling, nested types, schema storage)
+        arrow_properties = _create_arrow_writer_properties(
+            use_deprecated_int96_timestamps=use_deprecated_int96_timestamps,
+            coerce_timestamps=coerce_timestamps,
+            allow_truncated_timestamps=allow_truncated_timestamps,
+            writer_engine_version=writer_engine_version,
+            use_compliant_nested_type=use_compliant_nested_type,
+            store_schema=store_schema,
+        )
+        pool = maybe_unbox_memory_pool(memory_pool)
+        # Open the C++ writer on the sink with both property sets
+        with nogil:
+            self.writer = move(GetResultValue(
+                FileWriter.Open(deref(schema.schema), pool,
+                                self.sink, properties, arrow_properties)))
+    def close(self):
+        """
+        Close the writer and, if the sink was opened by this object, the sink.
+        """
+        with nogil:
+            check_status(self.writer.get().Close())
+            # Only close streams this writer opened itself from a path
+            if self.own_sink:
+                check_status(self.sink.get().Close())
+    def write_table(self, Table table, row_group_size=None):
+        """
+        Write a table to the file.
+
+        Parameters
+        ----------
+        table : Table
+            The table to write.
+        row_group_size : int, optional
+            Row group size passed to the C++ writer.  None or -1 selects the
+            smaller of the table's row count and _DEFAULT_ROW_GROUP_SIZE.
+
+        Raises
+        ------
+        ValueError
+            If row_group_size is 0.
+        """
+        cdef:
+            CTable* ctable = table.table
+            int64_t c_row_group_size
+        if row_group_size is None or row_group_size == -1:
+            c_row_group_size = min(ctable.num_rows(), _DEFAULT_ROW_GROUP_SIZE)
+        elif row_group_size == 0:
+            raise ValueError('Row group size cannot be 0')
+        else:
+            c_row_group_size = row_group_size
+        with nogil:
+            check_status(self.writer.get()
+                         .WriteTable(deref(ctable), c_row_group_size))
+    def add_key_value_metadata(self, key_value_metadata):
+        """
+        Pass additional key/value metadata to the underlying writer.
+
+        Parameters
+        ----------
+        key_value_metadata : object
+            Anything accepted by the KeyValueMetadata constructor.
+        """
+        cdef:
+            shared_ptr[const CKeyValueMetadata] c_metadata
+        c_metadata = pyarrow_unwrap_metadata(KeyValueMetadata(key_value_metadata))
+        with nogil:
+            check_status(self.writer.get()
+                         .AddKeyValueMetadata(c_metadata))
+    @property
+    def metadata(self):
+        """
+        The file metadata as a FileMetaData object.
+
+        Raises RuntimeError when the writer has no metadata to return (the
+        error message states it is only available after the writer is closed).
+        """
+        cdef:
+            shared_ptr[CFileMetaData] metadata
+            FileMetaData result
+        with nogil:
+            metadata = self.writer.get().metadata()
+        # A null pointer means the writer has no metadata yet
+        if metadata:
+            result = FileMetaData()
+            result.init(metadata)
+            return result
+        raise RuntimeError(
+            'file metadata is only available after writer close')

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_pyarrow_cpp_tests.pxd ADDED Viewed

	@@ -0,0 +1,33 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# distutils: language = c++
+# cython: language_level = 3
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport CStatus
+# Function type of a single C++ test: takes no arguments, returns a CStatus.
+ctypedef CStatus cb_test_func()
+cdef extern from "arrow/python/python_test.h" namespace "arrow::py::testing" nogil:
+    # One C++ test case: its name and the function implementing it.
+    cdef cppclass CTestCase "arrow::py::testing::TestCase":
+        c_string name
+        cb_test_func func
+    # Returns the test cases declared by the header above.
+    vector[CTestCase] GetCppTestCases()

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/_substrait.pyx ADDED Viewed

	@@ -0,0 +1,481 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# cython: language_level = 3
+from cython.operator cimport dereference as deref
+from libcpp.vector cimport vector as std_vector
+from pyarrow import Buffer, py_buffer
+from pyarrow._compute cimport Expression
+from pyarrow.lib import frombytes, tobytes
+from pyarrow.lib cimport *
+from pyarrow.includes.libarrow cimport *
+from pyarrow.includes.libarrow_substrait cimport *
+try:
+    import substrait as py_substrait
+except ImportError:
+    py_substrait = None
+else:
+    import substrait.proto  # no-cython-lint
+# TODO GH-37235: Fix exception handling
+cdef CDeclaration _create_named_table_provider(
+    dict named_args, const std_vector[c_string]& names, const CSchema& schema
+) noexcept:
+    # Callback bound in run_query to resolve a named table.
+    #
+    # Converts `names` to a list of Python strings and `schema` to a
+    # pyarrow Schema, calls named_args["provider"](names, schema) and wraps
+    # the returned table in a "table_source" declaration with no inputs.
+    #
+    # NOTE(review): the function is declared noexcept, so presumably errors
+    # raised by the user-supplied provider cannot propagate to the caller --
+    # confirm against the exception-handling TODO for this function.
+    cdef:
+        c_string c_name
+        shared_ptr[CTable] c_in_table
+        shared_ptr[CTableSourceNodeOptions] c_tablesourceopts
+        shared_ptr[CExecNodeOptions] c_input_node_opts
+        vector[CDeclaration.Input] no_c_inputs
+    py_names = []
+    for i in range(names.size()):
+        c_name = names[i]
+        py_names.append(frombytes(c_name))
+    # make_shared copies the schema so it can be wrapped as a Python object
+    py_schema = pyarrow_wrap_schema(make_shared[CSchema](schema))
+    py_table = named_args["provider"](py_names, py_schema)
+    c_in_table = pyarrow_unwrap_table(py_table)
+    c_tablesourceopts = make_shared[CTableSourceNodeOptions](c_in_table)
+    # Upcast to the generic node-options type expected by CDeclaration
+    c_input_node_opts = static_pointer_cast[CExecNodeOptions, CTableSourceNodeOptions](
+        c_tablesourceopts)
+    return CDeclaration(tobytes("table_source"),
+                        no_c_inputs, c_input_node_opts)
+def run_query(plan, *, table_provider=None, use_threads=True):
+    """
+    Execute a Substrait plan and read the results as a RecordBatchReader.
+    Parameters
+    ----------
+    plan : Union[Buffer, bytes, memoryview]
+        The serialized Substrait plan to execute.
+    table_provider : object (optional)
+        A function to resolve any NamedTable relation to a table.
+        The function will receive two arguments which will be a list
+        of strings representing the table name and a pyarrow.Schema representing
+        the expected schema and should return a pyarrow.Table.
+    use_threads : bool, default True
+        If True then multiple threads will be used to run the query.  If False then
+        all CPU intensive work will be done on the calling thread.
+    Returns
+    -------
+    RecordBatchReader
+        A reader containing the result of the executed query
+    Raises
+    ------
+    TypeError
+        If ``plan`` is not a Buffer, bytes or memoryview.
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> from pyarrow.lib import tobytes
+    >>> import pyarrow.substrait as substrait
+    >>> test_table_1 = pa.Table.from_pydict({"x": [1, 2, 3]})
+    >>> test_table_2 = pa.Table.from_pydict({"x": [4, 5, 6]})
+    >>> def table_provider(names, schema):
+    ...     if not names:
+    ...        raise Exception("No names provided")
+    ...     elif names[0] == "t1":
+    ...        return test_table_1
+    ...     elif names[0] == "t2":
+    ...        return test_table_2
+    ...     else:
+    ...        raise Exception("Unrecognized table name")
+    ...
+    >>> substrait_query = '''
+    ...         {
+    ...             "relations": [
+    ...             {"rel": {
+    ...                 "read": {
+    ...                 "base_schema": {
+    ...                     "struct": {
+    ...                     "types": [
+    ...                                 {"i64": {}}
+    ...                             ]
+    ...                     },
+    ...                     "names": [
+    ...                             "x"
+    ...                             ]
+    ...                 },
+    ...                 "namedTable": {
+    ...                         "names": ["t1"]
+    ...                 }
+    ...                 }
+    ...             }}
+    ...             ]
+    ...         }
+    ... '''
+    >>> buf = pa._substrait._parse_json_plan(tobytes(substrait_query))
+    >>> reader = pa.substrait.run_query(buf, table_provider=table_provider)
+    >>> reader.read_all()
+    pyarrow.Table
+    x: int64
+    ----
+    x: [[1,2,3]]
+    """
+    cdef:
+        CResult[shared_ptr[CRecordBatchReader]] c_res_reader
+        shared_ptr[CRecordBatchReader] c_reader
+        RecordBatchReader reader
+        shared_ptr[CBuffer] c_buf_plan
+        CConversionOptions c_conversion_options
+        c_bool c_use_threads
+    c_use_threads = use_threads
+    # Normalize the plan to a C++ buffer; bytes-like input is wrapped first
+    if isinstance(plan, (bytes, memoryview)):
+        c_buf_plan = pyarrow_unwrap_buffer(py_buffer(plan))
+    elif isinstance(plan, Buffer):
+        c_buf_plan = pyarrow_unwrap_buffer(plan)
+    else:
+        raise TypeError(
+            f"Expected 'pyarrow.Buffer' or bytes, got '{type(plan)}'")
+    # Only install a named-table callback when the caller supplied a provider
+    if table_provider is not None:
+        named_table_args = {
+            "provider": table_provider
+        }
+        c_conversion_options.named_table_provider = BindFunction[CNamedTableProvider](
+            &_create_named_table_provider, named_table_args)
+    with nogil:
+        c_res_reader = ExecuteSerializedPlan(
+            deref(c_buf_plan), default_extension_id_registry(),
+            GetFunctionRegistry(), c_conversion_options, c_use_threads)
+    # Unwrap the result outside the nogil block
+    c_reader = GetResultValue(c_res_reader)
+    # Build the Python wrapper via __new__ and attach the C++ reader directly
+    reader = RecordBatchReader.__new__(RecordBatchReader)
+    reader.reader = c_reader
+    return reader
+def _parse_json_plan(plan):
+    """
+    Convert a Substrait plan written in JSON to its serialized Protobuf form.
+    Parameters
+    ----------
+    plan : bytes
+        The JSON text of the Substrait plan.
+    Returns
+    -------
+    Buffer
+        The plan serialized as Protobuf.
+    """
+    cdef:
+        c_string json_text
+        CResult[shared_ptr[CBuffer]] serialized
+        shared_ptr[CBuffer] proto_buf
+    json_text = plan
+    serialized = SerializeJsonPlan(json_text)
+    # Unwrapping the result is done without holding the GIL
+    with nogil:
+        proto_buf = GetResultValue(serialized)
+    return pyarrow_wrap_buffer(proto_buf)
+class SubstraitSchema:
+    """A schema in its Substrait encodings.
+    Two representations are carried side by side: ``schema`` holds the
+    substrait ``NamedStruct`` form and ``expression`` holds the
+    ``ExtendedExpression`` form.
+    The ``ExtendedExpression`` form exists for schemas whose types
+    need extensions to be decoded; there the schema is the
+    ``base_schema`` of the ``ExtendedExpression`` and all extensions
+    are provided with it.
+    """
+    def __init__(self, schema, expression):
+        self.expression = expression
+        self.schema = schema
+    def to_pysubstrait(self):
+        """Return the schema as a substrait-python ExtendedExpression object."""
+        if py_substrait is not None:
+            extended_expression_cls = py_substrait.proto.ExtendedExpression
+            return extended_expression_cls.FromString(self.expression)
+        raise ImportError("The 'substrait' package is required.")
+def serialize_schema(schema):
+    """
+    Encode a schema for Substrait and wrap it in a SubstraitSchema.
+    Parameters
+    ----------
+    schema : Schema
+        The schema to encode
+    Returns
+    -------
+    SubstraitSchema
+        Holder of both the NamedStruct and ExtendedExpression encodings.
+    """
+    # NamedStruct form first, then the expression form with no expressions
+    named_struct = _serialize_namedstruct_schema(schema)
+    extended_expr = serialize_expressions(
+        [], [], schema, allow_arrow_extensions=True)
+    return SubstraitSchema(schema=named_struct, expression=extended_expr)
+def _serialize_namedstruct_schema(schema):
+    # Serialize `schema` with SerializeSchema using default conversion
+    # options and return the resulting bytes as a memoryview.
+    cdef:
+        CConversionOptions conversion_opts
+        CExtensionSet extension_set
+        CResult[shared_ptr[CBuffer]] serialized
+        shared_ptr[CBuffer] out_buf
+    with nogil:
+        serialized = SerializeSchema(
+            deref((<Schema> schema).sp_schema),
+            &extension_set,
+            conversion_opts)
+        out_buf = GetResultValue(serialized)
+    return memoryview(pyarrow_wrap_buffer(out_buf))
+def deserialize_schema(buf):
+    """
+    Turn a Substrait ``NamedStruct`` message, or a SubstraitSchema
+    object, back into an Arrow Schema
+    Parameters
+    ----------
+    buf : Buffer or bytes or SubstraitSchema
+        The encoded schema
+    Returns
+    -------
+    Schema
+        The decoded schema
+    """
+    cdef:
+        shared_ptr[CBuffer] encoded
+        CResult[shared_ptr[CSchema]] decode_result
+        shared_ptr[CSchema] decoded
+        CConversionOptions conversion_opts
+        CExtensionSet extension_set
+    # A SubstraitSchema is decoded through its ExtendedExpression form
+    if isinstance(buf, SubstraitSchema):
+        return deserialize_expressions(buf.expression).schema
+    if isinstance(buf, (bytes, memoryview)):
+        arrow_buf = py_buffer(buf)
+    elif isinstance(buf, Buffer):
+        arrow_buf = buf
+    else:
+        raise TypeError(
+            f"Expected 'pyarrow.Buffer' or bytes, got '{type(buf)}'")
+    encoded = pyarrow_unwrap_buffer(arrow_buf)
+    with nogil:
+        decode_result = DeserializeSchema(
+            deref(encoded), extension_set, conversion_opts)
+        decoded = GetResultValue(decode_result)
+    return pyarrow_wrap_schema(decoded)
+def serialize_expressions(exprs, names, schema, *, allow_arrow_extensions=False):
+    """
+    Serialize named expressions, bound to a schema, into Substrait
+    A Substrait expression is always bound to a schema: the
+    Substrait expression ``a:i32 + b:i32`` is not the same as the
+    Substrait expression ``a:i64 + b:i64``.  Pyarrow expressions are
+    typically unbound, so both of those would be written ``a + b``
+    in pyarrow.
+    A schema is therefore required to serialize an expression, and
+    serialization may fail when no matching function call can be
+    found for the expression.
+    Parameters
+    ----------
+    exprs : list of Expression
+        The expressions to serialize
+    names : list of str
+        One name per expression
+    schema : Schema
+        The schema the expressions are bound to
+    allow_arrow_extensions : bool, default False
+        If False then only functions that are part of the core Substrait function
+        definitions will be allowed.  Set this to True to allow pyarrow-specific functions
+        and user defined functions but the result may not be accepted by other
+        compute libraries.
+    Returns
+    -------
+    Buffer
+        An ExtendedExpression message containing the serialized expressions
+    """
+    cdef:
+        CConversionOptions conversion_opts
+        CBoundExpressions bound
+        CNamedExpression named
+        CResult[shared_ptr[CBuffer]] serialized
+        shared_ptr[CBuffer] out_buf
+    if len(exprs) != len(names):
+        raise ValueError("exprs and names need to have the same length")
+    for one_expr, one_name in zip(exprs, names):
+        # Validate the pair before touching the C++ structures
+        if not isinstance(one_expr, Expression):
+            raise TypeError(f"Expected Expression, got '{type(one_expr)}' in exprs")
+        if not isinstance(one_name, str):
+            raise TypeError(f"Expected str, got '{type(one_name)}' in names")
+        named.expression = (<Expression> one_expr).unwrap()
+        named.name = tobytes(<str> one_name)
+        bound.named_expressions.push_back(named)
+    bound.schema = (<Schema> schema).sp_schema
+    conversion_opts.allow_arrow_extensions = allow_arrow_extensions
+    with nogil:
+        serialized = SerializeExpressions(bound, conversion_opts)
+        out_buf = GetResultValue(serialized)
+    return memoryview(pyarrow_wrap_buffer(out_buf))
+cdef class BoundExpressions(_Weakrefable):
+    """
+    A collection of named expressions and the schema they are bound to
+    This is equivalent to the Substrait ExtendedExpression message
+    """
+    cdef:
+        CBoundExpressions c_bound_exprs
+    def __init__(self):
+        # Instances are only created internally through wrap()
+        msg = 'BoundExpressions is an abstract class thus cannot be initialized.'
+        raise TypeError(msg)
+    cdef void init(self, CBoundExpressions bound_expressions):
+        self.c_bound_exprs = bound_expressions
+    @property
+    def schema(self):
+        """
+        The common schema that all expressions are bound to
+        """
+        return pyarrow_wrap_schema(self.c_bound_exprs.schema)
+    @property
+    def expressions(self):
+        """
+        A dict from expression name to expression
+        """
+        expr_dict = {}
+        for named_expr in self.c_bound_exprs.named_expressions:
+            name = frombytes(named_expr.name)
+            expr = Expression.wrap(named_expr.expression)
+            expr_dict[name] = expr
+        return expr_dict
+    @staticmethod
+    cdef wrap(const CBoundExpressions& bound_expressions):
+        # __new__ bypasses __init__, which always raises
+        cdef BoundExpressions self = BoundExpressions.__new__(BoundExpressions)
+        self.init(bound_expressions)
+        return self
+    @classmethod
+    def from_substrait(cls, message):
+        """
+        Convert a Substrait message into a BoundExpressions object
+        Parameters
+        ----------
+        message : Buffer or bytes or protobuf Message
+            The message to convert to a BoundExpressions object
+        Returns
+        -------
+        BoundExpressions
+            The converted expressions, their names, and the bound schema
+        Raises
+        ------
+        TypeError
+            If ``message`` is neither bytes-like, a Buffer, nor an object
+            providing ``SerializeToString``.
+        """
+        if isinstance(message, (bytes, memoryview, Buffer)):
+            return deserialize_expressions(message)
+        # Only the attribute lookup decides whether this is a protobuf-like
+        # message.  An AttributeError raised while serializing or
+        # deserializing is a genuine error and must not be reported as a
+        # wrong argument type.
+        serialize = getattr(message, "SerializeToString", None)
+        if serialize is None:
+            raise TypeError(
+                f"Expected 'pyarrow.Buffer' or bytes or protobuf Message, got '{type(message)}'")
+        return deserialize_expressions(serialize())
+def deserialize_expressions(buf):
+    """
+    Decode an ExtendedExpression Substrait message into a BoundExpressions object
+    Parameters
+    ----------
+    buf : Buffer or bytes
+        The encoded message
+    Returns
+    -------
+    BoundExpressions
+        The decoded expressions, their names, and the bound schema
+    """
+    cdef:
+        shared_ptr[CBuffer] encoded
+        CResult[CBoundExpressions] decode_result
+        CBoundExpressions decoded
+    # Bytes-like input is wrapped in a Buffer before being unwrapped to C++
+    if isinstance(buf, (bytes, memoryview)):
+        arrow_buf = py_buffer(buf)
+    elif isinstance(buf, Buffer):
+        arrow_buf = buf
+    else:
+        raise TypeError(
+            f"Expected 'pyarrow.Buffer' or bytes, got '{type(buf)}'")
+    encoded = pyarrow_unwrap_buffer(arrow_buf)
+    with nogil:
+        decode_result = DeserializeExpressions(deref(encoded))
+        decoded = GetResultValue(decode_result)
+    return BoundExpressions.wrap(decoded)
+def get_supported_functions():
+    """
+    List the Substrait functions currently supported by the
+    underlying engine.
+    Returns
+    -------
+    list[str]
+        A list of function ids encoded as '{uri}#{name}'
+    """
+    cdef:
+        ExtensionIdRegistry* registry
+        std_vector[c_string] function_ids
+    registry = default_extension_id_registry()
+    function_ids = registry.GetSupportedSubstraitFunctions()
+    # Decode each id from bytes to str
+    return [frombytes(function_id) for function_id in function_ids]

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/cffi.py ADDED Viewed

	@@ -0,0 +1,81 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import absolute_import
+import cffi
+# C declarations handed to cffi below: the ArrowSchema, ArrowArray,
+# ArrowArrayStream and ArrowDeviceArray structs plus the ArrowDeviceType
+# typedef.  The text is parsed at import time by ffi.cdef().
+c_source = """
+    struct ArrowSchema {
+      // Array type description
+      const char* format;
+      const char* name;
+      const char* metadata;
+      int64_t flags;
+      int64_t n_children;
+      struct ArrowSchema** children;
+      struct ArrowSchema* dictionary;
+      // Release callback
+      void (*release)(struct ArrowSchema*);
+      // Opaque producer-specific data
+      void* private_data;
+    };
+    struct ArrowArray {
+      // Array data description
+      int64_t length;
+      int64_t null_count;
+      int64_t offset;
+      int64_t n_buffers;
+      int64_t n_children;
+      const void** buffers;
+      struct ArrowArray** children;
+      struct ArrowArray* dictionary;
+      // Release callback
+      void (*release)(struct ArrowArray*);
+      // Opaque producer-specific data
+      void* private_data;
+    };
+    struct ArrowArrayStream {
+      int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
+      int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);
+      const char* (*get_last_error)(struct ArrowArrayStream*);
+      // Release callback
+      void (*release)(struct ArrowArrayStream*);
+      // Opaque producer-specific data
+      void* private_data;
+    };
+    typedef int32_t ArrowDeviceType;
+    struct ArrowDeviceArray {
+      struct ArrowArray array;
+      int64_t device_id;
+      ArrowDeviceType device_type;
+      void* sync_event;
+      int64_t reserved[3];
+    };
+    """
+# TODO use out-of-line mode for faster import and avoid C parsing
+# Module-level FFI instance with the declarations above registered on it
+ffi = cffi.FFI()
+ffi.cdef(c_source)

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/compute.py ADDED Viewed

	@@ -0,0 +1,744 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from pyarrow._compute import (  # noqa
+    Function,
+    FunctionOptions,
+    FunctionRegistry,
+    HashAggregateFunction,
+    HashAggregateKernel,
+    Kernel,
+    ScalarAggregateFunction,
+    ScalarAggregateKernel,
+    ScalarFunction,
+    ScalarKernel,
+    VectorFunction,
+    VectorKernel,
+    # Option classes
+    ArraySortOptions,
+    AssumeTimezoneOptions,
+    CastOptions,
+    CountOptions,
+    CumulativeOptions,
+    CumulativeSumOptions,
+    DayOfWeekOptions,
+    DictionaryEncodeOptions,
+    RunEndEncodeOptions,
+    ElementWiseAggregateOptions,
+    ExtractRegexOptions,
+    FilterOptions,
+    IndexOptions,
+    JoinOptions,
+    ListSliceOptions,
+    ListFlattenOptions,
+    MakeStructOptions,
+    MapLookupOptions,
+    MatchSubstringOptions,
+    ModeOptions,
+    NullOptions,
+    PadOptions,
+    PairwiseOptions,
+    PartitionNthOptions,
+    QuantileOptions,
+    RandomOptions,
+    RankOptions,
+    ReplaceSliceOptions,
+    ReplaceSubstringOptions,
+    RoundBinaryOptions,
+    RoundOptions,
+    RoundTemporalOptions,
+    RoundToMultipleOptions,
+    ScalarAggregateOptions,
+    SelectKOptions,
+    SetLookupOptions,
+    SliceOptions,
+    SortOptions,
+    SplitOptions,
+    SplitPatternOptions,
+    StrftimeOptions,
+    StrptimeOptions,
+    StructFieldOptions,
+    TakeOptions,
+    TDigestOptions,
+    TrimOptions,
+    Utf8NormalizeOptions,
+    VarianceOptions,
+    WeekOptions,
+    # Functions
+    call_function,
+    function_registry,
+    get_function,
+    list_functions,
+    # Udf
+    call_tabular_function,
+    register_scalar_function,
+    register_tabular_function,
+    register_aggregate_function,
+    register_vector_function,
+    UdfContext,
+    # Expressions
+    Expression,
+)
+from collections import namedtuple
+import inspect
+from textwrap import dedent
+import warnings
+import pyarrow as pa
+from pyarrow import _compute_docstrings
+from pyarrow.vendored import docscrape
+def _get_arg_names(func):
+    return func._doc.arg_names
+_OptionsClassDoc = namedtuple('_OptionsClassDoc', ('params',))
+def _scrape_options_class_doc(options_class):
+    if not options_class.__doc__:
+        return None
+    doc = docscrape.NumpyDocString(options_class.__doc__)
+    return _OptionsClassDoc(doc['Parameters'])
+def _decorate_compute_function(wrapper, exposed_name, func, options_class):
+    # Decorate the given compute function wrapper with useful metadata
+    # and documentation.
+    cpp_doc = func._doc
+    wrapper.__arrow_compute_function__ = dict(
+        name=func.name,
+        arity=func.arity,
+        options_class=cpp_doc.options_class,
+        options_required=cpp_doc.options_required)
+    wrapper.__name__ = exposed_name
+    wrapper.__qualname__ = exposed_name
+    doc_pieces = []
+    # 1. One-line summary
+    summary = cpp_doc.summary
+    if not summary:
+        arg_str = "arguments" if func.arity > 1 else "argument"
+        summary = ("Call compute function {!r} with the given {}"
+                   .format(func.name, arg_str))
+    doc_pieces.append(f"{summary}.\n\n")
+    # 2. Multi-line description
+    description = cpp_doc.description
+    if description:
+        doc_pieces.append(f"{description}\n\n")
+    doc_addition = _compute_docstrings.function_doc_additions.get(func.name)
+    # 3. Parameter description
+    doc_pieces.append(dedent("""\
+        Parameters
+        ----------
+        """))
+    # 3a. Compute function parameters
+    arg_names = _get_arg_names(func)
+    for arg_name in arg_names:
+        if func.kind in ('vector', 'scalar_aggregate'):
+            arg_type = 'Array-like'
+        else:
+            arg_type = 'Array-like or scalar-like'
+        doc_pieces.append(f"{arg_name} : {arg_type}\n")
+        doc_pieces.append("    Argument to compute function.\n")
+    # 3b. Compute function option values
+    if options_class is not None:
+        options_class_doc = _scrape_options_class_doc(options_class)
+        if options_class_doc:
+            for p in options_class_doc.params:
+                doc_pieces.append(f"{p.name} : {p.type}\n")
+                for s in p.desc:
+                    doc_pieces.append(f"    {s}\n")
+        else:
+            warnings.warn(f"Options class {options_class.__name__} "
+                          f"does not have a docstring", RuntimeWarning)
+            options_sig = inspect.signature(options_class)
+            for p in options_sig.parameters.values():
+                doc_pieces.append(dedent("""\
+                {0} : optional
+                    Parameter for {1} constructor. Either `options`
+                    or `{0}` can be passed, but not both at the same time.
+                """.format(p.name, options_class.__name__)))
+        doc_pieces.append(dedent(f"""\
+            options : pyarrow.compute.{options_class.__name__}, optional
+                Alternative way of passing options.
+            """))
+    doc_pieces.append(dedent("""\
+        memory_pool : pyarrow.MemoryPool, optional
+            If not passed, will allocate memory from the default memory pool.
+        """))
+    # 4. Custom addition (e.g. examples)
+    if doc_addition is not None:
+        doc_pieces.append("\n{}\n".format(dedent(doc_addition).strip("\n")))
+    wrapper.__doc__ = "".join(doc_pieces)
+    return wrapper
+def _get_options_class(func):
+    class_name = func._doc.options_class
+    if not class_name:
+        return None
+    try:
+        return globals()[class_name]
+    except KeyError:
+        warnings.warn("Python binding for {} not exposed"
+                      .format(class_name), RuntimeWarning)
+        return None
+def _handle_options(name, options_class, options, args, kwargs):
+    if args or kwargs:
+        if options is not None:
+            raise TypeError(
+                "Function {!r} called with both an 'options' argument "
+                "and additional arguments"
+                .format(name))
+        return options_class(*args, **kwargs)
+    if options is not None:
+        if isinstance(options, dict):
+            return options_class(**options)
+        elif isinstance(options, options_class):
+            return options
+        raise TypeError(
+            "Function {!r} expected a {} parameter, got {}"
+            .format(name, options_class, type(options)))
+    return None
+def _make_generic_wrapper(func_name, func, options_class, arity):
+    if options_class is None:
+        def wrapper(*args, memory_pool=None):
+            if arity is not Ellipsis and len(args) != arity:
+                raise TypeError(
+                    f"{func_name} takes {arity} positional argument(s), "
+                    f"but {len(args)} were given"
+                )
+            if args and isinstance(args[0], Expression):
+                return Expression._call(func_name, list(args))
+            return func.call(args, None, memory_pool)
+    else:
+        def wrapper(*args, memory_pool=None, options=None, **kwargs):
+            if arity is not Ellipsis:
+                if len(args) < arity:
+                    raise TypeError(
+                        f"{func_name} takes {arity} positional argument(s), "
+                        f"but {len(args)} were given"
+                    )
+                option_args = args[arity:]
+                args = args[:arity]
+            else:
+                option_args = ()
+            options = _handle_options(func_name, options_class, options,
+                                      option_args, kwargs)
+            if args and isinstance(args[0], Expression):
+                return Expression._call(func_name, list(args), options)
+            return func.call(args, options, memory_pool)
+    return wrapper
+def _make_signature(arg_names, var_arg_names, options_class):
+    from inspect import Parameter
+    params = []
+    for name in arg_names:
+        params.append(Parameter(name, Parameter.POSITIONAL_ONLY))
+    for name in var_arg_names:
+        params.append(Parameter(name, Parameter.VAR_POSITIONAL))
+    if options_class is not None:
+        options_sig = inspect.signature(options_class)
+        for p in options_sig.parameters.values():
+            assert p.kind in (Parameter.POSITIONAL_OR_KEYWORD,
+                              Parameter.KEYWORD_ONLY)
+            if var_arg_names:
+                # Cannot have a positional argument after a *args
+                p = p.replace(kind=Parameter.KEYWORD_ONLY)
+            params.append(p)
+        params.append(Parameter("options", Parameter.KEYWORD_ONLY,
+                                default=None))
+    params.append(Parameter("memory_pool", Parameter.KEYWORD_ONLY,
+                            default=None))
+    return inspect.Signature(params)
+def _wrap_function(name, func):
+    options_class = _get_options_class(func)
+    arg_names = _get_arg_names(func)
+    has_vararg = arg_names and arg_names[-1].startswith('*')
+    if has_vararg:
+        var_arg_names = [arg_names.pop().lstrip('*')]
+    else:
+        var_arg_names = []
+    wrapper = _make_generic_wrapper(
+        name, func, options_class, arity=func.arity)
+    wrapper.__signature__ = _make_signature(arg_names, var_arg_names,
+                                            options_class)
+    return _decorate_compute_function(wrapper, name, func, options_class)
+def _make_global_functions():
+    """
+    Make global functions wrapping each compute function.
+    Note that some of the automatically-generated wrappers may be overridden
+    by custom versions below.
+    """
+    g = globals()
+    reg = function_registry()
+    # Avoid clashes with Python keywords
+    rewrites = {'and': 'and_',
+                'or': 'or_'}
+    for cpp_name in reg.list_functions():
+        name = rewrites.get(cpp_name, cpp_name)
+        func = reg.get_function(cpp_name)
+        if func.kind == "hash_aggregate":
+            # Hash aggregate functions are not callable,
+            # so let's not expose them at module level.
+            continue
+        if func.kind == "scalar_aggregate" and func.arity == 0:
+            # Nullary scalar aggregate functions are not callable
+            # directly so let's not expose them at module level.
+            continue
+        assert name not in g, name
+        g[cpp_name] = g[name] = _wrap_function(name, func)
+_make_global_functions()
+def cast(arr, target_type=None, safe=None, options=None, memory_pool=None):
+    """
+    Cast array values to another data type. Can also be invoked as an array
+    instance method.
+    Parameters
+    ----------
+    arr : Array-like
+    target_type : DataType or str
+        Type to cast to
+    safe : bool, default True
+        Check for overflows or other unsafe conversions
+    options : CastOptions, default None
+        Additional checks pass by CastOptions
+    memory_pool : MemoryPool, optional
+        memory pool to use for allocations during function execution.
+    Examples
+    --------
+    >>> from datetime import datetime
+    >>> import pyarrow as pa
+    >>> arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)])
+    >>> arr.type
+    TimestampType(timestamp[us])
+    You can use ``pyarrow.DataType`` objects to specify the target type:
+    >>> cast(arr, pa.timestamp('ms'))
+    <pyarrow.lib.TimestampArray object at ...>
+    [
+      2010-01-01 00:00:00.000,
+      2015-01-01 00:00:00.000
+    ]
+    >>> cast(arr, pa.timestamp('ms')).type
+    TimestampType(timestamp[ms])
+    Alternatively, it is also supported to use the string aliases for these
+    types:
+    >>> arr.cast('timestamp[ms]')
+    <pyarrow.lib.TimestampArray object at ...>
+    [
+      2010-01-01 00:00:00.000,
+      2015-01-01 00:00:00.000
+    ]
+    >>> arr.cast('timestamp[ms]').type
+    TimestampType(timestamp[ms])
+    Returns
+    -------
+    casted : Array
+        The cast result as a new Array
+    """
+    safe_vars_passed = (safe is not None) or (target_type is not None)
+    if safe_vars_passed and (options is not None):
+        raise ValueError("Must either pass values for 'target_type' and 'safe'"
+                         " or pass a value for 'options'")
+    if options is None:
+        target_type = pa.types.lib.ensure_type(target_type)
+        if safe is False:
+            options = CastOptions.unsafe(target_type)
+        else:
+            options = CastOptions.safe(target_type)
+    return call_function("cast", [arr], options, memory_pool)
+def index(data, value, start=None, end=None, *, memory_pool=None):
+    """
+    Find the index of the first occurrence of a given value.
+    Parameters
+    ----------
+    data : Array-like
+    value : Scalar-like object
+        The value to search for.
+    start : int, optional
+    end : int, optional
+    memory_pool : MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    Returns
+    -------
+    index : int
+        the index, or -1 if not found
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>> arr = pa.array(["Lorem", "ipsum", "dolor", "sit", "Lorem", "ipsum"])
+    >>> pc.index(arr, "ipsum")
+    <pyarrow.Int64Scalar: 1>
+    >>> pc.index(arr, "ipsum", start=2)
+    <pyarrow.Int64Scalar: 5>
+    >>> pc.index(arr, "amet")
+    <pyarrow.Int64Scalar: -1>
+    """
+    if start is not None:
+        if end is not None:
+            data = data.slice(start, end - start)
+        else:
+            data = data.slice(start)
+    elif end is not None:
+        data = data.slice(0, end)
+    if not isinstance(value, pa.Scalar):
+        value = pa.scalar(value, type=data.type)
+    elif data.type != value.type:
+        value = pa.scalar(value.as_py(), type=data.type)
+    options = IndexOptions(value=value)
+    result = call_function('index', [data], options, memory_pool)
+    if start is not None and result.as_py() >= 0:
+        result = pa.scalar(result.as_py() + start, type=pa.int64())
+    return result
+def take(data, indices, *, boundscheck=True, memory_pool=None):
+    """
+    Select values (or records) from array- or table-like data given integer
+    selection indices.
+    The result will be of the same type(s) as the input, with elements taken
+    from the input array (or record batch / table fields) at the given
+    indices. If an index is null then the corresponding value in the output
+    will be null.
+    Parameters
+    ----------
+    data : Array, ChunkedArray, RecordBatch, or Table
+    indices : Array, ChunkedArray
+        Must be of integer type
+    boundscheck : boolean, default True
+        Whether to boundscheck the indices. If False and there is an out of
+        bounds index, will likely cause the process to crash.
+    memory_pool : MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    Returns
+    -------
+    result : depends on inputs
+        Selected values for the given indices
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
+    >>> indices = pa.array([0, None, 4, 3])
+    >>> arr.take(indices)
+    <pyarrow.lib.StringArray object at ...>
+    [
+      "a",
+      null,
+      "e",
+      null
+    ]
+    """
+    options = TakeOptions(boundscheck=boundscheck)
+    return call_function('take', [data, indices], options, memory_pool)
+def fill_null(values, fill_value):
+    """Replace each null element in values with a corresponding
+    element from fill_value.
+    If fill_value is scalar-like, then every null element in values
+    will be replaced with fill_value. If fill_value is array-like,
+    then the i-th element in values will be replaced with the i-th
+    element in fill_value.
+    The fill_value's type must be the same as that of values, or it
+    must be able to be implicitly casted to the array's type.
+    This is an alias for :func:`coalesce`.
+    Parameters
+    ----------
+    values : Array, ChunkedArray, or Scalar-like object
+        Each null element is replaced with the corresponding value
+        from fill_value.
+    fill_value : Array, ChunkedArray, or Scalar-like object
+        If not same type as values, will attempt to cast.
+    Returns
+    -------
+    result : depends on inputs
+        Values with all null elements replaced
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> arr = pa.array([1, 2, None, 3], type=pa.int8())
+    >>> fill_value = pa.scalar(5, type=pa.int8())
+    >>> arr.fill_null(fill_value)
+    <pyarrow.lib.Int8Array object at ...>
+    [
+      1,
+      2,
+      5,
+      3
+    ]
+    >>> arr = pa.array([1, 2, None, 4, None])
+    >>> arr.fill_null(pa.array([10, 20, 30, 40, 50]))
+    <pyarrow.lib.Int64Array object at ...>
+    [
+      1,
+      2,
+      30,
+      4,
+      50
+    ]
+    """
+    if not isinstance(fill_value, (pa.Array, pa.ChunkedArray, pa.Scalar)):
+        fill_value = pa.scalar(fill_value, type=values.type)
+    elif values.type != fill_value.type:
+        fill_value = pa.scalar(fill_value.as_py(), type=values.type)
+    return call_function("coalesce", [values, fill_value])
+def top_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
+    """
+    Select the indices of the top-k ordered elements from array- or table-like
+    data.
+    This is a specialization for :func:`select_k_unstable`. Output is not
+    guaranteed to be stable.
+    Parameters
+    ----------
+    values : Array, ChunkedArray, RecordBatch, or Table
+        Data to sort and get top indices from.
+    k : int
+        The number of `k` elements to keep.
+    sort_keys : List-like
+        Column key names to order by when input is table-like data.
+    memory_pool : MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    Returns
+    -------
+    result : Array
+        Indices of the top-k ordered elements
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
+    >>> pc.top_k_unstable(arr, k=3)
+    <pyarrow.lib.UInt64Array object at ...>
+    [
+      5,
+      4,
+      2
+    ]
+    """
+    if sort_keys is None:
+        sort_keys = []
+    if isinstance(values, (pa.Array, pa.ChunkedArray)):
+        sort_keys.append(("dummy", "descending"))
+    else:
+        sort_keys = map(lambda key_name: (key_name, "descending"), sort_keys)
+    options = SelectKOptions(k, sort_keys)
+    return call_function("select_k_unstable", [values], options, memory_pool)
+def bottom_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
+    """
+    Select the indices of the bottom-k ordered elements from
+    array- or table-like data.
+    This is a specialization for :func:`select_k_unstable`. Output is not
+    guaranteed to be stable.
+    Parameters
+    ----------
+    values : Array, ChunkedArray, RecordBatch, or Table
+        Data to sort and get bottom indices from.
+    k : int
+        The number of `k` elements to keep.
+    sort_keys : List-like
+        Column key names to order by when input is table-like data.
+    memory_pool : MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    Returns
+    -------
+    result : Array of indices
+        Indices of the bottom-k ordered elements
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
+    >>> pc.bottom_k_unstable(arr, k=3)
+    <pyarrow.lib.UInt64Array object at ...>
+    [
+      0,
+      1,
+      2
+    ]
+    """
+    if sort_keys is None:
+        sort_keys = []
+    if isinstance(values, (pa.Array, pa.ChunkedArray)):
+        sort_keys.append(("dummy", "ascending"))
+    else:
+        sort_keys = map(lambda key_name: (key_name, "ascending"), sort_keys)
+    options = SelectKOptions(k, sort_keys)
+    return call_function("select_k_unstable", [values], options, memory_pool)
+def random(n, *, initializer='system', options=None, memory_pool=None):
+    """
+    Generate numbers in the range [0, 1).
+    Generated values are uniformly-distributed, double-precision
+    in range [0, 1). Algorithm and seed can be changed via RandomOptions.
+    Parameters
+    ----------
+    n : int
+        Number of values to generate, must be greater than or equal to 0
+    initializer : int or str
+        How to initialize the underlying random generator.
+        If an integer is given, it is used as a seed.
+        If "system" is given, the random generator is initialized with
+        a system-specific source of (hopefully true) randomness.
+        Other values are invalid.
+    options : pyarrow.compute.RandomOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+    options = RandomOptions(initializer=initializer)
+    return call_function("random", [], options, memory_pool, length=n)
+def field(*name_or_index):
+    """Reference a column of the dataset.
+    Stores only the field's name. Type and other information is known only when
+    the expression is bound to a dataset having an explicit scheme.
+    Nested references are allowed by passing multiple names or a tuple of
+    names. For example ``('foo', 'bar')`` references the field named "bar"
+    inside the field named "foo".
+    Parameters
+    ----------
+    *name_or_index : string, multiple strings, tuple or int
+        The name or index of the (possibly nested) field the expression
+        references to.
+    Returns
+    -------
+    field_expr : Expression
+        Reference to the given field
+    Examples
+    --------
+    >>> import pyarrow.compute as pc
+    >>> pc.field("a")
+    <pyarrow.compute.Expression a>
+    >>> pc.field(1)
+    <pyarrow.compute.Expression FieldPath(1)>
+    >>> pc.field(("a", "b"))
+    <pyarrow.compute.Expression FieldRef.Nested(FieldRef.Name(a) ...
+    >>> pc.field("a", "b")
+    <pyarrow.compute.Expression FieldRef.Nested(FieldRef.Name(a) ...
+    """
+    n = len(name_or_index)
+    if n == 1:
+        if isinstance(name_or_index[0], (str, int)):
+            return Expression._field(name_or_index[0])
+        elif isinstance(name_or_index[0], tuple):
+            return Expression._nested_field(name_or_index[0])
+        else:
+            raise TypeError(
+                "field reference should be str, multiple str, tuple or "
+                f"integer, got {type(name_or_index[0])}"
+            )
+    # In case of multiple strings not supplied in a tuple
+    else:
+        return Expression._nested_field(name_or_index)
+def scalar(value):
+    """Expression representing a scalar value.
+    The value is handed unchanged to ``Expression._scalar``.
+    Parameters
+    ----------
+    value : bool, int, float or string
+        Python value of the scalar. Note that only a subset of types are
+        currently supported.
+    Returns
+    -------
+    scalar_expr : Expression
+        An Expression representing the scalar value
+    """
+    return Expression._scalar(value)

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/config.pxi ADDED Viewed

	@@ -0,0 +1,95 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from pyarrow.includes.libarrow cimport GetBuildInfo
+from collections import namedtuple
+import os
+# (major, minor, patch) version triple
+VersionInfo = namedtuple('VersionInfo', ('major', 'minor', 'patch'))
+# Fields filled in from the C build information structure
+BuildInfo = namedtuple(
+    'BuildInfo',
+    (
+        'version', 'version_info',
+        'so_version', 'full_so_version',
+        'compiler_id', 'compiler_version', 'compiler_flags',
+        'git_id', 'git_description',
+        'package_kind', 'build_type',
+    ))
+# Fields filled in from the C runtime information structure
+RuntimeInfo = namedtuple(
+    'RuntimeInfo', ('simd_level', 'detected_simd_level'))
+cdef _build_info():
+    # Convert the structure returned by GetBuildInfo() into a BuildInfo
+    # namedtuple; byte strings are decoded with frombytes().
+    cdef:
+        const CBuildInfo* c_info
+    c_info = &GetBuildInfo()
+    version_info = VersionInfo(c_info.version_major,
+                               c_info.version_minor,
+                               c_info.version_patch)
+    # The build type is normalized to lower case
+    build_type = frombytes(c_info.build_type).lower()
+    return BuildInfo(
+        version=frombytes(c_info.version_string),
+        version_info=version_info,
+        so_version=frombytes(c_info.so_version),
+        full_so_version=frombytes(c_info.full_so_version),
+        compiler_id=frombytes(c_info.compiler_id),
+        compiler_version=frombytes(c_info.compiler_version),
+        compiler_flags=frombytes(c_info.compiler_flags),
+        git_id=frombytes(c_info.git_id),
+        git_description=frombytes(c_info.git_description),
+        package_kind=frombytes(c_info.package_kind),
+        build_type=build_type,
+    )
+# Computed once at import time
+cpp_build_info = _build_info()
+cpp_version = cpp_build_info.version
+cpp_version_info = cpp_build_info.version_info
+def runtime_info():
+    """
+    Return the runtime information reported by GetRuntimeInfo().
+    Returns
+    -------
+    info : pyarrow.RuntimeInfo
+        Named tuple with the `simd_level` and `detected_simd_level`
+        fields, both decoded to str.
+    """
+    cdef:
+        CRuntimeInfo c_info
+    c_info = GetRuntimeInfo()
+    simd_level = frombytes(c_info.simd_level)
+    detected_simd_level = frombytes(c_info.detected_simd_level)
+    return RuntimeInfo(simd_level=simd_level,
+                       detected_simd_level=detected_simd_level)
+def set_timezone_db_path(path):
+    """
+    Configure the path to text timezone database on Windows.
+    Parameters
+    ----------
+    path : str
+        Path to text timezone database. If None is passed, the options
+        are forwarded without a timezone database path being set.
+    """
+    cdef:
+        CGlobalOptions options
+    # Only fill in the field when a path was actually given.
+    if path is not None:
+        options.timezone_db_path = <c_string>tobytes(path)
+    # check_status() raises a Python exception if the returned status
+    # is not OK.
+    check_status(Initialize(options))

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/cuda.py ADDED Viewed

	@@ -0,0 +1,25 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# flake8: noqa
+from pyarrow._cuda import (Context, IpcMemHandle, CudaBuffer,
+                           HostBuffer, BufferReader, BufferWriter,
+                           new_host_buffer,
+                           serialize_record_batch, read_message,
+                           read_record_batch)

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/error.pxi ADDED Viewed

	@@ -0,0 +1,274 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from cpython.exc cimport PyErr_CheckSignals, PyErr_SetInterrupt
+from pyarrow.includes.libarrow cimport CStatus
+from pyarrow.includes.libarrow_python cimport IsPyError, RestorePyError
+from pyarrow.includes.common cimport c_string
+from contextlib import contextmanager
+import os
+import signal
+import threading
+from pyarrow.lib import is_threading_enabled
+from pyarrow.util import _break_traceback_cycle_from_frame
+class ArrowException(Exception):
+    """Common base class of the Arrow exceptions defined in this file."""
+class ArrowInvalid(ValueError, ArrowException):
+    """Exception used for statuses for which ``IsInvalid()`` is true."""
+class ArrowMemoryError(MemoryError, ArrowException):
+    """Exception used for statuses for which ``IsOutOfMemory()`` is true."""
+class ArrowKeyError(KeyError, ArrowException):
+    """Exception used for statuses for which ``IsKeyError()`` is true."""
+    def __str__(self):
+        # KeyError.__str__ would show the repr() of the key, so the
+        # ArrowException string conversion is used instead.
+        return ArrowException.__str__(self)
+class ArrowTypeError(TypeError, ArrowException):
+    """Exception used for statuses for which ``IsTypeError()`` is true."""
+class ArrowNotImplementedError(NotImplementedError, ArrowException):
+    """Exception used for statuses for which ``IsNotImplemented()`` is true."""
+class ArrowCapacityError(ArrowException):
+    """Exception used for statuses for which ``IsCapacityError()`` is true."""
+class ArrowIndexError(IndexError, ArrowException):
+    """Exception used for statuses for which ``IsIndexError()`` is true."""
+class ArrowSerializationError(ArrowException):
+    """Exception used for statuses for which ``IsSerializationError()`` is true."""
+class ArrowCancelled(ArrowException):
+    """Exception used for statuses for which ``IsCancelled()`` is true."""
+    def __init__(self, message, signum=None):
+        # `signum` is kept on the instance; it stays None when not given.
+        super().__init__(message)
+        self.signum = signum
+# Compatibility alias: ArrowIOError is the builtin IOError
+ArrowIOError = IOError
+# check_status() and convert_status() could be written directly in C++
+# if we didn't define Arrow-specific subclasses (ArrowInvalid etc.)
+cdef int check_status(const CStatus& status) except -1 nogil:
+    # Return 0 when `status` is OK.  Otherwise a Python exception is set
+    # or raised; the GIL is only acquired on this error path since the
+    # function itself is declared nogil.
+    if status.ok():
+        return 0
+    with gil:
+        if IsPyError(status):
+            # NOTE(review): presumably the status carries an original
+            # Python exception which RestorePyError() sets again; -1 is
+            # the declared error return value.
+            RestorePyError(status)
+            return -1
+        # Any other failure is converted to an exception object and raised.
+        raise convert_status(status)
+cdef object convert_status(const CStatus& status):
+    # Build and return (without raising) the Python exception object
+    # matching the given status.
+    if IsPyError(status):
+        try:
+            RestorePyError(status)
+        except BaseException as exc:
+            return exc
+    # We don't use Status::ToString() as it would redundantly include
+    # the C++ class name.
+    message = frombytes(status.message(), safe=True)
+    detail = status.detail()
+    if detail != nullptr:
+        message += ". Detail: " + frombytes(detail.get().ToString(),
+                                            safe=True)
+    if status.IsInvalid():
+        return ArrowInvalid(message)
+    if status.IsIOError():
+        # Note: OSError constructor is
+        #   OSError(message)
+        # or
+        #   OSError(errno, message, filename=None)
+        # or (on Windows)
+        #   OSError(errno, message, filename, winerror)
+        err_code = ErrnoFromStatus(status)
+        win_code = WinErrorFromStatus(status)
+        if win_code != 0:
+            return IOError(err_code, message, None, win_code)
+        if err_code != 0:
+            return IOError(err_code, message)
+        return IOError(message)
+    if status.IsOutOfMemory():
+        return ArrowMemoryError(message)
+    if status.IsKeyError():
+        return ArrowKeyError(message)
+    if status.IsNotImplemented():
+        return ArrowNotImplementedError(message)
+    if status.IsTypeError():
+        return ArrowTypeError(message)
+    if status.IsCapacityError():
+        return ArrowCapacityError(message)
+    if status.IsIndexError():
+        return ArrowIndexError(message)
+    if status.IsSerializationError():
+        return ArrowSerializationError(message)
+    if status.IsCancelled():
+        signum = SignalFromStatus(status)
+        if signum > 0:
+            return ArrowCancelled(message, signum)
+        return ArrowCancelled(message)
+    # No specific exception class: fall back to the full status string
+    return ArrowException(frombytes(status.ToString(), safe=True))
+# These are API functions for C++ PyArrow
+# `cdef api` export of check_status(): same arguments, same `except -1`
+# error convention, callable without the GIL.
+cdef api int pyarrow_internal_check_status(const CStatus& status) \
+        except -1 nogil:
+    return check_status(status)
+# `cdef api` export of convert_status(): returns the exception object built
+# for `status` without raising it.
+cdef api object pyarrow_internal_convert_status(const CStatus& status):
+    return convert_status(status)
+# Extension type holding a C++ CStopToken in its `stop_token` attribute
+# (the attribute itself is declared outside this view).
+cdef class StopToken:
+    # Store `stop_token` in this wrapper by moving it (no copy is made).
+    cdef void init(self, CStopToken stop_token):
+        self.stop_token = move(stop_token)
+# Module-level switch read by SignalStopHandler._init_signals(); it is only
+# modified through enable_signal_handlers() below.
+cdef c_bool signal_handlers_enabled = True
+def enable_signal_handlers(c_bool enable):
+    """
+    Enable or disable interruption of long-running operations.
+    By default, certain long running operations will detect user
+    interruptions, such as by pressing Ctrl-C.  This detection relies
+    on setting a signal handler for the duration of the long-running
+    operation, and may therefore interfere with other frameworks or
+    libraries (such as an event loop).
+    Parameters
+    ----------
+    enable : bool
+        Whether to enable user interruption by setting a temporary
+        signal handler.
+    """
+    # `global` is required so the assignment rebinds the module-level flag
+    # instead of creating a function-local variable.
+    global signal_handlers_enabled
+    signal_handlers_enabled = enable
+# For internal use
+# Whether we need a workaround for https://bugs.python.org/issue42248
+# True for Python < 3.8.10, for 3.9.0 up to (not including) 3.9.5, and for
+# any 3.10.x.  SignalStopHandler.__cinit__ consults this flag to decide
+# whether to call _break_traceback_cycle_from_frame().
+have_signal_refcycle = (sys.version_info < (3, 8, 10) or
+                        (3, 9) <= sys.version_info < (3, 9, 5) or
+                        sys.version_info[:2] == (3, 10))
+# Context manager that exposes a StopToken and, while active, registers a
+# cancelling handler for SIGINT/SIGTERM (only those that currently have a
+# non-default, non-ignored Python handler).  On exit a cancellation is
+# turned back into the corresponding Python-level signal.
+cdef class SignalStopHandler:
+    cdef:
+        # Token handed out through the `stop_token` property.
+        StopToken _stop_token
+        # Signal numbers to intercept; empty when handling is disabled or
+        # when not running on the main thread (see _init_signals).
+        vector[int] _signals
+        # True only when a signal stop source was successfully installed and
+        # is_threading_enabled() reported True.
+        c_bool _enabled
+    def __cinit__(self):
+        self._enabled = False
+        self._init_signals()
+        if have_signal_refcycle:
+            # Workaround for https://bugs.python.org/issue42248 (see the
+            # definition of have_signal_refcycle).
+            _break_traceback_cycle_from_frame(sys._getframe(0))
+        self._stop_token = StopToken()
+        # With no signals to watch, the token is left as constructed and the
+        # handler stays disabled.
+        if not self._signals.empty():
+            maybe_source = SetSignalStopSource()
+            if not maybe_source.ok():
+                # See ARROW-11841 / ARROW-17173: in complex interaction
+                # scenarios (such as R calling into Python), SetSignalStopSource()
+                # may have already activated a signal-receiving StopSource.
+                # Just warn instead of erroring out.
+                maybe_source.status().Warn()
+            else:
+                self._stop_token.init(deref(maybe_source).token())
+                # signals don't work on Emscripten without threads.
+                # and possibly other single-thread environments.
+                self._enabled = is_threading_enabled()
+    def _init_signals(self):
+        # Signals are only collected when handling is globally enabled and
+        # the handler is being created on the main thread.
+        if (signal_handlers_enabled and
+                threading.current_thread() is threading.main_thread()):
+            # Keep only signals that have a custom Python-level handler
+            # (not SIG_DFL, SIG_IGN, or None).
+            self._signals = [
+                sig for sig in (signal.SIGINT, signal.SIGTERM)
+                if signal.getsignal(sig) not in (signal.SIG_DFL,
+                                                 signal.SIG_IGN, None)]
+    def __enter__(self):
+        if self._enabled:
+            check_status(RegisterCancellingSignalHandler(self._signals))
+        return self
+    def __exit__(self, exc_type, exc_value, exc_tb):
+        if self._enabled:
+            UnregisterCancellingSignalHandler()
+        if exc_value is None:
+            # Make sure we didn't lose a signal
+            try:
+                check_status(self._stop_token.stop_token.Poll())
+            except ArrowCancelled as e:
+                exc_value = e
+        if isinstance(exc_value, ArrowCancelled):
+            if exc_value.signum:
+                # Re-emit the exact same signal. We restored the Python signal
+                # handler above, so it should receive it.
+                if os.name == 'nt':
+                    SendSignal(exc_value.signum)
+                else:
+                    SendSignalToThread(exc_value.signum,
+                                       threading.main_thread().ident)
+            else:
+                # Simulate Python receiving a SIGINT
+                # (see https://bugs.python.org/issue43356 for why we can't
+                #  simulate the exact signal number)
+                PyErr_SetInterrupt()
+            # Maximize chances of the Python signal handler being executed now.
+            # Otherwise a potential KeyboardInterrupt might be missed by an
+            # immediately enclosing try/except block.
+            PyErr_CheckSignals()
+            # ArrowCancelled will be re-raised if PyErr_CheckSignals()
+            # returned successfully.
+        # No value is returned, so any in-flight exception is not suppressed.
+    def __dealloc__(self):
+        # Mirrors the SetSignalStopSource() call made in __cinit__; only done
+        # when that setup succeeded (`_enabled`).
+        if self._enabled:
+            ResetSignalStopSource()
+    @property
+    def stop_token(self):
+        return self._stop_token

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/fs.py ADDED Viewed

	@@ -0,0 +1,431 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+FileSystem abstraction to interact with various local and remote filesystems.
+"""
+from pyarrow.util import _is_path_like, _stringify_path
+from pyarrow._fs import (  # noqa
+    FileSelector,
+    FileType,
+    FileInfo,
+    FileSystem,
+    LocalFileSystem,
+    SubTreeFileSystem,
+    _MockFileSystem,
+    FileSystemHandler,
+    PyFileSystem,
+    _copy_files,
+    _copy_files_selector,
+)
+# For backward compatibility.
+FileStats = FileInfo
+_not_imported = []
+try:
+    from pyarrow._azurefs import AzureFileSystem  # noqa
+except ImportError:
+    _not_imported.append("AzureFileSystem")
+try:
+    from pyarrow._hdfs import HadoopFileSystem  # noqa
+except ImportError:
+    _not_imported.append("HadoopFileSystem")
+try:
+    from pyarrow._gcsfs import GcsFileSystem  # noqa
+except ImportError:
+    _not_imported.append("GcsFileSystem")
+try:
+    from pyarrow._s3fs import (  # noqa
+        AwsDefaultS3RetryStrategy, AwsStandardS3RetryStrategy,
+        S3FileSystem, S3LogLevel, S3RetryStrategy, ensure_s3_initialized,
+        finalize_s3, ensure_s3_finalized, initialize_s3, resolve_s3_region)
+except ImportError:
+    _not_imported.append("S3FileSystem")
+else:
+    # GH-38364: we don't initialize S3 eagerly as that could lead
+    # to crashes at shutdown even when S3 isn't used.
+    # Instead, S3 is initialized lazily using `ensure_s3_initialized`
+    # in assorted places.
+    import atexit
+    atexit.register(ensure_s3_finalized)
+def __getattr__(name):
+    if name in _not_imported:
+        raise ImportError(
+            "The pyarrow installation is not built with support for "
+            "'{0}'".format(name)
+        )
+    raise AttributeError(
+        "module 'pyarrow.fs' has no attribute '{0}'".format(name)
+    )
+def _filesystem_from_str(uri):
+    # instantiate the file system from an uri, if the uri has a path
+    # component then it will be treated as a path prefix
+    filesystem, prefix = FileSystem.from_uri(uri)
+    prefix = filesystem.normalize_path(prefix)
+    if prefix:
+        # validate that the prefix is pointing to a directory
+        prefix_info = filesystem.get_file_info([prefix])[0]
+        if prefix_info.type != FileType.Directory:
+            raise ValueError(
+                "The path component of the filesystem URI must point to a "
+                "directory but it has a type: `{}`. The path component "
+                "is `{}` and the given filesystem URI is `{}`".format(
+                    prefix_info.type.name, prefix_info.path, uri
+                )
+            )
+        filesystem = SubTreeFileSystem(prefix, filesystem)
+    return filesystem
+def _ensure_filesystem(filesystem, *, use_mmap=False):
+    if isinstance(filesystem, FileSystem):
+        return filesystem
+    elif isinstance(filesystem, str):
+        if use_mmap:
+            raise ValueError(
+                "Specifying to use memory mapping not supported for "
+                "filesystem specified as an URI string"
+            )
+        return _filesystem_from_str(filesystem)
+    # handle fsspec-compatible filesystems
+    try:
+        import fsspec
+    except ImportError:
+        pass
+    else:
+        if isinstance(filesystem, fsspec.AbstractFileSystem):
+            if type(filesystem).__name__ == 'LocalFileSystem':
+                # In case its a simple LocalFileSystem, use native arrow one
+                return LocalFileSystem(use_mmap=use_mmap)
+            return PyFileSystem(FSSpecHandler(filesystem))
+    raise TypeError(
+        "Unrecognized filesystem: {}. `filesystem` argument must be a "
+        "FileSystem instance or a valid file system URI'".format(
+            type(filesystem))
+    )
+def _resolve_filesystem_and_path(path, filesystem=None, *, memory_map=False):
+    """
+    Return filesystem/path from path which could be an URI or a plain
+    filesystem path.
+    Parameters
+    ----------
+    path : path-like or file-like object
+        Anything for which `_is_path_like` is false is returned untouched.
+    filesystem : optional
+        Passed through `_ensure_filesystem` when given; must be None when
+        `path` is not path-like.
+    memory_map : bool, default False
+        Forwarded as `use_mmap` to `_ensure_filesystem` / LocalFileSystem.
+    Returns
+    -------
+    (filesystem, path) : tuple
+        `filesystem` is None when `path` is not path-like.
+    Raises
+    ------
+    ValueError
+        If `filesystem` is given together with a non path-like `path`, or
+        if URI parsing fails for a reason other than a missing scheme /
+        unparsable URI.
+    TypeError
+        If a non-string path-like is combined with a non-local filesystem.
+    """
+    if not _is_path_like(path):
+        if filesystem is not None:
+            raise ValueError(
+                "'filesystem' passed but the specified path is file-like, so"
+                " there is nothing to open with 'filesystem'."
+            )
+        # Here `filesystem` is necessarily None.
+        return filesystem, path
+    if filesystem is not None:
+        filesystem = _ensure_filesystem(filesystem, use_mmap=memory_map)
+        if isinstance(filesystem, LocalFileSystem):
+            path = _stringify_path(path)
+        elif not isinstance(path, str):
+            raise TypeError(
+                "Expected string path; path-like objects are only allowed "
+                "with a local filesystem"
+            )
+        path = filesystem.normalize_path(path)
+        return filesystem, path
+    path = _stringify_path(path)
+    # if filesystem is not given, try to automatically determine one
+    # first check if the file exists as a local (relative) file path
+    # if not then try to parse the path as an URI
+    filesystem = LocalFileSystem(use_mmap=memory_map)
+    try:
+        file_info = filesystem.get_file_info(path)
+    except ValueError:  # ValueError means path is likely an URI
+        file_info = None
+        exists_locally = False
+    else:
+        exists_locally = (file_info.type != FileType.NotFound)
+    # if the file or directory doesn't exists locally, then assume that
+    # the path is an URI describing the file system as well
+    if not exists_locally:
+        try:
+            filesystem, path = FileSystem.from_uri(path)
+        except ValueError as e:
+            # neither an URI nor a locally existing path, so assume that
+            # local path was given and propagate a nicer file not found error
+            # instead of a more confusing scheme parsing error
+            if "empty scheme" not in str(e) \
+                    and "Cannot parse URI" not in str(e):
+                raise
+            # Swallowed: the LocalFileSystem created above and the
+            # stringified (un-normalized) path are returned below.
+    else:
+        # This `else` pairs with `if not exists_locally`: only paths found
+        # locally are normalized here.
+        path = filesystem.normalize_path(path)
+    return filesystem, path
+def copy_files(source, destination,
+               source_filesystem=None, destination_filesystem=None,
+               *, chunk_size=1024*1024, use_threads=True):
+    """
+    Copy files between FileSystems.
+    This functions allows you to recursively copy directories of files from
+    one file system to another, such as from S3 to your local machine.
+    Parameters
+    ----------
+    source : string
+        Source file path or URI to a single file or directory.
+        If a directory, files will be copied recursively from this path.
+    destination : string
+        Destination file path or URI. If `source` is a file, `destination`
+        is also interpreted as the destination file (not directory).
+        Directories will be created as necessary.
+    source_filesystem : FileSystem, optional
+        Source filesystem, needs to be specified if `source` is not a URI,
+        otherwise inferred.
+    destination_filesystem : FileSystem, optional
+        Destination filesystem, needs to be specified if `destination` is not
+        a URI, otherwise inferred.
+    chunk_size : int, default 1MB
+        The maximum size of block to read before flushing to the
+        destination file. A larger chunk_size will use more memory while
+        copying but may help accommodate high latency FileSystems.
+    use_threads : bool, default True
+        Whether to use multiple threads to accelerate copying.
+    Examples
+    --------
+    Inspect an S3 bucket's files:
+    >>> s3, path = fs.FileSystem.from_uri(
+    ...            "s3://registry.opendata.aws/roda/ndjson/")
+    >>> selector = fs.FileSelector(path)
+    >>> s3.get_file_info(selector)
+    [<FileInfo for 'registry.opendata.aws/roda/ndjson/index.ndjson':...]
+    Copy one file from S3 bucket to a local directory:
+    >>> fs.copy_files("s3://registry.opendata.aws/roda/ndjson/index.ndjson",
+    ...               "file:///{}/index_copy.ndjson".format(local_path))
+    >>> fs.LocalFileSystem().get_file_info(str(local_path)+
+    ...                                    '/index_copy.ndjson')
+    <FileInfo for '.../index_copy.ndjson': type=FileType.File, size=...>
+    Copy file using a FileSystem object:
+    >>> fs.copy_files("registry.opendata.aws/roda/ndjson/index.ndjson",
+    ...               "file:///{}/index_copy.ndjson".format(local_path),
+    ...               source_filesystem=fs.S3FileSystem())
+    """
+    source_fs, source_path = _resolve_filesystem_and_path(
+        source, source_filesystem
+    )
+    destination_fs, destination_path = _resolve_filesystem_and_path(
+        destination, destination_filesystem
+    )
+    file_info = source_fs.get_file_info(source_path)
+    if file_info.type == FileType.Directory:
+        source_sel = FileSelector(source_path, recursive=True)
+        _copy_files_selector(source_fs, source_sel,
+                             destination_fs, destination_path,
+                             chunk_size, use_threads)
+    else:
+        _copy_files(source_fs, source_path,
+                    destination_fs, destination_path,
+                    chunk_size, use_threads)
+class FSSpecHandler(FileSystemHandler):
+    """
+    Handler for fsspec-based Python filesystems.
+    https://filesystem-spec.readthedocs.io/en/latest/index.html
+    Parameters
+    ----------
+    fs : FSSpec-compliant filesystem instance
+    Examples
+    --------
+    >>> PyFileSystem(FSSpecHandler(fsspec_fs)) # doctest: +SKIP
+    """
+    def __init__(self, fs):
+        self.fs = fs
+    def __eq__(self, other):
+        if isinstance(other, FSSpecHandler):
+            return self.fs == other.fs
+        return NotImplemented
+    def __ne__(self, other):
+        if isinstance(other, FSSpecHandler):
+            return self.fs != other.fs
+        return NotImplemented
+    def get_type_name(self):
+        protocol = self.fs.protocol
+        if isinstance(protocol, list):
+            protocol = protocol[0]
+        return "fsspec+{0}".format(protocol)
+    def normalize_path(self, path):
+        return path
+    @staticmethod
+    def _create_file_info(path, info):
+        size = info["size"]
+        if info["type"] == "file":
+            ftype = FileType.File
+        elif info["type"] == "directory":
+            ftype = FileType.Directory
+            # some fsspec filesystems include a file size for directories
+            size = None
+        else:
+            ftype = FileType.Unknown
+        return FileInfo(path, ftype, size=size, mtime=info.get("mtime", None))
+    def get_file_info(self, paths):
+        infos = []
+        for path in paths:
+            try:
+                info = self.fs.info(path)
+            except FileNotFoundError:
+                infos.append(FileInfo(path, FileType.NotFound))
+            else:
+                infos.append(self._create_file_info(path, info))
+        return infos
+    def get_file_info_selector(self, selector):
+        if not self.fs.isdir(selector.base_dir):
+            if self.fs.exists(selector.base_dir):
+                raise NotADirectoryError(selector.base_dir)
+            else:
+                if selector.allow_not_found:
+                    return []
+                else:
+                    raise FileNotFoundError(selector.base_dir)
+        if selector.recursive:
+            maxdepth = None
+        else:
+            maxdepth = 1
+        infos = []
+        selected_files = self.fs.find(
+            selector.base_dir, maxdepth=maxdepth, withdirs=True, detail=True
+        )
+        for path, info in selected_files.items():
+            _path = path.strip("/")
+            base_dir = selector.base_dir.strip("/")
+            # Need to exclude base directory from selected files if present
+            # (fsspec filesystems, see GH-37555)
+            if _path != base_dir:
+                infos.append(self._create_file_info(path, info))
+        return infos
+    def create_dir(self, path, recursive):
+        # mkdir also raises FileNotFoundError when base directory is not found
+        try:
+            self.fs.mkdir(path, create_parents=recursive)
+        except FileExistsError:
+            pass
+    def delete_dir(self, path):
+        self.fs.rm(path, recursive=True)
+    def _delete_dir_contents(self, path, missing_dir_ok):
+        try:
+            subpaths = self.fs.listdir(path, detail=False)
+        except FileNotFoundError:
+            if missing_dir_ok:
+                return
+            raise
+        for subpath in subpaths:
+            if self.fs.isdir(subpath):
+                self.fs.rm(subpath, recursive=True)
+            elif self.fs.isfile(subpath):
+                self.fs.rm(subpath)
+    def delete_dir_contents(self, path, missing_dir_ok):
+        if path.strip("/") == "":
+            raise ValueError(
+                "delete_dir_contents called on path '", path, "'")
+        self._delete_dir_contents(path, missing_dir_ok)
+    def delete_root_dir_contents(self):
+        self._delete_dir_contents("/")
+    def delete_file(self, path):
+        # fs.rm correctly raises IsADirectoryError when `path` is a directory
+        # instead of a file and `recursive` is not set to True
+        if not self.fs.exists(path):
+            raise FileNotFoundError(path)
+        self.fs.rm(path)
+    def move(self, src, dest):
+        self.fs.mv(src, dest, recursive=True)
+    def copy_file(self, src, dest):
+        # fs.copy correctly raises IsADirectoryError when `src` is a directory
+        # instead of a file
+        self.fs.copy(src, dest)
+    # TODO can we read/pass metadata (e.g. Content-Type) in the methods below?
+    def open_input_stream(self, path):
+        from pyarrow import PythonFile
+        if not self.fs.isfile(path):
+            raise FileNotFoundError(path)
+        return PythonFile(self.fs.open(path, mode="rb"), mode="r")
+    def open_input_file(self, path):
+        from pyarrow import PythonFile
+        if not self.fs.isfile(path):
+            raise FileNotFoundError(path)
+        return PythonFile(self.fs.open(path, mode="rb"), mode="r")
+    def open_output_stream(self, path, metadata):
+        from pyarrow import PythonFile
+        return PythonFile(self.fs.open(path, mode="wb"), mode="w")
+    def open_append_stream(self, path, metadata):
+        from pyarrow import PythonFile
+        return PythonFile(self.fs.open(path, mode="ab"), mode="w")

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/ipc.pxi ADDED Viewed

	@@ -0,0 +1,1403 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from cpython.pycapsule cimport PyCapsule_CheckExact, PyCapsule_GetPointer, PyCapsule_New
+from collections import namedtuple
+import warnings
+from cython import sizeof
+# Python-visible enum of IPC metadata versions; each member takes the value
+# of the matching C++ CMetadataVersion constant, cast to char.
+cpdef enum MetadataVersion:
+    V1 = <char> CMetadataVersion_V1
+    V2 = <char> CMetadataVersion_V2
+    V3 = <char> CMetadataVersion_V3
+    V4 = <char> CMetadataVersion_V4
+    V5 = <char> CMetadataVersion_V5
+# Convert a C++ CMetadataVersion into the Python-level MetadataVersion
+# member with the same (char) value.
+cdef object _wrap_metadata_version(CMetadataVersion version):
+    return MetadataVersion(<char> version)
+cdef CMetadataVersion _unwrap_metadata_version(
+        MetadataVersion version) except *:
+    # Map a Python-level MetadataVersion member onto its C++ counterpart.
+    # Each match returns immediately; anything unmatched is rejected with
+    # ValueError at the end.
+    if version == MetadataVersion.V1:
+        return CMetadataVersion_V1
+    if version == MetadataVersion.V2:
+        return CMetadataVersion_V2
+    if version == MetadataVersion.V3:
+        return CMetadataVersion_V3
+    if version == MetadataVersion.V4:
+        return CMetadataVersion_V4
+    if version == MetadataVersion.V5:
+        return CMetadataVersion_V5
+    raise ValueError("Not a metadata version: " + repr(version))
+# Underlying namedtuple; the public WriteStats class below only adds a
+# docstring and an empty __slots__ on top of it.
+_WriteStats = namedtuple(
+    'WriteStats',
+    ('num_messages', 'num_record_batches', 'num_dictionary_batches',
+     'num_dictionary_deltas', 'num_replaced_dictionaries'))
+class WriteStats(_WriteStats):
+    """IPC write statistics
+    Parameters
+    ----------
+    num_messages : int
+        Number of messages.
+    num_record_batches : int
+        Number of record batches.
+    num_dictionary_batches : int
+        Number of dictionary batches.
+    num_dictionary_deltas : int
+        Delta of dictionaries.
+    num_replaced_dictionaries : int
+        Number of replaced dictionaries.
+    """
+    # Empty __slots__: the subclass declares no attributes beyond the tuple
+    # fields.
+    __slots__ = ()
+# Copy the counters of a C++ CIpcWriteStats struct into a WriteStats tuple,
+# field by field and in declaration order.
+# NOTE(review): `@staticmethod` on what appears to be a module-level cdef
+# function looks unintended -- confirm before removing.
+@staticmethod
+cdef _wrap_write_stats(CIpcWriteStats c):
+    return WriteStats(c.num_messages, c.num_record_batches,
+                      c.num_dictionary_batches, c.num_dictionary_deltas,
+                      c.num_replaced_dictionaries)
+# Underlying namedtuple; the public ReadStats class below only adds a
+# docstring and an empty __slots__ on top of it.
+_ReadStats = namedtuple(
+    'ReadStats',
+    ('num_messages', 'num_record_batches', 'num_dictionary_batches',
+     'num_dictionary_deltas', 'num_replaced_dictionaries'))
+class ReadStats(_ReadStats):
+    """IPC read statistics
+    Parameters
+    ----------
+    num_messages : int
+        Number of messages.
+    num_record_batches : int
+        Number of record batches.
+    num_dictionary_batches : int
+        Number of dictionary batches.
+    num_dictionary_deltas : int
+        Delta of dictionaries.
+    num_replaced_dictionaries : int
+        Number of replaced dictionaries.
+    """
+    # Empty __slots__: the subclass declares no attributes beyond the tuple
+    # fields.
+    __slots__ = ()
+# Copy the counters of a C++ CIpcReadStats struct into a ReadStats tuple,
+# field by field and in declaration order.
+# NOTE(review): `@staticmethod` on what appears to be a module-level cdef
+# function looks unintended -- confirm before removing.
+@staticmethod
+cdef _wrap_read_stats(CIpcReadStats c):
+    return ReadStats(c.num_messages, c.num_record_batches,
+                     c.num_dictionary_batches, c.num_dictionary_deltas,
+                     c.num_replaced_dictionaries)
+# Thin Python wrapper around a C++ CIpcReadOptions value (`c_options`);
+# every property below reads or writes the field of the same name on it.
+cdef class IpcReadOptions(_Weakrefable):
+    """
+    Serialization options for reading IPC format.
+    Parameters
+    ----------
+    ensure_native_endian : bool, default True
+        Whether to convert incoming data to platform-native endianness.
+    use_threads : bool
+        Whether to use the global CPU thread pool to parallelize any
+        computational tasks like decompression
+    included_fields : list
+        If empty (the default), return all deserialized fields.
+        If non-empty, the values are the indices of fields to read on
+        the top-level schema
+    """
+    __slots__ = ()
+    # cdef block is in lib.pxd
+    def __init__(self, *, bint ensure_native_endian=True,
+                 bint use_threads=True, list included_fields=None):
+        # Start from the C++ defaults, then apply the keyword arguments
+        # through the property setters.
+        self.c_options = CIpcReadOptions.Defaults()
+        self.ensure_native_endian = ensure_native_endian
+        self.use_threads = use_threads
+        # None keeps whatever Defaults() put in included_fields.
+        if included_fields is not None:
+            self.included_fields = included_fields
+    @property
+    def ensure_native_endian(self):
+        return self.c_options.ensure_native_endian
+    @ensure_native_endian.setter
+    def ensure_native_endian(self, bint value):
+        self.c_options.ensure_native_endian = value
+    @property
+    def use_threads(self):
+        return self.c_options.use_threads
+    @use_threads.setter
+    def use_threads(self, bint value):
+        self.c_options.use_threads = value
+    @property
+    def included_fields(self):
+        return self.c_options.included_fields
+    @included_fields.setter
+    def included_fields(self, list value not None):
+        # `not None` makes assigning None a TypeError rather than a reset.
+        self.c_options.included_fields = value
+# Thin Python wrapper around a C++ CIpcWriteOptions value (`c_options`);
+# the properties below read or write individual fields of it.
+cdef class IpcWriteOptions(_Weakrefable):
+    """
+    Serialization options for the IPC format.
+    Parameters
+    ----------
+    metadata_version : MetadataVersion, default MetadataVersion.V5
+        The metadata version to write.  V5 is the current and latest,
+        V4 is the pre-1.0 metadata version (with incompatible Union layout).
+    allow_64bit : bool, default False
+        If true, allow field lengths that don't fit in a signed 32-bit int.
+    use_legacy_format : bool, default False
+        Whether to use the pre-Arrow 0.15 IPC format.
+    compression : str, Codec, or None
+        compression codec to use for record batch buffers.
+        If None then batch buffers will be uncompressed.
+        Must be "lz4", "zstd" or None.
+        To specify a compression_level use `pyarrow.Codec`
+    use_threads : bool
+        Whether to use the global CPU thread pool to parallelize any
+        computational tasks like compression.
+    emit_dictionary_deltas : bool
+        Whether to emit dictionary deltas.  Default is false for maximum
+        stream compatibility.
+    unify_dictionaries : bool
+        If true then calls to write_table will attempt to unify dictionaries
+        across all batches in the table.  This can help avoid the need for
+        replacement dictionaries (which the file format does not support)
+        but requires computing the unified dictionary and then remapping
+        the indices arrays.
+        This parameter is ignored when writing to the IPC stream format as
+        the IPC stream format can support replacement dictionaries.
+    """
+    __slots__ = ()
+    # cdef block is in lib.pxd
+    def __init__(self, *, metadata_version=MetadataVersion.V5,
+                 bint allow_64bit=False, use_legacy_format=False,
+                 compression=None, bint use_threads=True,
+                 bint emit_dictionary_deltas=False,
+                 bint unify_dictionaries=False):
+        # Start from the C++ defaults, then apply the keyword arguments
+        # through the property setters (which perform the validation).
+        self.c_options = CIpcWriteOptions.Defaults()
+        self.allow_64bit = allow_64bit
+        self.use_legacy_format = use_legacy_format
+        self.metadata_version = metadata_version
+        # None keeps whatever codec Defaults() configured.
+        if compression is not None:
+            self.compression = compression
+        self.use_threads = use_threads
+        self.emit_dictionary_deltas = emit_dictionary_deltas
+        self.unify_dictionaries = unify_dictionaries
+    @property
+    def allow_64bit(self):
+        return self.c_options.allow_64bit
+    @allow_64bit.setter
+    def allow_64bit(self, bint value):
+        self.c_options.allow_64bit = value
+    @property
+    def use_legacy_format(self):
+        # Exposed under a different name than the C++ field it maps to.
+        return self.c_options.write_legacy_ipc_format
+    @use_legacy_format.setter
+    def use_legacy_format(self, bint value):
+        self.c_options.write_legacy_ipc_format = value
+    @property
+    def metadata_version(self):
+        return _wrap_metadata_version(self.c_options.metadata_version)
+    @metadata_version.setter
+    def metadata_version(self, value):
+        # _unwrap_metadata_version raises ValueError for unknown versions.
+        self.c_options.metadata_version = _unwrap_metadata_version(value)
+    @property
+    def compression(self):
+        # Returns the codec's name as str, or None when no codec is set.
+        if self.c_options.codec == nullptr:
+            return None
+        else:
+            return frombytes(self.c_options.codec.get().name())
+    @compression.setter
+    def compression(self, value):
+        if value is None:
+            # Drop any configured codec.
+            self.c_options.codec.reset()
+        elif isinstance(value, str):
+            codec_type = _ensure_compression(value)
+            # Only ZSTD and LZ4 (frame format) are accepted here.
+            if codec_type != CCompressionType_ZSTD and codec_type != CCompressionType_LZ4_FRAME:
+                raise ValueError("Compression type must be lz4, zstd or None")
+            # Create() yields a unique_ptr inside a Result; release() hands
+            # the raw pointer over to the shared_ptr stored in the options.
+            self.c_options.codec = shared_ptr[CCodec](GetResultValue(
+                CCodec.Create(codec_type)).release())
+        elif isinstance(value, Codec):
+            if value.name != "lz4" and value.name != "zstd":
+                raise ValueError("Compression type must be lz4, zstd or None")
+            # Reuse the codec object already held by the Codec wrapper.
+            self.c_options.codec = (<Codec>value).wrapped
+        else:
+            raise TypeError(
+                "Property `compression` must be None, str, or pyarrow.Codec")
+    @property
+    def use_threads(self):
+        return self.c_options.use_threads
+    @use_threads.setter
+    def use_threads(self, bint value):
+        self.c_options.use_threads = value
+    @property
+    def emit_dictionary_deltas(self):
+        return self.c_options.emit_dictionary_deltas
+    @emit_dictionary_deltas.setter
+    def emit_dictionary_deltas(self, bint value):
+        self.c_options.emit_dictionary_deltas = value
+    @property
+    def unify_dictionaries(self):
+        return self.c_options.unify_dictionaries
+    @unify_dictionaries.setter
+    def unify_dictionaries(self, bint value):
+        self.c_options.unify_dictionaries = value
+# Wrapper around a C++ IPC message held in `self.message` (declared outside
+# this view).  Instances cannot be created through the Python constructor.
+cdef class Message(_Weakrefable):
+    """
+    Container for an Arrow IPC message with metadata and optional body
+    """
+    def __cinit__(self):
+        pass
+    def __init__(self):
+        # Direct construction is always rejected.
+        raise TypeError("Do not call {}'s constructor directly, use "
+                        "`pyarrow.ipc.read_message` function instead."
+                        .format(self.__class__.__name__))
+    @property
+    def type(self):
+        # Message type rendered as a str via FormatMessageType().
+        return frombytes(FormatMessageType(self.message.get().type()))
+    @property
+    def metadata(self):
+        return pyarrow_wrap_buffer(self.message.get().metadata())
+    @property
+    def metadata_version(self):
+        return _wrap_metadata_version(self.message.get().metadata_version())
+    @property
+    def body(self):
+        # None when the message has no body buffer.
+        cdef shared_ptr[CBuffer] body = self.message.get().body()
+        if body.get() == NULL:
+            return None
+        else:
+            return pyarrow_wrap_buffer(body)
+    def equals(self, Message other):
+        """
+        Returns True if the message contents (metadata and body) are identical
+        Parameters
+        ----------
+        other : Message
+        Returns
+        -------
+        are_equal : bool
+        """
+        cdef c_bool result
+        # The comparison runs with the GIL released.
+        with nogil:
+            result = self.message.get().Equals(deref(other.message.get()))
+        return result
+    def serialize_to(self, NativeFile sink, alignment=8, memory_pool=None):
+        """
+        Write message to generic OutputStream
+        Parameters
+        ----------
+        sink : NativeFile
+        alignment : int, default 8
+            Byte alignment for metadata and body
+        memory_pool : MemoryPool, default None
+            Uses default memory pool if not specified
+        """
+        # NOTE(review): `memory_pool` is accepted but never referenced in
+        # this method body, and `output_length` is filled in by SerializeTo()
+        # but not read afterwards.
+        cdef:
+            int64_t output_length = 0
+            COutputStream* out
+            CIpcWriteOptions options
+        options.alignment = alignment
+        out = sink.get_output_stream().get()
+        with nogil:
+            check_status(self.message.get()
+                         .SerializeTo(out, options, &output_length))
+    def serialize(self, alignment=8, memory_pool=None):
+        """
+        Write message as encapsulated IPC message
+        Parameters
+        ----------
+        alignment : int, default 8
+            Byte alignment for metadata and body
+        memory_pool : MemoryPool, default None
+            Uses default memory pool if not specified
+        Returns
+        -------
+        serialized : Buffer
+        """
+        # Serialize into an in-memory stream backed by `memory_pool`, then
+        # return the accumulated buffer.
+        stream = BufferOutputStream(memory_pool)
+        self.serialize_to(stream, alignment=alignment, memory_pool=memory_pool)
+        return stream.getvalue()
+    def __repr__(self):
+        # Guard against a wrapper whose underlying message was never set.
+        if self.message == nullptr:
+            return """pyarrow.Message(uninitialized)"""
+        metadata_len = self.metadata.size
+        body = self.body
+        # A message without a body is reported with length 0.
+        body_len = 0 if body is None else body.size
+        return """pyarrow.Message
+type: {0}
+metadata length: {1}
+body length: {2}""".format(self.type, metadata_len, body_len)
+cdef class MessageReader(_Weakrefable):
+    """
+    Interface for reading Message objects from some source (like an
+    InputStream)
+    """
+    cdef:
+        unique_ptr[CMessageReader] reader
+    def __cinit__(self):
+        pass
+    def __init__(self):
+        # Direct construction is rejected; open_stream() builds instances
+        # through MessageReader.__new__ instead.
+        raise TypeError("Do not call {}'s constructor directly, use "
+                        "`pyarrow.ipc.MessageReader.open_stream` function "
+                        "instead.".format(self.__class__.__name__))
+    @staticmethod
+    def open_stream(source):
+        """
+        Open stream from source, if you want to use memory map use
+        MemoryMappedFile as source.
+        Parameters
+        ----------
+        source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
+            A readable source, like an InputStream
+        Returns
+        -------
+        reader : MessageReader
+        """
+        cdef:
+            MessageReader result = MessageReader.__new__(MessageReader)
+            shared_ptr[CInputStream] in_stream
+            unique_ptr[CMessageReader] reader
+        _get_input_stream(source, &in_stream)
+        with nogil:
+            reader = CMessageReader.Open(in_stream)
+            # Hand ownership of the C++ reader from the local unique_ptr
+            # over to the wrapper object.
+            result.reader.reset(reader.release())
+        return result
+    def __iter__(self):
+        return self
+    def __next__(self):
+        # read_next_message() raises StopIteration itself at end of stream.
+        return self.read_next_message()
+    def read_next_message(self):
+        """
+        Read next Message from the stream.
+        Raises
+        ------
+        StopIteration
+            At end of stream
+        """
+        cdef Message result = Message.__new__(Message)
+        with nogil:
+            result.message = move(GetResultValue(self.reader.get()
+                                                 .ReadNextMessage()))
+        # A null message pointer is treated as the end of the stream.
+        if result.message.get() == NULL:
+            raise StopIteration
+        return result
+# ----------------------------------------------------------------------
+# File and stream readers and writers
+cdef class _CRecordBatchWriter(_Weakrefable):
+    """The base RecordBatchWriter wrapper.
+    Provides common implementations of convenience methods. Should not
+    be instantiated directly by user code.
+    """
+    # cdef block is in lib.pxd
+    def write(self, table_or_batch):
+        """
+        Write RecordBatch or Table to stream.
+        Parameters
+        ----------
+        table_or_batch : {RecordBatch, Table}
+        Raises
+        ------
+        ValueError
+            If ``table_or_batch`` is neither a RecordBatch nor a Table.
+        """
+        if isinstance(table_or_batch, RecordBatch):
+            self.write_batch(table_or_batch)
+        elif isinstance(table_or_batch, Table):
+            self.write_table(table_or_batch)
+        else:
+            # Keep ValueError for backward compatibility, but say what was
+            # expected instead of only echoing the offending type.
+            raise ValueError(
+                "Expected a RecordBatch or Table, got {!r}"
+                .format(type(table_or_batch)))
+    def write_batch(self, RecordBatch batch, custom_metadata=None):
+        """
+        Write RecordBatch to stream.
+        Parameters
+        ----------
+        batch : RecordBatch
+        custom_metadata : mapping or KeyValueMetadata
+            Keys and values must be string-like / coercible to bytes
+        """
+        # Normalize the metadata (None is allowed) while the GIL is still
+        # held; only the C++ write runs without it.
+        metadata = ensure_metadata(custom_metadata, allow_none=True)
+        c_meta = pyarrow_unwrap_metadata(metadata)
+        with nogil:
+            check_status(self.writer.get()
+                         .WriteRecordBatch(deref(batch.batch), c_meta))
+    def write_table(self, Table table, max_chunksize=None):
+        """
+        Write Table to stream in (contiguous) RecordBatch objects.
+        Parameters
+        ----------
+        table : Table
+        max_chunksize : int, default None
+            Maximum number of rows for RecordBatch chunks. Individual chunks may
+            be smaller depending on the chunk layout of individual columns.
+        """
+        cdef:
+            # max_chunksize must be > 0 to have any impact
+            int64_t c_max_chunksize = -1
+        if max_chunksize is not None:
+            c_max_chunksize = max_chunksize
+        with nogil:
+            check_status(self.writer.get().WriteTable(table.table[0],
+                                                      c_max_chunksize))
+    def close(self):
+        """
+        Close stream and write end-of-stream 0 marker.
+        """
+        with nogil:
+            check_status(self.writer.get().Close())
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Always close on leaving the `with` block; nothing is returned, so
+        # an in-flight exception is not suppressed.
+        self.close()
+    @property
+    def stats(self):
+        """
+        Current IPC write statistics.
+        Raises
+        ------
+        ValueError
+            If the underlying writer pointer is not set.
+        """
+        if not self.writer:
+            raise ValueError("Operation on closed writer")
+        return _wrap_write_stats(self.writer.get().stats())
+cdef class _RecordBatchStreamWriter(_CRecordBatchWriter):
+    # Writer wrapper whose _open() creates the underlying writer with
+    # MakeStreamWriter.
+    cdef:
+        CIpcWriteOptions options
+        bint closed
+    def __cinit__(self):
+        pass
+    def __dealloc__(self):
+        pass
+    @property
+    def _use_legacy_format(self):
+        # For testing (see test_ipc.py)
+        return self.options.write_legacy_ipc_format
+    @property
+    def _metadata_version(self):
+        # For testing (see test_ipc.py)
+        return _wrap_metadata_version(self.options.metadata_version)
+    def _open(self, sink, Schema schema not None,
+              IpcWriteOptions options=IpcWriteOptions()):
+        # Create the underlying stream writer for `sink` using `schema` and
+        # the C options taken from `options`.
+        cdef:
+            shared_ptr[COutputStream] c_sink
+        # Remember the C options on the instance; the properties above read
+        # them back.
+        self.options = options.c_options
+        get_writer(sink, &c_sink)
+        with nogil:
+            self.writer = GetResultValue(
+                MakeStreamWriter(c_sink, schema.sp_schema,
+                                 self.options))
+cdef _get_input_stream(object source, shared_ptr[CInputStream]* out):
+    # Prefer a buffer view of `source`; when as_buffer() rejects it with a
+    # TypeError, pass the object through unchanged.
+    try:
+        stream_source = as_buffer(source)
+    except TypeError:
+        stream_source = source
+    get_input_stream(stream_source, True, out)
+class _ReadPandasMixin:
+    def read_pandas(self, **options):
+        """
+        Read contents of stream to a pandas.DataFrame.
+        Read all record batches as a pyarrow.Table then convert it to a
+        pandas.DataFrame using Table.to_pandas.
+        Parameters
+        ----------
+        **options
+            Arguments to forward to :meth:`Table.to_pandas`.
+        Returns
+        -------
+        df : pandas.DataFrame
+        """
+        table = self.read_all()
+        return table.to_pandas(**options)
+cdef class RecordBatchReader(_Weakrefable):
+    """Base class for reading stream of record batches.
+    Record batch readers function as iterators of record batches that also
+    provide the schema (without the need to get any batches).
+    Warnings
+    --------
+    Do not call this class's constructor directly, use one of the
+    ``RecordBatchReader.from_*`` functions instead.
+    Notes
+    -----
+    To import and export using the Arrow C stream interface, use the
+    ``_import_from_c`` and ``_export_to_c`` methods. However, keep in mind this
+    interface is intended for expert users.
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> schema = pa.schema([('x', pa.int64())])
+    >>> def iter_record_batches():
+    ...     for i in range(2):
+    ...         yield pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], schema=schema)
+    >>> reader = pa.RecordBatchReader.from_batches(schema, iter_record_batches())
+    >>> print(reader.schema)
+    x: int64
+    >>> for batch in reader:
+    ...     print(batch)
+    pyarrow.RecordBatch
+    x: int64
+    ----
+    x: [1,2,3]
+    pyarrow.RecordBatch
+    x: int64
+    ----
+    x: [1,2,3]
+    """
+    # cdef block is in lib.pxd
+    def __init__(self):
+        raise TypeError("Do not call {}'s constructor directly, "
+                        "use one of the RecordBatchReader.from_* functions instead."
+                        .format(self.__class__.__name__))
+    def __iter__(self):
+        return self
+    def __next__(self):
+        # read_next_batch() raises StopIteration itself at end of stream.
+        return self.read_next_batch()
+    @property
+    def schema(self):
+        """
+        Shared schema of the record batches in the stream.
+        Returns
+        -------
+        Schema
+        """
+        cdef shared_ptr[CSchema] c_schema
+        with nogil:
+            c_schema = self.reader.get().schema()
+        return pyarrow_wrap_schema(c_schema)
+    def read_next_batch(self):
+        """
+        Read next RecordBatch from the stream.
+        Raises
+        ------
+        StopIteration:
+            At end of stream.
+        Returns
+        -------
+        RecordBatch
+        """
+        cdef shared_ptr[CRecordBatch] batch
+        with nogil:
+            check_status(self.reader.get().ReadNext(&batch))
+        # A null batch pointer is treated as the end of the stream.
+        if batch.get() == NULL:
+            raise StopIteration
+        return pyarrow_wrap_batch(batch)
+    def read_next_batch_with_custom_metadata(self):
+        """
+        Read next RecordBatch from the stream along with its custom metadata.
+        Raises
+        ------
+        StopIteration:
+            At end of stream.
+        Returns
+        -------
+        batch : RecordBatch
+        custom_metadata : KeyValueMetadata
+        """
+        cdef:
+            CRecordBatchWithMetadata batch_with_metadata
+        with nogil:
+            batch_with_metadata = GetResultValue(self.reader.get().ReadNext())
+        # A null batch pointer is treated as the end of the stream.
+        if batch_with_metadata.batch.get() == NULL:
+            raise StopIteration
+        return _wrap_record_batch_with_metadata(batch_with_metadata)
+    def iter_batches_with_custom_metadata(self):
+        """
+        Iterate over record batches from the stream along with their custom
+        metadata.
+        Yields
+        ------
+        RecordBatchWithMetadata
+        """
+        while True:
+            try:
+                yield self.read_next_batch_with_custom_metadata()
+            except StopIteration:
+                # End of stream: finish the generator with a plain return
+                # rather than letting StopIteration escape from it.
+                return
+    def read_all(self):
+        """
+        Read all record batches as a pyarrow.Table.
+        Returns
+        -------
+        Table
+        """
+        cdef shared_ptr[CTable] table
+        with nogil:
+            check_status(self.reader.get().ToTable().Value(&table))
+        return pyarrow_wrap_table(table)
+    # Reuse the mixin's implementation as a method of this class.
+    read_pandas = _ReadPandasMixin.read_pandas
+    def close(self):
+        """
+        Release any resources associated with the reader.
+        """
+        with nogil:
+            check_status(self.reader.get().Close())
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Always close on leaving the `with` block; nothing is returned, so
+        # an in-flight exception is not suppressed.
+        self.close()
+    def cast(self, target_schema):
+        """
+        Wrap this reader with one that casts each batch lazily as it is pulled.
+        Currently only a safe cast to target_schema is implemented.
+        Parameters
+        ----------
+        target_schema : Schema
+            Schema to cast to, the names and order of fields must match.
+        Returns
+        -------
+        RecordBatchReader
+        Raises
+        ------
+        ValueError
+            If the field names of ``target_schema`` differ from this
+            reader's field names.
+        """
+        cdef:
+            shared_ptr[CSchema] c_schema
+            shared_ptr[CRecordBatchReader] c_reader
+            RecordBatchReader out
+        if self.schema.names != target_schema.names:
+            # The object being cast is a reader, so the message names it
+            # as such.
+            raise ValueError("Target schema's field names are not matching "
+                             f"the reader's field names: {self.schema.names}, "
+                             f"{target_schema.names}")
+        c_schema = pyarrow_unwrap_schema(target_schema)
+        c_reader = GetResultValue(CCastingRecordBatchReader.Make(
+            self.reader, c_schema))
+        # Bypass __init__, which always raises, and attach the new reader.
+        out = RecordBatchReader.__new__(RecordBatchReader)
+        out.reader = c_reader
+        return out
+    def _export_to_c(self, out_ptr):
+        """
+        Export to a C ArrowArrayStream struct, given its pointer.
+        Parameters
+        ----------
+        out_ptr: int
+            The raw pointer to a C ArrowArrayStream struct.
+        Be careful: if you don't pass the ArrowArrayStream struct to a
+        consumer, array memory will leak.  This is a low-level function
+        intended for expert users.
+        """
+        cdef:
+            void* c_ptr = _as_c_pointer(out_ptr)
+        with nogil:
+            check_status(ExportRecordBatchReader(
+                self.reader, <ArrowArrayStream*> c_ptr))
+    @staticmethod
+    def _import_from_c(in_ptr):
+        """
+        Import RecordBatchReader from a C ArrowArrayStream struct,
+        given its pointer.
+        Parameters
+        ----------
+        in_ptr: int
+            The raw pointer to a C ArrowArrayStream struct.
+        This is a low-level function intended for expert users.
+        """
+        cdef:
+            void* c_ptr = _as_c_pointer(in_ptr)
+            shared_ptr[CRecordBatchReader] c_reader
+            RecordBatchReader self
+        with nogil:
+            c_reader = GetResultValue(ImportRecordBatchReader(
+                <ArrowArrayStream*> c_ptr))
+        # Bypass __init__, which always raises, and attach the new reader.
+        self = RecordBatchReader.__new__(RecordBatchReader)
+        self.reader = c_reader
+        return self
+    def __arrow_c_stream__(self, requested_schema=None):
+        """
+        Export to a C ArrowArrayStream PyCapsule.
+        Parameters
+        ----------
+        requested_schema : PyCapsule, default None
+            The schema to which the stream should be casted, passed as a
+            PyCapsule containing a C ArrowSchema representation of the
+            requested schema.
+        Returns
+        -------
+        PyCapsule
+            A capsule containing a C ArrowArrayStream struct.
+        """
+        cdef:
+            ArrowArrayStream* c_stream
+        if requested_schema is not None:
+            out_schema = Schema._import_from_c_capsule(requested_schema)
+            # Only go through cast() when the requested schema actually
+            # differs; the casting reader is then exported instead.
+            if self.schema != out_schema:
+                return self.cast(out_schema).__arrow_c_stream__()
+        stream_capsule = alloc_c_stream(&c_stream)
+        with nogil:
+            check_status(ExportRecordBatchReader(self.reader, c_stream))
+        return stream_capsule
+    @staticmethod
+    def _import_from_c_capsule(stream):
+        """
+        Import RecordBatchReader from a C ArrowArrayStream PyCapsule.
+        Parameters
+        ----------
+        stream: PyCapsule
+            A capsule containing a C ArrowArrayStream PyCapsule.
+        Returns
+        -------
+        RecordBatchReader
+        """
+        cdef:
+            ArrowArrayStream* c_stream
+            shared_ptr[CRecordBatchReader] c_reader
+            RecordBatchReader self
+        # The capsule is looked up under the name 'arrow_array_stream'.
+        c_stream = <ArrowArrayStream*>PyCapsule_GetPointer(
+            stream, 'arrow_array_stream'
+        )
+        with nogil:
+            c_reader = GetResultValue(ImportRecordBatchReader(c_stream))
+        # Bypass __init__, which always raises, and attach the new reader.
+        self = RecordBatchReader.__new__(RecordBatchReader)
+        self.reader = c_reader
+        return self
+    @staticmethod
+    def from_stream(data, schema=None):
+        """
+        Create RecordBatchReader from a Arrow-compatible stream object.
+        This accepts objects implementing the Arrow PyCapsule Protocol for
+        streams, i.e. objects that have a ``__arrow_c_stream__`` method.
+        Parameters
+        ----------
+        data : Arrow-compatible stream object
+            Any object that implements the Arrow PyCapsule Protocol for
+            streams.
+        schema : Schema, default None
+            The schema to which the stream should be casted, if supported
+            by the stream object.
+        Returns
+        -------
+        RecordBatchReader
+        Raises
+        ------
+        TypeError
+            If ``data`` has no ``__arrow_c_stream__`` method, or ``schema``
+            is given but has no ``__arrow_c_schema__`` method.
+        """
+        if not hasattr(data, "__arrow_c_stream__"):
+            raise TypeError(
+                "Expected an object implementing the Arrow PyCapsule Protocol for "
+                "streams (i.e. having a `__arrow_c_stream__` method), "
+                f"got {type(data)!r}."
+            )
+        if schema is not None:
+            if not hasattr(schema, "__arrow_c_schema__"):
+                raise TypeError(
+                    "Expected an object implementing the Arrow PyCapsule Protocol for "
+                    "schema (i.e. having a `__arrow_c_schema__` method), "
+                    f"got {type(schema)!r}."
+                )
+            requested = schema.__arrow_c_schema__()
+        else:
+            requested = None
+        capsule = data.__arrow_c_stream__(requested)
+        return RecordBatchReader._import_from_c_capsule(capsule)
+    @staticmethod
+    def from_batches(Schema schema not None, batches):
+        """
+        Create RecordBatchReader from an iterable of batches.
+        Parameters
+        ----------
+        schema : Schema
+            The shared schema of the record batches
+        batches : Iterable[RecordBatch]
+            The batches that this reader will return.
+        Returns
+        -------
+        reader : RecordBatchReader
+        """
+        cdef:
+            shared_ptr[CSchema] c_schema
+            shared_ptr[CRecordBatchReader] c_reader
+            RecordBatchReader self
+        c_schema = pyarrow_unwrap_schema(schema)
+        c_reader = GetResultValue(CPyRecordBatchReader.Make(
+            c_schema, batches))
+        # Bypass __init__, which always raises, and attach the new reader.
+        self = RecordBatchReader.__new__(RecordBatchReader)
+        self.reader = c_reader
+        return self
+cdef class _RecordBatchStreamReader(RecordBatchReader):
+    # Reader wrapper whose _open() creates the underlying reader with
+    # CRecordBatchStreamReader.Open.
+    cdef:
+        shared_ptr[CInputStream] in_stream
+        CIpcReadOptions options
+        # Down-cast view of the object held by self.reader (set in _open).
+        CRecordBatchStreamReader* stream_reader
+    def __cinit__(self):
+        pass
+    def _open(self, source, IpcReadOptions options=IpcReadOptions(),
+              MemoryPool memory_pool=None):
+        # Open `source` as an input stream and create the stream reader on
+        # top of it with the given read options and memory pool.
+        self.options = options.c_options
+        self.options.memory_pool = maybe_unbox_memory_pool(memory_pool)
+        _get_input_stream(source, &self.in_stream)
+        with nogil:
+            self.reader = GetResultValue(CRecordBatchStreamReader.Open(
+                self.in_stream, self.options))
+            # Keep a typed pointer to the same object so that `stats` can
+            # call the stream-reader specific API.
+            self.stream_reader = <CRecordBatchStreamReader*> self.reader.get()
+    @property
+    def stats(self):
+        """
+        Current IPC read statistics.
+        """
+        if not self.reader:
+            raise ValueError("Operation on closed reader")
+        return _wrap_read_stats(self.stream_reader.stats())
+cdef class _RecordBatchFileWriter(_RecordBatchStreamWriter):
+    # Same wrapper as _RecordBatchStreamWriter, except that _open() creates
+    # the underlying writer with MakeFileWriter.
+    def _open(self, sink, Schema schema not None,
+              IpcWriteOptions options=IpcWriteOptions()):
+        cdef:
+            shared_ptr[COutputStream] c_sink
+        # Remember the C options on the instance before creating the writer.
+        self.options = options.c_options
+        get_writer(sink, &c_sink)
+        with nogil:
+            self.writer = GetResultValue(
+                MakeFileWriter(c_sink, schema.sp_schema, self.options))
+_RecordBatchWithMetadata = namedtuple(
+    'RecordBatchWithMetadata',
+    ('batch', 'custom_metadata'))
+class RecordBatchWithMetadata(_RecordBatchWithMetadata):
+    """RecordBatch with its custom metadata
+    Parameters
+    ----------
+    batch : RecordBatch
+    custom_metadata : KeyValueMetadata
+    """
+    __slots__ = ()
+cdef _wrap_record_batch_with_metadata(CRecordBatchWithMetadata c):
+    # Module-level helper (not a method, so it takes no @staticmethod):
+    # wraps the batch and custom metadata of `c` and returns them together
+    # as a RecordBatchWithMetadata.
+    return RecordBatchWithMetadata(pyarrow_wrap_batch(c.batch),
+                                   pyarrow_wrap_metadata(c.custom_metadata))
+cdef class _RecordBatchFileReader(_Weakrefable):
+    # Wrapper around a CRecordBatchFileReader giving indexed access to the
+    # record batches of a source opened through get_reader().
+    cdef:
+        SharedPtrNoGIL[CRecordBatchFileReader] reader
+        shared_ptr[CRandomAccessFile] file
+        CIpcReadOptions options
+    cdef readonly:
+        Schema schema
+    def __cinit__(self):
+        pass
+    def _open(self, source, footer_offset=None,
+              IpcReadOptions options=IpcReadOptions(),
+              MemoryPool memory_pool=None):
+        # Open `source` and create the underlying file reader, then cache
+        # the wrapped schema on the instance.
+        self.options = options.c_options
+        self.options.memory_pool = maybe_unbox_memory_pool(memory_pool)
+        # Prefer a buffer view of `source`; objects that as_buffer() rejects
+        # with a TypeError are passed to get_reader() unchanged.
+        try:
+            source = as_buffer(source)
+        except TypeError:
+            pass
+        get_reader(source, False, &self.file)
+        cdef int64_t offset = 0
+        if footer_offset is not None:
+            offset = footer_offset
+        with nogil:
+            # An offset of 0 (also the value used when footer_offset is
+            # None) selects the Open overload that takes no offset.
+            if offset != 0:
+                self.reader = GetResultValue(
+                    CRecordBatchFileReader.Open2(self.file.get(), offset,
+                                                 self.options))
+            else:
+                self.reader = GetResultValue(
+                    CRecordBatchFileReader.Open(self.file.get(),
+                                                self.options))
+        self.schema = pyarrow_wrap_schema(self.reader.get().schema())
+    @property
+    def num_record_batches(self):
+        """
+        The number of record batches in the IPC file.
+        """
+        return self.reader.get().num_record_batches()
+    def get_batch(self, int i):
+        """
+        Read the record batch with the given index.
+        Parameters
+        ----------
+        i : int
+            The index of the record batch in the IPC file.
+        Returns
+        -------
+        batch : RecordBatch
+        Raises
+        ------
+        ValueError
+            If ``i`` is negative or not below ``num_record_batches``.
+        """
+        cdef shared_ptr[CRecordBatch] batch
+        # Negative indices are rejected, not interpreted from the end.
+        if i < 0 or i >= self.num_record_batches:
+            raise ValueError('Batch number {0} out of range'.format(i))
+        with nogil:
+            batch = GetResultValue(self.reader.get().ReadRecordBatch(i))
+        return pyarrow_wrap_batch(batch)
+    # TODO(wesm): ARROW-503: Function was renamed. Remove after a period of
+    # time has passed
+    get_record_batch = get_batch
+    def get_batch_with_custom_metadata(self, int i):
+        """
+        Read the record batch with the given index along with
+        its custom metadata
+        Parameters
+        ----------
+        i : int
+            The index of the record batch in the IPC file.
+        Returns
+        -------
+        batch : RecordBatch
+        custom_metadata : KeyValueMetadata
+        Raises
+        ------
+        ValueError
+            If ``i`` is negative or not below ``num_record_batches``.
+        """
+        cdef:
+            CRecordBatchWithMetadata batch_with_metadata
+        # Negative indices are rejected, not interpreted from the end.
+        if i < 0 or i >= self.num_record_batches:
+            raise ValueError('Batch number {0} out of range'.format(i))
+        with nogil:
+            batch_with_metadata = GetResultValue(
+                self.reader.get().ReadRecordBatchWithCustomMetadata(i))
+        return _wrap_record_batch_with_metadata(batch_with_metadata)
+    def read_all(self):
+        """
+        Read all record batches as a pyarrow.Table
+        """
+        cdef:
+            vector[shared_ptr[CRecordBatch]] batches
+            shared_ptr[CTable] table
+            int i, nbatches
+        nbatches = self.num_record_batches
+        # Size the vector up front so each slot can be assigned by index.
+        batches.resize(nbatches)
+        with nogil:
+            for i in range(nbatches):
+                batches[i] = GetResultValue(self.reader.get()
+                                            .ReadRecordBatch(i))
+            table = GetResultValue(
+                CTable.FromRecordBatches(self.schema.sp_schema, move(batches)))
+        return pyarrow_wrap_table(table)
+    # Reuse the mixin's implementation as a method of this class.
+    read_pandas = _ReadPandasMixin.read_pandas
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc_value, traceback):
+        # No-op: nothing is closed or released when the `with` block ends.
+        pass
+    @property
+    def stats(self):
+        """
+        Current IPC read statistics.
+        """
+        if not self.reader:
+            raise ValueError("Operation on closed reader")
+        return _wrap_read_stats(self.reader.get().stats())
+def get_tensor_size(Tensor tensor):
+    """
+    Compute the total serialized size of a Tensor, including metadata and
+    padding.
+    Parameters
+    ----------
+    tensor : Tensor
+        The tensor for which we want to know the size.
+    """
+    cdef:
+        int64_t nbytes
+    with nogil:
+        check_status(GetTensorSize(deref(tensor.tp), &nbytes))
+    return nbytes
+def get_record_batch_size(RecordBatch batch):
+    """
+    Compute the total serialized size of a RecordBatch, including metadata
+    and padding.
+    Parameters
+    ----------
+    batch : RecordBatch
+        The record batch for which we want to know the size.
+    """
+    cdef:
+        int64_t nbytes
+    with nogil:
+        check_status(GetRecordBatchSize(deref(batch.batch), &nbytes))
+    return nbytes
+def write_tensor(Tensor tensor, NativeFile dest):
+    """
+    Write a pyarrow.Tensor to a pyarrow.NativeFile object at its current
+    position.
+    Parameters
+    ----------
+    tensor : pyarrow.Tensor
+    dest : pyarrow.NativeFile
+    Returns
+    -------
+    bytes_written : int
+        Total number of bytes written to the file
+    """
+    cdef:
+        int32_t metadata_length
+        int64_t body_length
+    c_sink = dest.get_output_stream()
+    with nogil:
+        check_status(
+            WriteTensor(deref(tensor.tp), c_sink.get(),
+                        &metadata_length, &body_length))
+    # The reported total is the metadata length plus the body length.
+    return metadata_length + body_length
+cdef NativeFile as_native_file(source):
+    # Return `source` itself when it already is a NativeFile; otherwise wrap
+    # objects with a `read` attribute in PythonFile and everything else in
+    # BufferReader.
+    if isinstance(source, NativeFile):
+        return source
+    if hasattr(source, 'read'):
+        wrapped = PythonFile(source)
+    else:
+        wrapped = BufferReader(source)
+    if not isinstance(wrapped, NativeFile):
+        raise ValueError('Unable to read message from object with type: {0}'
+                         .format(type(wrapped)))
+    return wrapped
+def read_tensor(source):
+    """Read a pyarrow.Tensor from a pyarrow.NativeFile object, starting at
+    its current position. If the file source supports zero copy (e.g. a
+    memory map), then this operation does not allocate any memory. This
+    function does not assume that the stream is aligned.
+    Parameters
+    ----------
+    source : pyarrow.NativeFile
+    Returns
+    -------
+    tensor : Tensor
+    """
+    cdef:
+        shared_ptr[CTensor] c_tensor
+        CInputStream* c_input
+        NativeFile native_src = as_native_file(source)
+    c_input = native_src.get_input_stream().get()
+    with nogil:
+        c_tensor = GetResultValue(ReadTensor(c_input))
+    return pyarrow_wrap_tensor(c_tensor)
+def read_message(source):
+    """
+    Read a length-prefixed message from a file or buffer-like object.
+    Parameters
+    ----------
+    source : pyarrow.NativeFile, file-like object, or buffer-like object
+    Returns
+    -------
+    message : Message
+    Raises
+    ------
+    EOFError
+        If no message could be read from the source.
+    """
+    cdef:
+        Message msg = Message.__new__(Message)
+        CInputStream* c_input
+        NativeFile native_src = as_native_file(source)
+    c_input = native_src.get_input_stream().get()
+    with nogil:
+        msg.message = move(
+            GetResultValue(ReadMessage(c_input, c_default_memory_pool())))
+    # A null message pointer means there was nothing left to read.
+    if msg.message == nullptr:
+        raise EOFError("End of Arrow stream")
+    return msg
+def read_schema(obj, DictionaryMemo dictionary_memo=None):
+    """
+    Read Schema from message or buffer
+    Parameters
+    ----------
+    obj : buffer or Message
+    dictionary_memo : DictionaryMemo, optional
+        Needed to be able to reconstruct dictionary-encoded fields
+        with read_record_batch
+    Returns
+    -------
+    schema : Schema
+    """
+    cdef:
+        shared_ptr[CSchema] result
+        shared_ptr[CRandomAccessFile] cpp_file
+        Message message
+        CDictionaryMemo temp_memo
+        CDictionaryMemo* arg_dict_memo
+    # Use the caller's memo when one is given; otherwise fall back to a
+    # function-local temporary.
+    if dictionary_memo is not None:
+        arg_dict_memo = dictionary_memo.memo
+    else:
+        arg_dict_memo = &temp_memo
+    if isinstance(obj, Message):
+        message = obj
+        with nogil:
+            result = GetResultValue(ReadSchema(
+                deref(message.message.get()), arg_dict_memo))
+    else:
+        # Anything that is not a Message is opened through get_reader() and
+        # the schema is read from the resulting file object.
+        get_reader(obj, False, &cpp_file)
+        with nogil:
+            result = GetResultValue(ReadSchema(cpp_file.get(), arg_dict_memo))
+    return pyarrow_wrap_schema(result)
+def read_record_batch(obj, Schema schema,
+                      DictionaryMemo dictionary_memo=None):
+    """
+    Read RecordBatch from message, given a known schema. If reading data from a
+    complete IPC stream, use ipc.open_stream instead
+    Parameters
+    ----------
+    obj : Message or Buffer-like
+    schema : Schema
+    dictionary_memo : DictionaryMemo, optional
+        If message contains dictionaries, must pass a populated
+        DictionaryMemo
+    Returns
+    -------
+    batch : RecordBatch
+    """
+    cdef:
+        shared_ptr[CRecordBatch] result
+        Message message
+        CDictionaryMemo temp_memo
+        CDictionaryMemo* arg_dict_memo
+    # Anything that is not already a Message is first parsed into one with
+    # read_message().
+    if isinstance(obj, Message):
+        message = obj
+    else:
+        message = read_message(obj)
+    # Use the caller's memo when one is given; otherwise fall back to a
+    # function-local temporary.
+    if dictionary_memo is not None:
+        arg_dict_memo = dictionary_memo.memo
+    else:
+        arg_dict_memo = &temp_memo
+    with nogil:
+        # The batch is always read with the default IPC read options.
+        result = GetResultValue(
+            ReadRecordBatch(deref(message.message.get()),
+                            schema.sp_schema,
+                            arg_dict_memo,
+                            CIpcReadOptions.Defaults()))
+    return pyarrow_wrap_batch(result)

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/ipc.py ADDED Viewed

	@@ -0,0 +1,285 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# Arrow file and stream reader/writer classes, and other messaging tools
+import os
+import pyarrow as pa
+from pyarrow.lib import (IpcReadOptions, IpcWriteOptions, ReadStats, WriteStats,  # noqa
+                         Message, MessageReader,
+                         RecordBatchReader, _ReadPandasMixin,
+                         MetadataVersion,
+                         read_message, read_record_batch, read_schema,
+                         read_tensor, write_tensor,
+                         get_record_batch_size, get_tensor_size)
+import pyarrow.lib as lib
+class RecordBatchStreamReader(lib._RecordBatchStreamReader):
+    """
+    Reader for the Arrow streaming binary format.
+    Parameters
+    ----------
+    source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
+        Either an in-memory buffer, or a readable file object.
+        If you want to use memory map use MemoryMappedFile as source.
+    options : pyarrow.ipc.IpcReadOptions
+        Options for IPC deserialization.
+        If None, default values will be used.
+    memory_pool : MemoryPool, default None
+        If None, default memory pool is used.
+    """
+    def __init__(self, source, *, options=None, memory_pool=None):
+        options = _ensure_default_ipc_read_options(options)
+        self._open(source, options=options, memory_pool=memory_pool)
+# Parameter section shared by the IPC writer docstrings in this module; it is
+# interpolated with str.format into RecordBatchStreamWriter.__doc__,
+# RecordBatchFileWriter.__doc__, new_stream.__doc__ and new_file.__doc__.
+_ipc_writer_class_doc = """\
+Parameters
+----------
+sink : str, pyarrow.NativeFile, or file-like Python object
+    Either a file path, or a writable file object.
+schema : pyarrow.Schema
+    The Arrow schema for data to be written to the file.
+use_legacy_format : bool, default None
+    Deprecated in favor of setting options. Cannot be provided with
+    options.
+    If None, False will be used unless this default is overridden by
+    setting the environment variable ARROW_PRE_0_15_IPC_FORMAT=1
+options : pyarrow.ipc.IpcWriteOptions
+    Options for IPC serialization.
+    If None, default values will be used: the legacy format will not
+    be used unless overridden by setting the environment variable
+    ARROW_PRE_0_15_IPC_FORMAT=1, and the V5 metadata version will be
+    used unless overridden by setting the environment variable
+    ARROW_PRE_1_0_METADATA_VERSION=1."""
+class RecordBatchStreamWriter(lib._RecordBatchStreamWriter):
+    __doc__ = """Writer for the Arrow streaming binary format
+{}""".format(_ipc_writer_class_doc)
+    def __init__(self, sink, schema, *, use_legacy_format=None, options=None):
+        options = _get_legacy_format_default(use_legacy_format, options)
+        self._open(sink, schema, options=options)
+class RecordBatchFileReader(lib._RecordBatchFileReader):
+    """
+    Class for reading Arrow record batch data from the Arrow binary file format
+    Parameters
+    ----------
+    source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
+        Either an in-memory buffer, or a readable file object.
+        If you want to use memory map use MemoryMappedFile as source.
+    footer_offset : int, default None
+        If the file is embedded in some larger file, this is the byte offset to
+        the very end of the file data
+    options : pyarrow.ipc.IpcReadOptions
+        Options for IPC serialization.
+        If None, default values will be used.
+    memory_pool : MemoryPool, default None
+        If None, default memory pool is used.
+    """
+    def __init__(self, source, footer_offset=None, *, options=None,
+                 memory_pool=None):
+        options = _ensure_default_ipc_read_options(options)
+        self._open(source, footer_offset=footer_offset,
+                   options=options, memory_pool=memory_pool)
+class RecordBatchFileWriter(lib._RecordBatchFileWriter):
+    __doc__ = """Writer to create the Arrow binary file format
+{}""".format(_ipc_writer_class_doc)
+    def __init__(self, sink, schema, *, use_legacy_format=None, options=None):
+        options = _get_legacy_format_default(use_legacy_format, options)
+        self._open(sink, schema, options=options)
+def _get_legacy_format_default(use_legacy_format, options):
+    if use_legacy_format is not None and options is not None:
+        raise ValueError(
+            "Can provide at most one of options and use_legacy_format")
+    elif options:
+        if not isinstance(options, IpcWriteOptions):
+            raise TypeError("expected IpcWriteOptions, got {}"
+                            .format(type(options)))
+        return options
+    metadata_version = MetadataVersion.V5
+    if use_legacy_format is None:
+        use_legacy_format = \
+            bool(int(os.environ.get('ARROW_PRE_0_15_IPC_FORMAT', '0')))
+    if bool(int(os.environ.get('ARROW_PRE_1_0_METADATA_VERSION', '0'))):
+        metadata_version = MetadataVersion.V4
+    return IpcWriteOptions(use_legacy_format=use_legacy_format,
+                           metadata_version=metadata_version)
+def _ensure_default_ipc_read_options(options):
+    if options and not isinstance(options, IpcReadOptions):
+        raise TypeError(
+            "expected IpcReadOptions, got {}".format(type(options))
+        )
+    return options or IpcReadOptions()
+def new_stream(sink, schema, *, use_legacy_format=None, options=None):
+    return RecordBatchStreamWriter(sink, schema,
+                                   use_legacy_format=use_legacy_format,
+                                   options=options)
+new_stream.__doc__ = """\
+Create an Arrow columnar IPC stream writer instance
+{}
+Returns
+-------
+writer : RecordBatchStreamWriter
+    A writer for the given sink
+""".format(_ipc_writer_class_doc)
+def open_stream(source, *, options=None, memory_pool=None):
+    """
+    Create reader for Arrow streaming format.
+    Parameters
+    ----------
+    source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
+        Either an in-memory buffer, or a readable file object.
+    options : pyarrow.ipc.IpcReadOptions
+        Options for IPC serialization.
+        If None, default values will be used.
+    memory_pool : MemoryPool, default None
+        If None, default memory pool is used.
+    Returns
+    -------
+    reader : RecordBatchStreamReader
+        A reader for the given source
+    """
+    return RecordBatchStreamReader(source, options=options,
+                                   memory_pool=memory_pool)
+def new_file(sink, schema, *, use_legacy_format=None, options=None):
+    return RecordBatchFileWriter(sink, schema,
+                                 use_legacy_format=use_legacy_format,
+                                 options=options)
+new_file.__doc__ = """\
+Create an Arrow columnar IPC file writer instance
+{}
+Returns
+-------
+writer : RecordBatchFileWriter
+    A writer for the given sink
+""".format(_ipc_writer_class_doc)
+def open_file(source, footer_offset=None, *, options=None, memory_pool=None):
+    """
+    Create reader for Arrow file format.
+    Parameters
+    ----------
+    source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
+        Either an in-memory buffer, or a readable file object.
+    footer_offset : int, default None
+        If the file is embedded in some larger file, this is the byte offset to
+        the very end of the file data.
+    options : pyarrow.ipc.IpcReadOptions
+        Options for IPC serialization.
+        If None, default values will be used.
+    memory_pool : MemoryPool, default None
+        If None, default memory pool is used.
+    Returns
+    -------
+    reader : RecordBatchFileReader
+        A reader for the given source
+    """
+    return RecordBatchFileReader(
+        source, footer_offset=footer_offset,
+        options=options, memory_pool=memory_pool)
+def serialize_pandas(df, *, nthreads=None, preserve_index=None):
+    """
+    Serialize a pandas DataFrame into a buffer protocol compatible object.
+    Parameters
+    ----------
+    df : pandas.DataFrame
+    nthreads : int, default None
+        Number of threads to use for conversion to Arrow, default all CPUs.
+    preserve_index : bool, default None
+        The default of None will store the index as a column, except for
+        RangeIndex which is stored as metadata only. If True, always
+        preserve the pandas index data as a column. If False, no index
+        information is saved and the result will have a default RangeIndex.
+    Returns
+    -------
+    buf : buffer
+        An object compatible with the buffer protocol.
+    """
+    batch = pa.RecordBatch.from_pandas(df, nthreads=nthreads,
+                                       preserve_index=preserve_index)
+    sink = pa.BufferOutputStream()
+    with pa.RecordBatchStreamWriter(sink, batch.schema) as writer:
+        writer.write_batch(batch)
+    return sink.getvalue()
+def deserialize_pandas(buf, *, use_threads=True):
+    """Deserialize a buffer protocol compatible object into a pandas DataFrame.
+    Parameters
+    ----------
+    buf : buffer
+        An object compatible with the buffer protocol.
+    use_threads : bool, default True
+        Whether to parallelize the conversion using multiple threads.
+    Returns
+    -------
+    df : pandas.DataFrame
+        The buffer deserialized as pandas DataFrame
+    """
+    buffer_reader = pa.BufferReader(buf)
+    with pa.RecordBatchStreamReader(buffer_reader) as reader:
+        table = reader.read_all()
+    return table.to_pandas(use_threads=use_threads)

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/jvm.py ADDED Viewed

	@@ -0,0 +1,335 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Functions to interact with Arrow memory allocated by Arrow Java.
+These functions convert the objects holding the metadata, the actual
+data is not copied at all.
+This will only work with a JVM running in the same process such as provided
+through jpype. Modules that talk to a remote JVM like py4j will not work as the
+memory addresses reported by them are not reachable in the python process.
+"""
+import pyarrow as pa
+class _JvmBufferNanny:
+    """
+    An object that keeps a org.apache.arrow.memory.ArrowBuf's underlying
+    memory alive.
+    """
+    ref_manager = None
+    def __init__(self, jvm_buf):
+        ref_manager = jvm_buf.getReferenceManager()
+        # Will raise a java.lang.IllegalArgumentException if the buffer
+        # is already freed.  It seems that exception cannot easily be
+        # caught...
+        ref_manager.retain()
+        self.ref_manager = ref_manager
+    def __del__(self):
+        if self.ref_manager is not None:
+            self.ref_manager.release()
+def jvm_buffer(jvm_buf):
+    """
+    Construct an Arrow buffer from org.apache.arrow.memory.ArrowBuf
+    Parameters
+    ----------
+    jvm_buf: org.apache.arrow.memory.ArrowBuf
+        Arrow Buffer representation on the JVM.
+    Returns
+    -------
+    pyarrow.Buffer
+        Python Buffer that references the JVM memory.
+    """
+    nanny = _JvmBufferNanny(jvm_buf)
+    address = jvm_buf.memoryAddress()
+    size = jvm_buf.capacity()
+    return pa.foreign_buffer(address, size, base=nanny)
+def _from_jvm_int_type(jvm_type):
+    """
+    Convert a JVM int type to its Python equivalent.
+    Parameters
+    ----------
+    jvm_type : org.apache.arrow.vector.types.pojo.ArrowType$Int
+    Returns
+    -------
+    typ : pyarrow.DataType
+    """
+    bit_width = jvm_type.getBitWidth()
+    if jvm_type.getIsSigned():
+        if bit_width == 8:
+            return pa.int8()
+        elif bit_width == 16:
+            return pa.int16()
+        elif bit_width == 32:
+            return pa.int32()
+        elif bit_width == 64:
+            return pa.int64()
+    else:
+        if bit_width == 8:
+            return pa.uint8()
+        elif bit_width == 16:
+            return pa.uint16()
+        elif bit_width == 32:
+            return pa.uint32()
+        elif bit_width == 64:
+            return pa.uint64()
+def _from_jvm_float_type(jvm_type):
+    """
+    Convert a JVM float type to its Python equivalent.
+    Parameters
+    ----------
+    jvm_type: org.apache.arrow.vector.types.pojo.ArrowType$FloatingPoint
+    Returns
+    -------
+    typ: pyarrow.DataType
+    """
+    precision = jvm_type.getPrecision().toString()
+    if precision == 'HALF':
+        return pa.float16()
+    elif precision == 'SINGLE':
+        return pa.float32()
+    elif precision == 'DOUBLE':
+        return pa.float64()
+def _from_jvm_time_type(jvm_type):
+    """
+    Convert a JVM time type to its Python equivalent.
+    Parameters
+    ----------
+    jvm_type: org.apache.arrow.vector.types.pojo.ArrowType$Time
+    Returns
+    -------
+    typ: pyarrow.DataType
+    """
+    time_unit = jvm_type.getUnit().toString()
+    if time_unit == 'SECOND':
+        assert jvm_type.getBitWidth() == 32
+        return pa.time32('s')
+    elif time_unit == 'MILLISECOND':
+        assert jvm_type.getBitWidth() == 32
+        return pa.time32('ms')
+    elif time_unit == 'MICROSECOND':
+        assert jvm_type.getBitWidth() == 64
+        return pa.time64('us')
+    elif time_unit == 'NANOSECOND':
+        assert jvm_type.getBitWidth() == 64
+        return pa.time64('ns')
+def _from_jvm_timestamp_type(jvm_type):
+    """
+    Convert a JVM timestamp type to its Python equivalent.
+    Parameters
+    ----------
+    jvm_type: org.apache.arrow.vector.types.pojo.ArrowType$Timestamp
+    Returns
+    -------
+    typ: pyarrow.DataType
+    """
+    time_unit = jvm_type.getUnit().toString()
+    timezone = jvm_type.getTimezone()
+    if timezone is not None:
+        timezone = str(timezone)
+    if time_unit == 'SECOND':
+        return pa.timestamp('s', tz=timezone)
+    elif time_unit == 'MILLISECOND':
+        return pa.timestamp('ms', tz=timezone)
+    elif time_unit == 'MICROSECOND':
+        return pa.timestamp('us', tz=timezone)
+    elif time_unit == 'NANOSECOND':
+        return pa.timestamp('ns', tz=timezone)
+def _from_jvm_date_type(jvm_type):
+    """
+    Convert a JVM date type to its Python equivalent
+    Parameters
+    ----------
+    jvm_type: org.apache.arrow.vector.types.pojo.ArrowType$Date
+    Returns
+    -------
+    typ: pyarrow.DataType
+    """
+    day_unit = jvm_type.getUnit().toString()
+    if day_unit == 'DAY':
+        return pa.date32()
+    elif day_unit == 'MILLISECOND':
+        return pa.date64()
+def field(jvm_field):
+    """
+    Construct a Field from a org.apache.arrow.vector.types.pojo.Field
+    instance.
+    Parameters
+    ----------
+    jvm_field: org.apache.arrow.vector.types.pojo.Field
+    Returns
+    -------
+    pyarrow.Field
+    """
+    name = str(jvm_field.getName())
+    jvm_type = jvm_field.getType()
+    typ = None
+    if not jvm_type.isComplex():
+        type_str = jvm_type.getTypeID().toString()
+        if type_str == 'Null':
+            typ = pa.null()
+        elif type_str == 'Int':
+            typ = _from_jvm_int_type(jvm_type)
+        elif type_str == 'FloatingPoint':
+            typ = _from_jvm_float_type(jvm_type)
+        elif type_str == 'Utf8':
+            typ = pa.string()
+        elif type_str == 'Binary':
+            typ = pa.binary()
+        elif type_str == 'FixedSizeBinary':
+            typ = pa.binary(jvm_type.getByteWidth())
+        elif type_str == 'Bool':
+            typ = pa.bool_()
+        elif type_str == 'Time':
+            typ = _from_jvm_time_type(jvm_type)
+        elif type_str == 'Timestamp':
+            typ = _from_jvm_timestamp_type(jvm_type)
+        elif type_str == 'Date':
+            typ = _from_jvm_date_type(jvm_type)
+        elif type_str == 'Decimal':
+            typ = pa.decimal128(jvm_type.getPrecision(), jvm_type.getScale())
+        else:
+            raise NotImplementedError(
+                "Unsupported JVM type: {}".format(type_str))
+    else:
+        # TODO: The following JVM types are not implemented:
+        #       Struct, List, FixedSizeList, Union, Dictionary
+        raise NotImplementedError(
+            "JVM field conversion only implemented for primitive types.")
+    nullable = jvm_field.isNullable()
+    jvm_metadata = jvm_field.getMetadata()
+    if jvm_metadata.isEmpty():
+        metadata = None
+    else:
+        metadata = {str(entry.getKey()): str(entry.getValue())
+                    for entry in jvm_metadata.entrySet()}
+    return pa.field(name, typ, nullable, metadata)
+def schema(jvm_schema):
+    """
+    Construct a Schema from a org.apache.arrow.vector.types.pojo.Schema
+    instance.
+    Parameters
+    ----------
+    jvm_schema: org.apache.arrow.vector.types.pojo.Schema
+    Returns
+    -------
+    pyarrow.Schema
+    """
+    fields = jvm_schema.getFields()
+    fields = [field(f) for f in fields]
+    jvm_metadata = jvm_schema.getCustomMetadata()
+    if jvm_metadata.isEmpty():
+        metadata = None
+    else:
+        metadata = {str(entry.getKey()): str(entry.getValue())
+                    for entry in jvm_metadata.entrySet()}
+    return pa.schema(fields, metadata)
+def array(jvm_array):
+    """
+    Construct an (Python) Array from its JVM equivalent.
+    Parameters
+    ----------
+    jvm_array : org.apache.arrow.vector.ValueVector
+    Returns
+    -------
+    array : Array
+    """
+    if jvm_array.getField().getType().isComplex():
+        minor_type_str = jvm_array.getMinorType().toString()
+        raise NotImplementedError(
+            "Cannot convert JVM Arrow array of type {},"
+            " complex types not yet implemented.".format(minor_type_str))
+    dtype = field(jvm_array.getField()).type
+    buffers = [jvm_buffer(buf)
+               for buf in list(jvm_array.getBuffers(False))]
+    # If JVM has an empty Vector, buffer list will be empty so create manually
+    if len(buffers) == 0:
+        return pa.array([], type=dtype)
+    length = jvm_array.getValueCount()
+    null_count = jvm_array.getNullCount()
+    return pa.Array.from_buffers(dtype, length, buffers, null_count)
+def record_batch(jvm_vector_schema_root):
+    """
+    Construct a (Python) RecordBatch from a JVM VectorSchemaRoot
+    Parameters
+    ----------
+    jvm_vector_schema_root : org.apache.arrow.vector.VectorSchemaRoot
+    Returns
+    -------
+    record_batch: pyarrow.RecordBatch
+    """
+    pa_schema = schema(jvm_vector_schema_root.getSchema())
+    arrays = []
+    for name in pa_schema.names:
+        arrays.append(array(jvm_vector_schema_root.getVector(name)))
+    return pa.RecordBatch.from_arrays(
+        arrays,
+        pa_schema.names,
+        metadata=pa_schema.metadata
+    )

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/lib.pxd ADDED Viewed

	@@ -0,0 +1,770 @@

+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# cython: language_level = 3
+from cpython cimport PyObject
+from libcpp cimport nullptr, bool as c_bool
+from libcpp.cast cimport dynamic_cast
+from libcpp.memory cimport dynamic_pointer_cast
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+from pyarrow.includes.libarrow_python cimport *
+# Will be available in Cython 3, not backported
+# ref: https://github.com/cython/cython/issues/3293#issuecomment-1223058101
+# Cython declarations for the C++ standard header <optional> (namespace std).
+# The whole block is declared nogil.
+cdef extern from "<optional>" namespace "std" nogil:
+    # Tag type and the constant of that type.
+    cdef cppclass nullopt_t:
+        nullopt_t()
+    cdef nullopt_t nullopt
+    cdef cppclass optional[T]:
+        ctypedef T value_type
+        # Constructors. "except +" tells Cython to translate a C++ exception
+        # thrown by the call into a Python exception.
+        optional()
+        optional(nullopt_t)
+        optional(optional&) except +
+        optional(T&) except +
+        # Observers and modifiers.
+        c_bool has_value()
+        T& value()
+        T& value_or[U](U& default_value)
+        void swap(optional&)
+        void reset()
+        T& emplace(...)
+        T& operator*()
+        # T* operator->() # Not Supported
+        # Assignment, boolean conversion and comparison operators.
+        optional& operator=(optional&)
+        optional& operator=[U](U&)
+        c_bool operator bool()
+        c_bool operator!()
+        c_bool operator==[U](optional&, U&)
+        c_bool operator!=[U](optional&, U&)
+        c_bool operator<[U](optional&, U&)
+        c_bool operator>[U](optional&, U&)
+        c_bool operator<=[U](optional&, U&)
+        c_bool operator>=[U](optional&, U&)
+    # Free function template declared in the same header.
+    optional[T] make_optional[T](...) except +
+# PySlice_Check is declared directly from the CPython header.
+cdef extern from "Python.h":
+    int PySlice_Check(object)
+# check_status takes a CStatus and returns an int; "except -1" means a return
+# value of -1 signals a raised Python exception, and "nogil" allows calling
+# it without the GIL.
+# NOTE(review): presumably raises when the status is not OK - confirm against
+# the implementation, which is not part of this declaration file.
+cdef int check_status(const CStatus& status) except -1 nogil
+# convert_status takes a CStatus and returns a Python object.
+# NOTE(review): what object is returned is not visible here - confirm against
+# the implementation.
+cdef object convert_status(const CStatus& status)
+# Base extension type that declares a __weakref__ attribute, which is what
+# makes instances of a cdef class weak-referenceable. Most classes declared
+# in this file derive from it.
+cdef class _Weakrefable:
+    cdef object __weakref__
+# Holds a CIpcWriteOptions value as a C-level attribute.
+cdef class IpcWriteOptions(_Weakrefable):
+    cdef:
+        CIpcWriteOptions c_options
+# Holds a CIpcReadOptions value as a C-level attribute.
+cdef class IpcReadOptions(_Weakrefable):
+    cdef:
+        CIpcReadOptions c_options
+# Holds a CMessage through a unique_ptr.
+cdef class Message(_Weakrefable):
+    cdef:
+        unique_ptr[CMessage] message
+# Holds a raw CMemoryPool pointer; init() takes the pointer to wrap.
+cdef class MemoryPool(_Weakrefable):
+    cdef:
+        CMemoryPool* pool
+    cdef void init(self, CMemoryPool* pool)
+# Conversions between the Python-level MemoryPool and a raw CMemoryPool*.
+# NOTE(review): handling of a None memory_pool is not visible here - confirm
+# against the implementation.
+cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool)
+cdef object box_memory_pool(CMemoryPool* pool)
+# Base declaration for the data type classes in this file.
+cdef class DataType(_Weakrefable):
+    cdef:
+        # Shared ownership of the C++ type object plus a raw pointer.
+        # NOTE(review): the raw pointer presumably aliases sp_type - confirm
+        # against init() in the implementation.
+        shared_ptr[CDataType] sp_type
+        CDataType* type
+        bytes pep3118_format
+    # "except *": the function can raise, so callers check for an exception
+    # after every call.
+    cdef void init(self, const shared_ptr[CDataType]& type) except *
+    # cpdef: callable from both Cython and Python code.
+    cpdef Field field(self, i)
+# Nested data types. Each subclass adds one const raw pointer typed as the
+# specific C++ type class.
+# NOTE(review): these pointers presumably refer to the object held by
+# DataType.sp_type - confirm against the implementation.
+cdef class ListType(DataType):
+    cdef:
+        const CListType* list_type
+cdef class LargeListType(DataType):
+    cdef:
+        const CLargeListType* list_type
+cdef class ListViewType(DataType):
+    cdef:
+        const CListViewType* list_view_type
+cdef class LargeListViewType(DataType):
+    cdef:
+        const CLargeListViewType* list_view_type
+cdef class MapType(DataType):
+    cdef:
+        const CMapType* map_type
+cdef class FixedSizeListType(DataType):
+    cdef:
+        const CFixedSizeListType* list_type
+cdef class StructType(DataType):
+    cdef:
+        const CStructType* struct_type
+    # C-level lookup that takes a name and returns a Field.
+    cdef Field field_by_name(self, name)
+# Holds a CDictionaryMemo through a shared_ptr plus a raw pointer.
+cdef class DictionaryMemo(_Weakrefable):
+    cdef:
+        # Even though the CDictionaryMemo instance is private, we allocate
+        # it on the heap so as to avoid C++ ABI issues with Python wheels.
+        shared_ptr[CDictionaryMemo] sp_memo
+        CDictionaryMemo* memo
+# Further DataType subclasses. Each adds one const raw pointer typed as the
+# specific C++ type class.
+# NOTE(review): these pointers presumably refer to the object held by
+# DataType.sp_type - confirm against the implementation.
+cdef class DictionaryType(DataType):
+    cdef:
+        const CDictionaryType* dict_type
+cdef class TimestampType(DataType):
+    cdef:
+        const CTimestampType* ts_type
+cdef class Time32Type(DataType):
+    cdef:
+        const CTime32Type* time_type
+cdef class Time64Type(DataType):
+    cdef:
+        const CTime64Type* time_type
+cdef class DurationType(DataType):
+    cdef:
+        const CDurationType* duration_type
+cdef class FixedSizeBinaryType(DataType):
+    cdef:
+        const CFixedSizeBinaryType* fixed_size_binary_type
+# The decimal types are declared as subclasses of FixedSizeBinaryType.
+cdef class Decimal32Type(FixedSizeBinaryType):
+    cdef:
+        const CDecimal32Type* decimal32_type
+cdef class Decimal64Type(FixedSizeBinaryType):
+    cdef:
+        const CDecimal64Type* decimal64_type
+cdef class Decimal128Type(FixedSizeBinaryType):
+    cdef:
+        const CDecimal128Type* decimal128_type
+cdef class Decimal256Type(FixedSizeBinaryType):
+    cdef:
+        const CDecimal256Type* decimal256_type
+cdef class RunEndEncodedType(DataType):
+    cdef:
+        const CRunEndEncodedType* run_end_encoded_type
+# Extension types: BaseExtensionType is the common base of the classes below.
+cdef class BaseExtensionType(DataType):
+    cdef:
+        const CExtensionType* ext_type
+cdef class ExtensionType(BaseExtensionType):
+    cdef:
+        const CPyExtensionType* cpy_ext_type
+cdef class FixedShapeTensorType(BaseExtensionType):
+    cdef:
+        const CFixedShapeTensorType* tensor_ext_type
+cdef class Bool8Type(BaseExtensionType):
+    cdef:
+        const CBool8Type* bool8_ext_type
+cdef class OpaqueType(BaseExtensionType):
+    cdef:
+        const COpaqueType* opaque_ext_type
+cdef class UuidType(BaseExtensionType):
+    cdef:
+        const CUuidType* uuid_ext_type
+cdef class JsonType(BaseExtensionType):
+    cdef:
+        const CJsonType* json_ext_type
+# Adds no C-level attributes of its own.
+cdef class PyExtensionType(ExtensionType):
+    pass
+# Intermediate extension-type base for KeyValueMetadata.
+cdef class _Metadata(_Weakrefable):
+    # required because KeyValueMetadata also extends collections.abc.Mapping
+    # and the first parent class must be an extension type
+    pass
+# Holds a const CKeyValueMetadata through a shared_ptr plus a raw pointer.
+cdef class KeyValueMetadata(_Metadata):
+    cdef:
+        shared_ptr[const CKeyValueMetadata] wrapped
+        const CKeyValueMetadata* metadata
+    cdef void init(self, const shared_ptr[const CKeyValueMetadata]& wrapped)
+    # wrap() is a static C-level function taking the shared_ptr; unwrap()
+    # returns the shared_ptr and is declared nogil.
+    @staticmethod
+    cdef wrap(const shared_ptr[const CKeyValueMetadata]& sp)
+    cdef inline shared_ptr[const CKeyValueMetadata] unwrap(self) nogil
+# Holds a CField through a shared_ptr plus a raw pointer.
+cdef class Field(_Weakrefable):
+    cdef:
+        shared_ptr[CField] sp_field
+        CField* field
+    # "readonly" exposes the attribute to Python code as read-only.
+    cdef readonly:
+        DataType type
+    cdef void init(self, const shared_ptr[CField]& field)
+# Holds a CSchema through a shared_ptr plus a raw pointer.
+cdef class Schema(_Weakrefable):
+    cdef:
+        shared_ptr[CSchema] sp_schema
+        CSchema* schema
+    # Two initializers: one takes a vector of fields, the other an existing
+    # shared_ptr to a CSchema.
+    cdef void init(self, const vector[shared_ptr[CField]]& fields)
+    cdef void init_schema(self, const shared_ptr[CSchema]& schema)
+# Holds a CScalar through a shared_ptr.
+cdef class Scalar(_Weakrefable):
+    cdef:
+        shared_ptr[CScalar] wrapped
+    cdef void init(self, const shared_ptr[CScalar]& wrapped)
+    # wrap() is a static C-level function taking the shared_ptr; unwrap()
+    # returns the shared_ptr and is declared nogil.
+    @staticmethod
+    cdef wrap(const shared_ptr[CScalar]& wrapped)
+    cdef inline shared_ptr[CScalar] unwrap(self) nogil
+# Common base of Array, ChunkedArray and _Tabular in this file; it adds no
+# C-level attributes.
+cdef class _PandasConvertible(_Weakrefable):
+    pass
+# Base declaration for the array classes in this file.
+cdef class Array(_PandasConvertible):
+    cdef:
+        # Shared ownership of the C++ array plus a raw pointer.
+        shared_ptr[CArray] sp_array
+        CArray* ap
+    # "readonly" exposes these attributes to Python code as read-only.
+    cdef readonly:
+        DataType type
+        # To allow Table to propagate metadata to pandas.Series
+        object _name
+    # "except *": the function can raise, so callers check for an exception
+    # after every call.
+    cdef void init(self, const shared_ptr[CArray]& sp_array) except *
+    cdef getitem(self, int64_t i)
+    cdef int64_t length(self)
+    cdef void _assert_cpu(self) except *
+# Tensor and sparse tensor wrappers. Each holds the C++ object through a
+# shared_ptr plus a raw pointer, and exposes a read-only DataType attribute.
+cdef class Tensor(_Weakrefable):
+    cdef:
+        shared_ptr[CTensor] sp_tensor
+        CTensor* tp
+    cdef readonly:
+        DataType type
+        # NOTE(review): presumably the shape and strides packed as raw
+        # ssize_t bytes - confirm against the implementation.
+        bytes _ssize_t_shape
+        bytes _ssize_t_strides
+    cdef void init(self, const shared_ptr[CTensor]& sp_tensor)
+cdef class SparseCSRMatrix(_Weakrefable):
+    cdef:
+        shared_ptr[CSparseCSRMatrix] sp_sparse_tensor
+        CSparseCSRMatrix* stp
+    cdef readonly:
+        DataType type
+    cdef void init(self, const shared_ptr[CSparseCSRMatrix]& sp_sparse_tensor)
+cdef class SparseCSCMatrix(_Weakrefable):
+    cdef:
+        shared_ptr[CSparseCSCMatrix] sp_sparse_tensor
+        CSparseCSCMatrix* stp
+    cdef readonly:
+        DataType type
+    cdef void init(self, const shared_ptr[CSparseCSCMatrix]& sp_sparse_tensor)
+cdef class SparseCOOTensor(_Weakrefable):
+    cdef:
+        shared_ptr[CSparseCOOTensor] sp_sparse_tensor
+        CSparseCOOTensor* stp
+    cdef readonly:
+        DataType type
+    cdef void init(self, const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor)
+cdef class SparseCSFTensor(_Weakrefable):
+    cdef:
+        shared_ptr[CSparseCSFTensor] sp_sparse_tensor
+        CSparseCSFTensor* stp
+    cdef readonly:
+        DataType type
+    cdef void init(self, const shared_ptr[CSparseCSFTensor]& sp_sparse_tensor)
+# Concrete Array subclasses. Apart from DictionaryArray, none of them adds
+# C-level attributes; the declarations only fix the class hierarchy.
+cdef class NullArray(Array):
+    pass
+cdef class BooleanArray(Array):
+    pass
+# Numeric hierarchy: NumericArray -> IntegerArray / FloatingPointArray.
+cdef class NumericArray(Array):
+    pass
+cdef class IntegerArray(NumericArray):
+    pass
+cdef class FloatingPointArray(NumericArray):
+    pass
+cdef class Int8Array(IntegerArray):
+    pass
+cdef class UInt8Array(IntegerArray):
+    pass
+cdef class Int16Array(IntegerArray):
+    pass
+cdef class UInt16Array(IntegerArray):
+    pass
+cdef class Int32Array(IntegerArray):
+    pass
+cdef class UInt32Array(IntegerArray):
+    pass
+cdef class Int64Array(IntegerArray):
+    pass
+cdef class UInt64Array(IntegerArray):
+    pass
+cdef class HalfFloatArray(FloatingPointArray):
+    pass
+cdef class FloatArray(FloatingPointArray):
+    pass
+cdef class DoubleArray(FloatingPointArray):
+    pass
+# The decimal arrays are declared as subclasses of FixedSizeBinaryArray.
+cdef class FixedSizeBinaryArray(Array):
+    pass
+cdef class Decimal32Array(FixedSizeBinaryArray):
+    pass
+cdef class Decimal64Array(FixedSizeBinaryArray):
+    pass
+cdef class Decimal128Array(FixedSizeBinaryArray):
+    pass
+cdef class Decimal256Array(FixedSizeBinaryArray):
+    pass
+cdef class StructArray(Array):
+    pass
+# List-like arrays share the BaseListArray base; MapArray derives from
+# ListArray.
+cdef class BaseListArray(Array):
+    pass
+cdef class ListArray(BaseListArray):
+    pass
+cdef class LargeListArray(BaseListArray):
+    pass
+cdef class ListViewArray(BaseListArray):
+    pass
+cdef class LargeListViewArray(BaseListArray):
+    pass
+cdef class MapArray(ListArray):
+    pass
+cdef class FixedSizeListArray(BaseListArray):
+    pass
+cdef class UnionArray(Array):
+    pass
+cdef class StringArray(Array):
+    pass
+cdef class BinaryArray(Array):
+    pass
+cdef class StringViewArray(Array):
+    pass
+cdef class BinaryViewArray(Array):
+    pass
+cdef class DictionaryArray(Array):
+    cdef:
+        # NOTE(review): presumably cached Python wrappers for the indices
+        # and dictionary arrays - confirm against the implementation.
+        object _indices, _dictionary
+cdef class ExtensionArray(Array):
+    pass
+cdef class MonthDayNanoIntervalArray(Array):
+    pass
+# C-level helpers that return a Python object (no return type is declared):
+# one takes a PyObject*, the other a CDatum.
+# NOTE(review): the kind of object returned is not visible here - confirm
+# against the implementation.
+cdef wrap_array_output(PyObject* output)
+cdef wrap_datum(const CDatum& datum)
+# Holds a CChunkedArray through a shared_ptr plus a raw pointer.
+cdef class ChunkedArray(_PandasConvertible):
+    cdef:
+        shared_ptr[CChunkedArray] sp_chunked_array
+        CChunkedArray* chunked_array
+        # NOTE(review): presumably a lazily computed "is on CPU" flag and a
+        # marker for whether it has been computed - confirm against the
+        # implementation.
+        c_bool _is_cpu
+        c_bool _init_is_cpu
+    cdef readonly:
+        # To allow Table to propagate metadata to pandas.Series
+        object _name
+    cdef void init(self, const shared_ptr[CChunkedArray]& chunked_array)
+    cdef getitem(self, int64_t i)
+# Common base of Table and RecordBatch; declares a C-level _assert_cpu()
+# that can raise ("except *").
+cdef class _Tabular(_PandasConvertible):
+    cdef void _assert_cpu(self) except *
+# Holds a CTable through a shared_ptr plus a raw pointer.
+cdef class Table(_Tabular):
+    cdef:
+        shared_ptr[CTable] sp_table
+        CTable* table
+        # NOTE(review): see the same pair of flags on ChunkedArray - confirm
+        # their meaning against the implementation.
+        c_bool _is_cpu
+        c_bool _init_is_cpu
+    cdef void init(self, const shared_ptr[CTable]& table)
+# Holds a CRecordBatch through a shared_ptr plus a raw pointer, and a
+# Schema attribute.
+cdef class RecordBatch(_Tabular):
+    cdef:
+        shared_ptr[CRecordBatch] sp_batch
+        CRecordBatch* batch
+        Schema _schema
+    # The parameter is named "table" although its type is a CRecordBatch
+    # shared_ptr.
+    cdef void init(self, const shared_ptr[CRecordBatch]& table)
+# Holds a CDevice through a shared_ptr.
+cdef class Device(_Weakrefable):
+    cdef:
+        shared_ptr[CDevice] device
+    cdef void init(self, const shared_ptr[CDevice]& device)
+    # wrap() is a static C-level function taking the shared_ptr; unwrap()
+    # returns the shared_ptr and is declared nogil.
+    @staticmethod
+    cdef wrap(const shared_ptr[CDevice]& device)
+    cdef inline shared_ptr[CDevice] unwrap(self) nogil
+# Holds a CMemoryManager through a shared_ptr, with the same
+# init/wrap/unwrap trio as Device.
+cdef class MemoryManager(_Weakrefable):
+    cdef:
+        shared_ptr[CMemoryManager] memory_manager
+    cdef void init(self, const shared_ptr[CMemoryManager]& memory_manager)
+    @staticmethod
+    cdef wrap(const shared_ptr[CMemoryManager]& mm)
+    cdef inline shared_ptr[CMemoryManager] unwrap(self) nogil
+# Holds a CBuffer through a shared_ptr.
+cdef class Buffer(_Weakrefable):
+    cdef:
+        shared_ptr[CBuffer] buffer
+        # One-element arrays.
+        # NOTE(review): presumably storage for the shape/strides reported
+        # through the Python buffer protocol - confirm against the
+        # implementation.
+        Py_ssize_t shape[1]
+        Py_ssize_t strides[1]
+    cdef void init(self, const shared_ptr[CBuffer]& buffer)
+    cdef getitem(self, int64_t i)
+# Adds a separate initializer that takes a CResizableBuffer shared_ptr.
+cdef class ResizableBuffer(Buffer):
+    cdef void init_rz(self, const shared_ptr[CResizableBuffer]& buffer)
+cdef class NativeFile(_Weakrefable):  # Extension-type declaration holding three Arrow IO handles plus boolean state flags.
+    cdef:
+        shared_ptr[CInputStream] input_stream  # handle returned/assigned via get_input_stream / set_input_stream (presumably -- confirm)
+        shared_ptr[CRandomAccessFile] random_access  # handle for the CRandomAccessFile interface
+        shared_ptr[COutputStream] output_stream  # handle for the COutputStream interface
+        bint is_readable
+        bint is_writable
+        bint is_seekable
+        bint _is_appending
+        bint own_file  # NOTE(review): presumably whether this object owns the underlying file -- confirm
+    # By implementing these "virtual" functions (all functions in Cython
+    # extension classes are technically virtual in the C++ sense) we can expose
+    # the arrow::io abstract file interfaces to other components throughout the
+    # suite of Arrow C++ libraries
+    cdef set_random_access_file(self, shared_ptr[CRandomAccessFile] handle)  # the three setters take their handle by value
+    cdef set_input_stream(self, shared_ptr[CInputStream] handle)
+    cdef set_output_stream(self, shared_ptr[COutputStream] handle)
+    cdef shared_ptr[CRandomAccessFile] get_random_access_file(self) except *  # the three getters use `except *`: callers always check for a raised exception
+    cdef shared_ptr[CInputStream] get_input_stream(self) except *
+    cdef shared_ptr[COutputStream] get_output_stream(self) except *
+cdef class BufferedInputStream(NativeFile):  # Subclass declaration of NativeFile; declares no additional C-level members.
+    pass
+cdef class BufferedOutputStream(NativeFile):  # Subclass declaration of NativeFile; declares no additional C-level members.
+    pass
+cdef class CompressedInputStream(NativeFile):  # Subclass declaration of NativeFile; declares no additional C-level members.
+    pass
+cdef class CompressedOutputStream(NativeFile):  # Subclass declaration of NativeFile; declares no additional C-level members.
+    pass
+cdef class _CRecordBatchWriter(_Weakrefable):  # Extension-type declaration holding a CRecordBatchWriter handle.
+    cdef:
+        SharedPtrNoGIL[CRecordBatchWriter] writer  # uses SharedPtrNoGIL rather than shared_ptr; presumably so the handle is released without the GIL -- confirm
+cdef class RecordBatchReader(_Weakrefable):  # Extension-type declaration holding a CRecordBatchReader handle.
+    cdef:
+        SharedPtrNoGIL[CRecordBatchReader] reader  # uses SharedPtrNoGIL rather than shared_ptr; presumably so the handle is released without the GIL -- confirm
+cdef class CacheOptions(_Weakrefable):  # Extension-type declaration wrapping a CCacheOptions value.
+    cdef:
+        CCacheOptions wrapped  # stored by value, not behind a shared_ptr
+    cdef void init(self, CCacheOptions options)  # C-level initialiser taking the options by value
+    cdef inline CCacheOptions unwrap(self)  # returns a CCacheOptions by value
+    @staticmethod
+    cdef wrap(const CCacheOptions options)  # static C-level function: takes a CCacheOptions, returns a Python object
+cdef class Codec(_Weakrefable):  # Extension-type declaration wrapping a shared_ptr[CCodec].
+    cdef:
+        shared_ptr[CCodec] wrapped  # the wrapped handle
+    cdef inline CCodec* unwrap(self) nogil  # returns a raw CCodec* (not a shared_ptr); declared nogil, so callable without the GIL
+# This class is only used internally for now
+cdef class StopToken:  # Extension-type declaration with no declared base class; stores a CStopToken.
+    cdef:
+        CStopToken stop_token  # stored by value
+    cdef void init(self, CStopToken stop_token)  # C-level initialiser taking the token by value
+cdef get_input_stream(object source, c_bool use_memory_map,  # Module-level C helper declarations for IO; bodies live in the implementation file.
+                      shared_ptr[CInputStream]* reader)  # presumably writes the resulting stream through `reader` -- confirm
+cdef get_reader(object source, c_bool use_memory_map,
+                shared_ptr[CRandomAccessFile]* reader)  # same shape as get_input_stream, with a CRandomAccessFile out-pointer
+cdef get_writer(object source, shared_ptr[COutputStream]* writer)  # takes a COutputStream out-pointer; no use_memory_map argument
+cdef NativeFile get_native_file(object source, c_bool use_memory_map)  # returns a NativeFile instance
+cdef shared_ptr[CInputStream] native_transcoding_input_stream(
+    shared_ptr[CInputStream] stream, src_encoding,
+    dest_encoding) except *  # returns a C++ value, so `except *` is used to make callers check for a raised exception
+cdef shared_ptr[function[StreamWrapFunc]] make_streamwrap_func(
+    src_encoding, dest_encoding) except *  # returns a shared_ptr to a function[StreamWrapFunc]; also `except *`
+# Default is allow_none=False
+cpdef DataType ensure_type(object type, bint allow_none=*)  # cpdef: callable from Cython and Python; `=*` marks an optional argument whose default is given in the implementation
+cdef timeunit_to_string(TimeUnit unit)  # takes a TimeUnit, returns a Python object
+cdef TimeUnit string_to_timeunit(unit) except *  # returns a TimeUnit; `except *` makes callers check for a raised exception
+# Exceptions may be raised when converting dict values, so need to
+# check exception state on return
+cdef shared_ptr[const CKeyValueMetadata] pyarrow_unwrap_metadata(
+    object meta) except *
+cdef object pyarrow_wrap_metadata(
+    const shared_ptr[const CKeyValueMetadata]& meta)  # takes the metadata handle by const reference and returns a Python object
+#
+# Public Cython API for 3rd party code
+#
+# If you add functions to this list, please also update
+# `cpp/src/arrow/python/pyarrow.{h, cc}`
+#
+# Wrapping C++ -> Python
+cdef public object pyarrow_wrap_buffer(const shared_ptr[CBuffer]& buf)  # every wrap function here takes a shared_ptr by const reference and returns a Python object
+cdef public object pyarrow_wrap_resizable_buffer(
+    const shared_ptr[CResizableBuffer]& buf)
+cdef public object pyarrow_wrap_data_type(const shared_ptr[CDataType]& type)
+cdef public object pyarrow_wrap_field(const shared_ptr[CField]& field)
+cdef public object pyarrow_wrap_schema(const shared_ptr[CSchema]& type)  # note: the parameter is named `type` although it carries a CSchema handle
+cdef public object pyarrow_wrap_scalar(const shared_ptr[CScalar]& sp_scalar)
+cdef public object pyarrow_wrap_array(const shared_ptr[CArray]& sp_array)
+cdef public object pyarrow_wrap_chunked_array(
+    const shared_ptr[CChunkedArray]& sp_array)
+cdef public object pyarrow_wrap_sparse_coo_tensor(
+    const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor)
+cdef public object pyarrow_wrap_sparse_csc_matrix(
+    const shared_ptr[CSparseCSCMatrix]& sp_sparse_tensor)
+cdef public object pyarrow_wrap_sparse_csf_tensor(
+    const shared_ptr[CSparseCSFTensor]& sp_sparse_tensor)
+cdef public object pyarrow_wrap_sparse_csr_matrix(
+    const shared_ptr[CSparseCSRMatrix]& sp_sparse_tensor)
+cdef public object pyarrow_wrap_tensor(const shared_ptr[CTensor]& sp_tensor)
+cdef public object pyarrow_wrap_batch(const shared_ptr[CRecordBatch]& cbatch)
+cdef public object pyarrow_wrap_table(const shared_ptr[CTable]& ctable)
+# Unwrapping Python -> C++
+cdef public shared_ptr[CBuffer] pyarrow_unwrap_buffer(object buffer)  # every unwrap function here takes a Python object and returns a shared_ptr by value
+cdef public shared_ptr[CDataType] pyarrow_unwrap_data_type(object data_type)  # none of these declare an `except` clause; NOTE(review): failure is presumably signalled by the returned pointer -- confirm
+cdef public shared_ptr[CField] pyarrow_unwrap_field(object field)
+cdef public shared_ptr[CSchema] pyarrow_unwrap_schema(object schema)
+cdef public shared_ptr[CScalar] pyarrow_unwrap_scalar(object scalar)
+cdef public shared_ptr[CArray] pyarrow_unwrap_array(object array)
+cdef public shared_ptr[CChunkedArray] pyarrow_unwrap_chunked_array(
+    object array)
+cdef public shared_ptr[CSparseCOOTensor] pyarrow_unwrap_sparse_coo_tensor(
+    object sparse_tensor)
+cdef public shared_ptr[CSparseCSCMatrix] pyarrow_unwrap_sparse_csc_matrix(
+    object sparse_tensor)
+cdef public shared_ptr[CSparseCSFTensor] pyarrow_unwrap_sparse_csf_tensor(
+    object sparse_tensor)
+cdef public shared_ptr[CSparseCSRMatrix] pyarrow_unwrap_sparse_csr_matrix(
+    object sparse_tensor)
+cdef public shared_ptr[CTensor] pyarrow_unwrap_tensor(object tensor)
+cdef public shared_ptr[CRecordBatch] pyarrow_unwrap_batch(object batch)
+cdef public shared_ptr[CTable] pyarrow_unwrap_table(object table)

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow/lib_api.h ADDED Viewed

	@@ -0,0 +1,201 @@

+/* Generated by Cython 3.0.12 */
+#ifndef __PYX_HAVE_API__pyarrow__lib
+#define __PYX_HAVE_API__pyarrow__lib
+#ifdef __MINGW64__
+#define MS_WIN64
+#endif
+#include "Python.h"
+#include "lib.h"
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_box_memory_pool)( arrow::MemoryPool *) = 0; /* Function-pointer slot, null until import_pyarrow__lib() fills it; takes an arrow::MemoryPool* and returns a PyObject*. */
+#define box_memory_pool __pyx_api_f_7pyarrow_3lib_box_memory_pool
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer)(std::shared_ptr< arrow::Buffer>  const &) = 0; /* Each wrap slot below takes a std::shared_ptr by const reference and returns a PyObject*; the #define after it exposes the slot under the short name. */
+#define pyarrow_wrap_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_resizable_buffer)(std::shared_ptr< arrow::ResizableBuffer>  const &) = 0;
+#define pyarrow_wrap_resizable_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_resizable_buffer
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type)(std::shared_ptr< arrow::DataType>  const &) = 0;
+#define pyarrow_wrap_data_type __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field)(std::shared_ptr< arrow::Field>  const &) = 0;
+#define pyarrow_wrap_field __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema)(std::shared_ptr< arrow::Schema>  const &) = 0;
+#define pyarrow_wrap_schema __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar)(std::shared_ptr< arrow::Scalar>  const &) = 0;
+#define pyarrow_wrap_scalar __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array)(std::shared_ptr< arrow::Array>  const &) = 0;
+#define pyarrow_wrap_array __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array)(std::shared_ptr< arrow::ChunkedArray>  const &) = 0;
+#define pyarrow_wrap_chunked_array __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_coo_tensor)(std::shared_ptr< arrow::SparseCOOTensor>  const &) = 0;
+#define pyarrow_wrap_sparse_coo_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_coo_tensor
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csc_matrix)(std::shared_ptr< arrow::SparseCSCMatrix>  const &) = 0;
+#define pyarrow_wrap_sparse_csc_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csc_matrix
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csf_tensor)(std::shared_ptr< arrow::SparseCSFTensor>  const &) = 0;
+#define pyarrow_wrap_sparse_csf_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csf_tensor
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csr_matrix)(std::shared_ptr< arrow::SparseCSRMatrix>  const &) = 0;
+#define pyarrow_wrap_sparse_csr_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csr_matrix
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor)(std::shared_ptr< arrow::Tensor>  const &) = 0;
+#define pyarrow_wrap_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch)(std::shared_ptr< arrow::RecordBatch>  const &) = 0;
+#define pyarrow_wrap_batch __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table)(std::shared_ptr< arrow::Table>  const &) = 0;
+#define pyarrow_wrap_table __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table
+static std::shared_ptr< arrow::Buffer>  (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_buffer)(PyObject *) = 0; /* Each unwrap slot below takes a PyObject* and returns a std::shared_ptr by value; null until import_pyarrow__lib() fills it, and the #define after it exposes the slot under the short name. */
+#define pyarrow_unwrap_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_buffer
+static std::shared_ptr< arrow::DataType>  (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_data_type)(PyObject *) = 0;
+#define pyarrow_unwrap_data_type __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_data_type
+static std::shared_ptr< arrow::Field>  (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_field)(PyObject *) = 0;
+#define pyarrow_unwrap_field __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_field
+static std::shared_ptr< arrow::Schema>  (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_schema)(PyObject *) = 0;
+#define pyarrow_unwrap_schema __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_schema
+static std::shared_ptr< arrow::Scalar>  (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_scalar)(PyObject *) = 0;
+#define pyarrow_unwrap_scalar __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_scalar
+static std::shared_ptr< arrow::Array>  (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_array)(PyObject *) = 0;
+#define pyarrow_unwrap_array __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_array
+static std::shared_ptr< arrow::ChunkedArray>  (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_chunked_array)(PyObject *) = 0;
+#define pyarrow_unwrap_chunked_array __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_chunked_array
+static std::shared_ptr< arrow::SparseCOOTensor>  (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_coo_tensor)(PyObject *) = 0;
+#define pyarrow_unwrap_sparse_coo_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_coo_tensor
+static std::shared_ptr< arrow::SparseCSCMatrix>  (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csc_matrix)(PyObject *) = 0;
+#define pyarrow_unwrap_sparse_csc_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csc_matrix
+static std::shared_ptr< arrow::SparseCSFTensor>  (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csf_tensor)(PyObject *) = 0;
+#define pyarrow_unwrap_sparse_csf_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csf_tensor
+static std::shared_ptr< arrow::SparseCSRMatrix>  (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csr_matrix)(PyObject *) = 0;
+#define pyarrow_unwrap_sparse_csr_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csr_matrix
+static std::shared_ptr< arrow::Tensor>  (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor)(PyObject *) = 0;
+#define pyarrow_unwrap_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor
+static std::shared_ptr< arrow::RecordBatch>  (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_batch)(PyObject *) = 0;
+#define pyarrow_unwrap_batch __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_batch
+static std::shared_ptr< arrow::Table>  (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_table)(PyObject *) = 0;
+#define pyarrow_unwrap_table __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_table
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status)(arrow::Status const &) = 0; /* Slot taking an arrow::Status by const reference and returning int; null until import_pyarrow__lib() fills it. */
+#define pyarrow_internal_check_status __pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_internal_convert_status)(arrow::Status const &) = 0; /* Same argument as check_status, but returns a PyObject*. */
+#define pyarrow_internal_convert_status __pyx_api_f_7pyarrow_3lib_pyarrow_internal_convert_status
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_buffer)(PyObject *) = 0; /* Each pyarrow_is_* slot below takes a PyObject* and returns int; presumably non-zero when the object is of the named type -- confirm against the implementation. */
+#define pyarrow_is_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_is_buffer
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_data_type)(PyObject *) = 0;
+#define pyarrow_is_data_type __pyx_api_f_7pyarrow_3lib_pyarrow_is_data_type
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_metadata)(PyObject *) = 0;
+#define pyarrow_is_metadata __pyx_api_f_7pyarrow_3lib_pyarrow_is_metadata
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_field)(PyObject *) = 0;
+#define pyarrow_is_field __pyx_api_f_7pyarrow_3lib_pyarrow_is_field
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_schema)(PyObject *) = 0;
+#define pyarrow_is_schema __pyx_api_f_7pyarrow_3lib_pyarrow_is_schema
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_array)(PyObject *) = 0;
+#define pyarrow_is_array __pyx_api_f_7pyarrow_3lib_pyarrow_is_array
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_chunked_array)(PyObject *) = 0;
+#define pyarrow_is_chunked_array __pyx_api_f_7pyarrow_3lib_pyarrow_is_chunked_array
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_scalar)(PyObject *) = 0;
+#define pyarrow_is_scalar __pyx_api_f_7pyarrow_3lib_pyarrow_is_scalar
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor)(PyObject *) = 0;
+#define pyarrow_is_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_coo_tensor)(PyObject *) = 0;
+#define pyarrow_is_sparse_coo_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_coo_tensor
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csr_matrix)(PyObject *) = 0;
+#define pyarrow_is_sparse_csr_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csr_matrix
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csc_matrix)(PyObject *) = 0;
+#define pyarrow_is_sparse_csc_matrix __pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csc_matrix
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csf_tensor)(PyObject *) = 0;
+#define pyarrow_is_sparse_csf_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csf_tensor
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_table)(PyObject *) = 0;
+#define pyarrow_is_table __pyx_api_f_7pyarrow_3lib_pyarrow_is_table
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_batch)(PyObject *) = 0;
+#define pyarrow_is_batch __pyx_api_f_7pyarrow_3lib_pyarrow_is_batch
+#ifndef __PYX_HAVE_RT_ImportFunction_3_0_12
+#define __PYX_HAVE_RT_ImportFunction_3_0_12
+/*
+ * Fetch one exported C function from a module's `__pyx_capi__` dictionary.
+ *
+ *   module   - module object whose `__pyx_capi__` attribute is consulted
+ *   funcname - key looked up in that dictionary
+ *   f        - receives the function pointer stored in the capsule
+ *   sig      - capsule name the entry must carry (the expected signature)
+ *
+ * Returns 0 on success. Returns -1 when the attribute is missing, when the
+ * key is absent (ImportError is set), when the capsule is not valid for
+ * `sig` (TypeError is set), or when the stored pointer is null.
+ */
+static int __Pyx_ImportFunction_3_0_12(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
+    /* Converts the capsule's object pointer into a function pointer. */
+    union {
+        void (*fp)(void);
+        void *p;
+    } conv;
+    PyObject *capi = 0;
+    PyObject *capsule = 0;
+    int rc = -1;
+    capi = PyObject_GetAttrString(module, (char *)"__pyx_capi__");
+    if (!capi)
+        return rc;
+    /* Borrowed reference: only `capi` has to be released below. */
+    capsule = PyDict_GetItemString(capi, funcname);
+    if (!capsule) {
+        PyErr_Format(PyExc_ImportError,
+            "%.200s does not export expected C function %.200s",
+                PyModule_GetName(module), funcname);
+    } else if (!PyCapsule_IsValid(capsule, sig)) {
+        PyErr_Format(PyExc_TypeError,
+            "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
+             PyModule_GetName(module), funcname, sig, PyCapsule_GetName(capsule));
+    } else {
+        conv.p = PyCapsule_GetPointer(capsule, sig);
+        *f = conv.fp;
+        if (*f)
+            rc = 0;
+    }
+    Py_DECREF(capi);
+    return rc;
+}
+#endif
+/*
+ * Import "pyarrow.lib" and fill every function-pointer slot declared in this
+ * header from the capsules that module exports.
+ *
+ * Entries are resolved in table order and the first failure stops the loop,
+ * so slots after the failing one are left untouched. Returns 0 when every
+ * entry was resolved, -1 otherwise. The module reference taken here is
+ * released on both paths.
+ */
+static int import_pyarrow__lib(void) {
+  /* One row per exported function: capsule key, destination slot, expected signature. */
+  const struct {
+    const char *name;
+    void (**slot)(void);
+    const char *sig;
+  } entries[] = {
+    {"box_memory_pool", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_box_memory_pool, "PyObject *( arrow::MemoryPool *)"},
+    {"pyarrow_wrap_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer, "PyObject *(std::shared_ptr< arrow::Buffer>  const &)"},
+    {"pyarrow_wrap_resizable_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_resizable_buffer, "PyObject *(std::shared_ptr< arrow::ResizableBuffer>  const &)"},
+    {"pyarrow_wrap_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type, "PyObject *(std::shared_ptr< arrow::DataType>  const &)"},
+    {"pyarrow_wrap_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field, "PyObject *(std::shared_ptr< arrow::Field>  const &)"},
+    {"pyarrow_wrap_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema, "PyObject *(std::shared_ptr< arrow::Schema>  const &)"},
+    {"pyarrow_wrap_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar, "PyObject *(std::shared_ptr< arrow::Scalar>  const &)"},
+    {"pyarrow_wrap_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array, "PyObject *(std::shared_ptr< arrow::Array>  const &)"},
+    {"pyarrow_wrap_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array, "PyObject *(std::shared_ptr< arrow::ChunkedArray>  const &)"},
+    {"pyarrow_wrap_sparse_coo_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_coo_tensor, "PyObject *(std::shared_ptr< arrow::SparseCOOTensor>  const &)"},
+    {"pyarrow_wrap_sparse_csc_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csc_matrix, "PyObject *(std::shared_ptr< arrow::SparseCSCMatrix>  const &)"},
+    {"pyarrow_wrap_sparse_csf_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csf_tensor, "PyObject *(std::shared_ptr< arrow::SparseCSFTensor>  const &)"},
+    {"pyarrow_wrap_sparse_csr_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_csr_matrix, "PyObject *(std::shared_ptr< arrow::SparseCSRMatrix>  const &)"},
+    {"pyarrow_wrap_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor, "PyObject *(std::shared_ptr< arrow::Tensor>  const &)"},
+    {"pyarrow_wrap_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch, "PyObject *(std::shared_ptr< arrow::RecordBatch>  const &)"},
+    {"pyarrow_wrap_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table, "PyObject *(std::shared_ptr< arrow::Table>  const &)"},
+    {"pyarrow_unwrap_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_buffer, "std::shared_ptr< arrow::Buffer>  (PyObject *)"},
+    {"pyarrow_unwrap_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_data_type, "std::shared_ptr< arrow::DataType>  (PyObject *)"},
+    {"pyarrow_unwrap_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_field, "std::shared_ptr< arrow::Field>  (PyObject *)"},
+    {"pyarrow_unwrap_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_schema, "std::shared_ptr< arrow::Schema>  (PyObject *)"},
+    {"pyarrow_unwrap_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_scalar, "std::shared_ptr< arrow::Scalar>  (PyObject *)"},
+    {"pyarrow_unwrap_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_array, "std::shared_ptr< arrow::Array>  (PyObject *)"},
+    {"pyarrow_unwrap_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_chunked_array, "std::shared_ptr< arrow::ChunkedArray>  (PyObject *)"},
+    {"pyarrow_unwrap_sparse_coo_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_coo_tensor, "std::shared_ptr< arrow::SparseCOOTensor>  (PyObject *)"},
+    {"pyarrow_unwrap_sparse_csc_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csc_matrix, "std::shared_ptr< arrow::SparseCSCMatrix>  (PyObject *)"},
+    {"pyarrow_unwrap_sparse_csf_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csf_tensor, "std::shared_ptr< arrow::SparseCSFTensor>  (PyObject *)"},
+    {"pyarrow_unwrap_sparse_csr_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_csr_matrix, "std::shared_ptr< arrow::SparseCSRMatrix>  (PyObject *)"},
+    {"pyarrow_unwrap_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor, "std::shared_ptr< arrow::Tensor>  (PyObject *)"},
+    {"pyarrow_unwrap_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_batch, "std::shared_ptr< arrow::RecordBatch>  (PyObject *)"},
+    {"pyarrow_unwrap_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_table, "std::shared_ptr< arrow::Table>  (PyObject *)"},
+    {"pyarrow_internal_check_status", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status, "int (arrow::Status const &)"},
+    {"pyarrow_internal_convert_status", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_internal_convert_status, "PyObject *(arrow::Status const &)"},
+    {"pyarrow_is_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_buffer, "int (PyObject *)"},
+    {"pyarrow_is_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_data_type, "int (PyObject *)"},
+    {"pyarrow_is_metadata", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_metadata, "int (PyObject *)"},
+    {"pyarrow_is_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_field, "int (PyObject *)"},
+    {"pyarrow_is_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_schema, "int (PyObject *)"},
+    {"pyarrow_is_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_array, "int (PyObject *)"},
+    {"pyarrow_is_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_chunked_array, "int (PyObject *)"},
+    {"pyarrow_is_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_scalar, "int (PyObject *)"},
+    {"pyarrow_is_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor, "int (PyObject *)"},
+    {"pyarrow_is_sparse_coo_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_coo_tensor, "int (PyObject *)"},
+    {"pyarrow_is_sparse_csr_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csr_matrix, "int (PyObject *)"},
+    {"pyarrow_is_sparse_csc_matrix", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csc_matrix, "int (PyObject *)"},
+    {"pyarrow_is_sparse_csf_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_csf_tensor, "int (PyObject *)"},
+    {"pyarrow_is_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_table, "int (PyObject *)"},
+    {"pyarrow_is_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_batch, "int (PyObject *)"},
+  };
+  const int entry_count = (int)(sizeof(entries) / sizeof(entries[0]));
+  int idx;
+  PyObject *lib = PyImport_ImportModule("pyarrow.lib");
+  if (!lib)
+    return -1;
+  for (idx = 0; idx < entry_count; idx++) {
+    if (__Pyx_ImportFunction_3_0_12(lib, entries[idx].name, entries[idx].slot, entries[idx].sig) < 0) {
+      /* Stop at the first entry that cannot be resolved. */
+      Py_DECREF(lib);
+      return -1;
+    }
+  }
+  Py_DECREF(lib);
+  return 0;
+}
+#endif /* !__PYX_HAVE_API__pyarrow__lib */